In [8]:
import pandas as pd
import re
from scipy.stats import chi2_contingency

# Read the cleaned dataset CSV (path relative to the working directory) into a DataFrame
data = pd.read_csv(filepath_or_buffer="cleaned_dataset.csv")

In [10]:
# Default vocabulary of sensational terms, matched case-insensitively as whole words
SENSATIONAL_KEYWORDS = ("shocking", "outrageous", "unbelievable", "mind-blowing", "explosive")


def _build_keyword_pattern(keywords):
    """Compile a case-insensitive, whole-word regex that matches any of ``keywords``."""
    # re.escape keeps keywords literal even if they contain regex metacharacters
    alternatives = "|".join(re.escape(word) for word in keywords)
    return re.compile(r"\b(?:" + alternatives + r")\b", re.IGNORECASE)


# Compiled once when the cell runs, rather than once per row during .apply()
_DEFAULT_SENSATIONAL_PATTERN = _build_keyword_pattern(SENSATIONAL_KEYWORDS)


def detect_sensationalism(text, keywords=None):
    """Report whether ``text`` contains at least one sensational keyword.

    Matching is case-insensitive and anchored on word boundaries, so a
    keyword embedded in a longer word (e.g. "unshocking") does not count.

    Parameters
    ----------
    text : object
        The value to inspect. Anything that is not a ``str`` (``None``,
        ``NaN``, numbers, ...) is treated as containing no keywords.
    keywords : str or iterable of str, optional
        Keywords to search for, each matched literally. When omitted, the
        module-level ``SENSATIONAL_KEYWORDS`` vocabulary is used.

    Returns
    -------
    bool
        ``True`` if any keyword occurs in ``text``, otherwise ``False``.
    """
    # Non-string cells (e.g. missing values) cannot contain a keyword
    if not isinstance(text, str):
        return False

    if keywords is None:
        pattern = _DEFAULT_SENSATIONAL_PATTERN
    else:
        # Accept a single keyword passed as a bare string
        keyword_list = [keywords] if isinstance(keywords, str) else list(keywords)
        # An empty alternation would match at every word boundary; with no
        # keywords to look for, nothing can be detected.
        if not keyword_list:
            return False
        pattern = _build_keyword_pattern(keyword_list)

    return bool(pattern.search(text))


In [11]:
# Flag every row whose "text" value contains a sensational keyword
sensational_flags = data["text"].apply(detect_sensationalism)
data["Sensationalism"] = sensational_flags

# Cross-tabulate the flag (rows) against the "label" column (columns)
contingency_table = pd.crosstab(index=data["Sensationalism"], columns=data["label"])
print(contingency_table)

label           Fake  Real
Sensationalism            
False           1208   770
True              76    31


In [12]:
# Chi-squared test of independence on the contingency table.
# Only the statistic and p-value are kept; the remaining two return values
# (degrees of freedom and expected frequencies) are discarded.
# Note: per the SciPy docs, chi2_contingency applies Yates' continuity
# correction by default when the table has one degree of freedom (e.g. 2x2).
chi2, p, _, _ = chi2_contingency(contingency_table)
print(f"Chi-squared statistic: {chi2:.4f}")
print(f"P-value: {p:.4f}")

# Decision threshold. The comparison below uses the unrounded p-value, so a
# p-value displayed as 0.0500 can still fall just below alpha.
alpha = 0.05
print(
    "There is a significant association between sensationalism and credibility."
    if p < alpha
    else "There is no significant association between sensationalism and credibility."
)


Chi-squared statistic: 3.8427
P-value: 0.0500
There is a significant association between sensationalism and credibility.
