In [1]:
import pandas as pd
import scipy.stats as stats
from collections import Counter
import numpy as np

In [15]:
def chi_sq_test(df1, df2, senti_col='story_sentiment', alpha=0.05,
                labels=(-1, 0, 1), unit=None):
    """Run a chi-square test of independence on two sentiment distributions.

    Counts how often each sentiment label occurs in ``df1[senti_col]`` (the AI
    group) and ``df2[senti_col]`` (the non-AI group), builds a 2 x k
    contingency table from those raw counts, and prints the chi-square
    statistic, the p-value and the test decision.

    Parameters
    ----------
    df1, df2 : mapping of column name -> iterable (e.g. ``pandas.DataFrame``)
        AI and non-AI data sets; both must contain ``senti_col``.
    senti_col : str
        Name of the column holding the sentiment labels.
    alpha : float
        Significance level used for the reject / fail-to-reject decision.
    labels : sequence
        Sentiment labels to tabulate, in column order. Values not listed here
        (including NaN) are ignored.
        NOTE(review): -1/0/1 presumably mean negative/neutral/positive -
        confirm against the data set.
    unit : str or None
        Plural noun used in the "Reject" message. When ``None`` it is inferred
        from ``senti_col``: a column name containing "comment" yields
        "comments"; anything else keeps the original "stories" wording.

    Raises
    ------
    ValueError
        If either group has no observations for any of ``labels``; the test
        is undefined in that case.
    """
    ai_counts = Counter(df1[senti_col])
    non_ai_counts = Counter(df2[senti_col])

    # Contingency table of raw counts: row 0 = AI, row 1 = non-AI.
    observed = np.array([
        [ai_counts[label] for label in labels],
        [non_ai_counts[label] for label in labels],
    ])

    if (observed.sum(axis=1) == 0).any():
        raise ValueError(
            f"Cannot run chi-square test: at least one group has no "
            f"'{senti_col}' values among the labels {tuple(labels)}."
        )

    # A label that never occurs in either group gives an all-zero column,
    # which makes chi2_contingency fail on zero expected frequencies. Such a
    # column carries no information, so drop it before testing.
    observed = observed[:, observed.sum(axis=0) > 0]

    chi2_stat, p_value, _dof, _expected = stats.chi2_contingency(observed)

    print(f"Chi-Square Statistic: {chi2_stat}")
    print(f"P-value: {p_value}")

    if unit is None:
        # Keep the message in line with what is actually being compared.
        unit = 'comments' if 'comment' in str(senti_col).lower() else 'stories'

    if p_value < alpha:
        print(f"Reject H₀: Sentiment distribution is significantly different between AI and non-AI {unit}.")
    else:
        print("Fail to reject H₀: No significant difference in sentiment distribution.")

# AI & Non-AI Story

In [2]:
# Load the AI and non-AI story sentiment tables.
# Forward slashes resolve on Windows, macOS and Linux alike; backslash
# separators are only understood as path separators on Windows.
ai_story_df = pd.read_csv('../dataset/hn_gh_ai_story_sentiment.csv')
non_ai_story_df = pd.read_csv('../dataset/hn_gh_non_ai_story_sentiment.csv')

In [16]:
# Chi-square test on the 'story_sentiment' column of the two story frames.
chi_sq_test(df1=ai_story_df, df2=non_ai_story_df, senti_col='story_sentiment')

Chi-Square Statistic: 117.18142191520026
P-value: 3.584078723899005e-26
Reject H₀: Sentiment distribution is significantly different between AI and non-AI stories.


# AI & Non-AI Comment

In [11]:
# Load the AI and non-AI comment sentiment tables.
# Forward slashes resolve on Windows, macOS and Linux alike; backslash
# separators are only understood as path separators on Windows.
ai_comment_df = pd.read_csv('../dataset/hn_gh_ai_comment_sentiment.csv')
non_ai_comment_df = pd.read_csv('../dataset/hn_gh_non_ai_comment_sentiment.csv')

In [17]:
# Chi-square test on the 'comment_sentiment' column of the two comment frames.
chi_sq_test(
    df1=ai_comment_df,
    df2=non_ai_comment_df,
    senti_col='comment_sentiment',
)

Chi-Square Statistic: 114.56282943381389
P-value: 1.3273873449134634e-25
Reject H₀: Sentiment distribution is significantly different between AI and non-AI stories.
