In [2]:
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
# import researchpy as rp
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import f_oneway
from scipy.stats import ttest_ind
from scipy.stats import levene
import ast

In [3]:
df = pd.read_csv("antiKremlin_dataset.csv")

In [4]:
df["post_datetime"] = pd.to_datetime(df['post_datetime'])
df['post_date'] = pd.to_datetime(df['post_datetime'].dt.date)

df['reactions'] = df['reactions'].apply(lambda x: ast.literal_eval(x) if pd.notna(x) else x)

In [5]:
def stat_test(*data):
    levene_f_statistic, levene_p_value = levene(*data)
    # print(f'Levene F-statistic: {levene_f_statistic}')
    print(f'Levene p-value: {levene_p_value}')

    if levene_p_value > 0.05:
        t, p = ttest_ind(*data)
        print(f'Equal variance t-test stat: {t}')
        print(f'Equal variance t-test p-value: {p}')

        # Calculate degrees of freedom for equal variances
        n1 = len(data[0])
        n2 = len(data[1])
        dof = n1 + n2 - 2
        print("Degrees of freedom (equal variance):", dof)

    else:
        t, p = ttest_ind(*data, equal_var=False)
        print(f'Unequal variance t-test stat: {t}')
        print(f'Unequal variance t-test p-value: {p}')

        group1 = np.array(data[0])
        group2 = np.array(data[1])

        n1 = len(group1)
        n2 = len(group2)

        var1 = np.var(group1, ddof=1)
        var2 = np.var(group2, ddof=1)

        # Welch-Satterthwaite equation for degrees of freedom
        numerator = (var1/n1 + var2/n2)**2
        denominator = ((var1/n1)**2 / (n1 - 1)) + ((var2/n2)**2 / (n2 - 1))
        dof = numerator / denominator
        
        print("degree of freedom (unequal variance):", dof)

    if p < 0.05:
        print('Reject the null hypothesis - significant difference between groups')
    else:
        print('Fail to reject the null hypothesis - no significant difference between groups')

### Topic 1: International Politics	

In [6]:
# net reactions for economy category in phase 1
int_df = df[(df['category'] == "International Politics") & (df['phase'] == "phase 6")]

int_df['positive_reactions'] = int_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

int_df['negative_reactions'] = int_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

print("positive %", round(int_df['positive_reactions'].sum() / (int_df['positive_reactions'].sum() + int_df['negative_reactions'].sum()) * 100,1))
print("negative %", round(int_df['negative_reactions'].sum() / (int_df['positive_reactions'].sum() + int_df['negative_reactions'].sum()) * 100,1))

positive % 68.6
negative % 31.4


In [7]:
int_df = df[df['category'] == "International Politics"]
int_df['week_year'] = int_df['post_datetime'].dt.strftime('%Y-%W')
int_df['weekday_number'] = int_df['post_datetime'].dt.weekday

int_df['positive_reactions'] = int_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

int_df['negative_reactions'] = int_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

int_df['net_reactions'] = int_df['positive_reactions'] - int_df['negative_reactions']

int_result = int_df.groupby(['phase', 'week_year']).agg(net_reactions=('net_reactions', 'sum'),
                                                    days_captured=('weekday_number', 
                                                                   lambda x: x.max() - x.min() + 1)).reset_index()

# result_int_df.to_csv("check_int.csv", index=False)
int_result['mean_net_reactions'] = round(int_result['net_reactions'] / int_result['days_captured']).astype(int)
int_result = int_result[1:]
int_result = int_result[~((int_result['phase'] == "phase 6") & (int_result['week_year'] == '2023-00'))]

int_result = int_result.reset_index(drop=True)

int_grouped_data = [int_result['mean_net_reactions'][int_result['phase'] == phase] for phase in int_result['phase'].unique()]

In [8]:
int_f_statistic, int_p_value = f_oneway(*int_grouped_data)

# Print the results
print(f'F-statistic: {int_f_statistic}')
print(f'P-value: {int_p_value}\n')

model = ols('mean_net_reactions ~ C(phase)', int_result).fit()
# Perform ANOVA and print the results
aov_table = sm.stats.anova_lm(model, typ=2)
print(aov_table)

F-statistic: 15.974096312364965
P-value: 8.32992403794132e-11

                sum_sq    df          F        PR(>F)
C(phase)  9.333405e+10   6.0  15.974096  8.329924e-11
Residual  5.745457e+10  59.0        NaN           NaN


In [9]:
# contrast 1: 0 vs rest
print("Contrast 1: 0 vs rest\n")

stat_test(int_result['mean_net_reactions'][int_result['phase'].isin(["phase 0"])], 
     int_result['mean_net_reactions'][int_result['phase'].isin(["phase 1", "phase 2", "phase 3", "phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6
print("Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6\n")

stat_test(int_result['mean_net_reactions'][int_result['phase'].isin(["phase 1", "phase 2", "phase 3"])], 
     int_result['mean_net_reactions'][int_result['phase'].isin(["phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 3: phase1, phase2 vs phase3
print("Contrast 3: phase1, phase2 vs phase3\n")

stat_test(int_result['mean_net_reactions'][int_result['phase'].isin(["phase 1", "phase 2"])],
        int_result['mean_net_reactions'][int_result['phase'].isin(["phase 3"])])

print("=====================================================================================================")

# contrast 4: phase4 vs phase5, phase6
print("Contrast 4: phase4 vs phase5, phase6\n")

stat_test(int_result['mean_net_reactions'][int_result['phase'].isin(["phase 4"])],
        int_result['mean_net_reactions'][int_result['phase'].isin(["phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 5: phase1 vs phase2
print("Contrast 5: phase1 vs phase2\n")

stat_test(int_result['mean_net_reactions'][int_result['phase'].isin(["phase 1"])],
        int_result['mean_net_reactions'][int_result['phase'].isin(["phase 2"])])

print("=====================================================================================================")

# contrast 6: phase5 vs phase6
print("Contrast 6: phase5 vs phase6\n")

stat_test(int_result['mean_net_reactions'][int_result['phase'].isin(["phase 5"])],
        int_result['mean_net_reactions'][int_result['phase'].isin(["phase 6"])])

print("=====================================================================================================")

Contrast 1: 0 vs rest

Levene p-value: 0.009744212381222816
Unequal variance t-test stat: -8.744647658855731
Unequal variance t-test p-value: 3.761905725808745e-12
degree of freedom (unequal variance): 57.50530683592335
Reject the null hypothesis - significant difference between groups
Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6

Levene p-value: 0.37357645245437476
Equal variance t-test stat: 2.520967047670704
Equal variance t-test p-value: 0.014577848109800298
Degrees of freedom (equal variance): 56
Reject the null hypothesis - significant difference between groups
Contrast 3: phase1, phase2 vs phase3

Levene p-value: 0.01538614751114398
Unequal variance t-test stat: 3.304189656506952
Unequal variance t-test p-value: 0.004720905315634084
degree of freedom (unequal variance): 15.268904769013208
Reject the null hypothesis - significant difference between groups
Contrast 4: phase4 vs phase5, phase6

Levene p-value: 0.6858143332556288
Equal variance t-test stat: 0.8270926

### Topic 2: Russian Domestic Affairs

In [11]:
rus_df = df[(df['category'] == "Russian domestic politics") & (df['phase'] == "phase 4")]

rus_df['positive_reactions'] = rus_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

rus_df['negative_reactions'] = rus_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)


print("positive %", round(rus_df['positive_reactions'].sum() / (rus_df['positive_reactions'].sum() + rus_df['negative_reactions'].sum()) * 100,1))
print("negative %", round(rus_df['negative_reactions'].sum() / (rus_df['positive_reactions'].sum() + rus_df['negative_reactions'].sum()) * 100,1))

positive % 66.7
negative % 33.3


In [12]:
rus_df = df[df['category'] == "Russian domestic politics"]
rus_df['week_year'] = rus_df['post_datetime'].dt.strftime('%Y-%W')
rus_df['weekday_number'] = rus_df['post_datetime'].dt.weekday

rus_df['positive_reactions'] = rus_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

rus_df['negative_reactions'] = rus_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

rus_df['net_reactions'] = rus_df['positive_reactions'] - rus_df['negative_reactions']

rus_result = rus_df.groupby(['phase', 'week_year']).agg(net_reactions=('net_reactions', 'sum'),
                                                    days_captured=('weekday_number', 
                                                                   lambda x: x.max() - x.min() + 1)).reset_index()

# result_int_df.to_csv("check_int.csv", index=False)
rus_result['mean_net_reactions'] = round(rus_result['net_reactions'] / rus_result['days_captured']).astype(int)
rus_result = rus_result[1:]
rus_result = rus_result[~((rus_result['phase'] == "phase 6") & (rus_result['week_year'] == '2023-00'))]

rus_result = rus_result.reset_index(drop=True)

rus_grouped_data = [rus_result['mean_net_reactions'][rus_result['phase'] == phase] for phase in rus_result['phase'].unique()]

In [13]:
# one way ANOVA
rus_f_statistic, rus_p_value = f_oneway(*rus_grouped_data)

# Prrus the results
print(f'F-statistic: {rus_f_statistic}')
print(f'P-value: {rus_p_value}\n')

model = ols('mean_net_reactions ~ C(phase)', rus_result).fit()
# Perform ANOVA and prrus the results
aov_table = sm.stats.anova_lm(model, typ=2)
print(aov_table)

F-statistic: 15.394458752436904
P-value: 1.58474558511988e-10

                sum_sq    df          F        PR(>F)
C(phase)  5.378583e+10   6.0  15.394459  1.584746e-10
Residual  3.435613e+10  59.0        NaN           NaN


In [14]:
# contrast 1: 0 vs rest
print("Contrast 1: 0 vs rest\n")

stat_test(rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 0"])], 
     rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 1", "phase 2", "phase 3", "phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6
print("Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6\n")

stat_test(rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 1", "phase 2", "phase 3"])], 
     rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 3: phase1, phase2 vs phase3
print("Contrast 3: phase1, phase2 vs phase3\n")

stat_test(rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 1", "phase 2"])],
        rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 3"])])

print("=====================================================================================================")

# contrast 4: phase4 vs phase5, phase6
print("Contrast 4: phase4 vs phase5, phase6\n")

stat_test(rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 4"])],
        rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 5: phase1 vs phase2
print("Contrast 5: phase1 vs phase2\n")

stat_test(rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 1"])],
        rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 2"])])

print("=====================================================================================================")

# contrast 6: phase5 vs phase6
print("Contrast 6: phase5 vs phase6\n")

stat_test(rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 5"])],
        rus_result['mean_net_reactions'][rus_result['phase'].isin(["phase 6"])])

print("=====================================================================================================")

Contrast 1: 0 vs rest

Levene p-value: 0.007589607617203904
Unequal variance t-test stat: -7.548821182526978
Unequal variance t-test p-value: 3.4537452157336907e-10
degree of freedom (unequal variance): 58.14660184049598
Reject the null hypothesis - significant difference between groups
Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6

Levene p-value: 0.226362861155471
Equal variance t-test stat: 1.3001844082498486
Equal variance t-test p-value: 0.19886370420458738
Degrees of freedom (equal variance): 56
Fail to reject the null hypothesis - no significant difference between groups
Contrast 3: phase1, phase2 vs phase3

Levene p-value: 0.011730645730391877
Unequal variance t-test stat: 4.233707663604048
Unequal variance t-test p-value: 0.0006726072158144296
degree of freedom (unequal variance): 15.52461053049394
Reject the null hypothesis - significant difference between groups
Contrast 4: phase4 vs phase5, phase6

Levene p-value: 0.509871574271353
Equal variance t-test stat:

### Topic 3: Combat and Frontline Updates

In [15]:
war_df = df[(df['category'] == "Combat and Frontline updates") & (df['phase'] == "phase 6")]

war_df['positive_reactions'] = war_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

war_df['negative_reactions'] = war_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

war_df['positive_reactions'].sum(), war_df['negative_reactions'].sum(), (war_df['positive_reactions'].sum() + war_df['negative_reactions'].sum())

print("positive %", round(war_df['positive_reactions'].sum() / (war_df['positive_reactions'].sum() + war_df['negative_reactions'].sum()) * 100,1))
print("negative %", round(war_df['negative_reactions'].sum() / (war_df['positive_reactions'].sum() + war_df['negative_reactions'].sum()) * 100,1))

positive % 67.4
negative % 32.6


In [16]:
war_df = df[df['category'] == "Combat and Frontline updates"]
war_df['week_year'] = war_df['post_datetime'].dt.strftime('%Y-%W')
war_df['weekday_number'] = war_df['post_datetime'].dt.weekday

war_df['positive_reactions'] = war_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

war_df['negative_reactions'] = war_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

war_df['net_reactions'] = war_df['positive_reactions'] - war_df['negative_reactions']

war_result = war_df.groupby(['phase', 'week_year']).agg(net_reactions=('net_reactions', 'sum'),
                                                    days_captured=('weekday_number', 
                                                                   lambda x: x.max() - x.min() + 1)).reset_index()

# result_int_df.to_csv("check_int.csv", index=False)
war_result['mean_net_reactions'] = round(war_result['net_reactions'] / war_result['days_captured']).astype(int)
war_result = war_result[1:]
war_result = war_result[~((war_result['phase'] == "phase 6") & (war_result['week_year'] == '2023-00'))]

war_result = war_result.reset_index(drop=True)

war_grouped_data = [war_result['mean_net_reactions'][war_result['phase'] == phase] for phase in war_result['phase'].unique()]

In [17]:
# one way ANOVA
war_f_statistic, war_p_value = f_oneway(*war_grouped_data)

# print the results
print(f'F-statistic: {war_f_statistic}')
print(f'P-value: {war_p_value}\n')

model = ols('mean_net_reactions ~ C(phase)', war_result).fit()
# Perform ANOVA and print the results
aov_table = sm.stats.anova_lm(model, typ=2)
print(aov_table)

F-statistic: 12.142121322630183
P-value: 9.060681313141336e-08

                sum_sq    df          F        PR(>F)
C(phase)  1.221591e+11   5.0  12.142121  9.060681e-08
Residual  1.026199e+11  51.0        NaN           NaN


In [18]:
# contrast 1: 0 vs rest
print("Contrast 1: 0 vs rest\n")

stat_test(war_result['mean_net_reactions'][war_result['phase'].isin(["phase 0"])], 
     war_result['mean_net_reactions'][war_result['phase'].isin(["phase 1", "phase 2", "phase 3", "phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6
print("Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6\n")

stat_test(war_result['mean_net_reactions'][war_result['phase'].isin(["phase 1", "phase 2", "phase 3"])], 
     war_result['mean_net_reactions'][war_result['phase'].isin(["phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 3: phase1, phase2 vs phase3
print("Contrast 3: phase1, phase2 vs phase3\n")

stat_test(war_result['mean_net_reactions'][war_result['phase'].isin(["phase 1", "phase 2"])],
        war_result['mean_net_reactions'][war_result['phase'].isin(["phase 3"])])

print("=====================================================================================================")

# contrast 4: phase4 vs phase5, phase6
print("Contrast 4: phase4 vs phase5, phase6\n")

stat_test(war_result['mean_net_reactions'][war_result['phase'].isin(["phase 4"])],
        war_result['mean_net_reactions'][war_result['phase'].isin(["phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 5: phase1 vs phase2
print("Contrast 5: phase1 vs phase2\n")

stat_test(war_result['mean_net_reactions'][war_result['phase'].isin(["phase 1"])],
        war_result['mean_net_reactions'][war_result['phase'].isin(["phase 2"])])

print("=====================================================================================================")

# contrast 6: phase5 vs phase6
print("Contrast 6: phase5 vs phase6\n")

stat_test(war_result['mean_net_reactions'][war_result['phase'].isin(["phase 5"])],
        war_result['mean_net_reactions'][war_result['phase'].isin(["phase 6"])])

print("=====================================================================================================")

Contrast 1: 0 vs rest

Levene p-value: nan
Unequal variance t-test stat: nan
Unequal variance t-test p-value: nan
degree of freedom (unequal variance): nan
Fail to reject the null hypothesis - no significant difference between groups
Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6

Levene p-value: 0.01789405136635797
Unequal variance t-test stat: 2.014082035452751
Unequal variance t-test p-value: 0.05371360221849337
degree of freedom (unequal variance): 27.95913238769071
Fail to reject the null hypothesis - no significant difference between groups
Contrast 3: phase1, phase2 vs phase3

Levene p-value: 0.2636410378282402
Equal variance t-test stat: 4.412219655591652
Equal variance t-test p-value: 0.000299286503616025
Degrees of freedom (equal variance): 19
Reject the null hypothesis - significant difference between groups
Contrast 4: phase4 vs phase5, phase6

Levene p-value: 0.10891038033819914
Equal variance t-test stat: -4.026170175345601
Equal variance t-test p-value: 0.0

### Topic 4: Ukrainian Domestic Affairs

In [19]:
ukr_df = df[(df['category'] == "Ukranian domestic affairs") & (df['phase'] == "phase 5")]

ukr_df['positive_reactions'] = ukr_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

ukr_df['negative_reactions'] = ukr_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)


print("positive %", round(ukr_df['positive_reactions'].sum() / (ukr_df['positive_reactions'].sum() + ukr_df['negative_reactions'].sum()) * 100,1))
print("negative %", round(ukr_df['negative_reactions'].sum() / (ukr_df['positive_reactions'].sum() + ukr_df['negative_reactions'].sum()) * 100,1))

positive % 63.5
negative % 36.5


In [20]:
ukr_df = df[df['category'] == "Ukranian domestic affairs"]
ukr_df['week_year'] = ukr_df['post_datetime'].dt.strftime('%Y-%W')
ukr_df['weekday_number'] = ukr_df['post_datetime'].dt.weekday

ukr_df['positive_reactions'] = ukr_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

ukr_df['negative_reactions'] = ukr_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

ukr_df['net_reactions'] = ukr_df['positive_reactions'] - ukr_df['negative_reactions']

ukr_result = ukr_df.groupby(['phase', 'week_year']).agg(net_reactions=('net_reactions', 'sum'),
                                                    days_captured=('weekday_number', 
                                                                   lambda x: x.max() - x.min() + 1)).reset_index()

# result_int_df.to_csv("check_int.csv", index=False)
ukr_result['mean_net_reactions'] = round(ukr_result['net_reactions'] / ukr_result['days_captured']).astype(int)
ukr_result = ukr_result[1:]
ukr_result = ukr_result[~((war_result['phase'] == "phase 6") & (ukr_result['week_year'] == '2023-00'))]

ukr_result = ukr_result.reset_index(drop=True)

ukr_grouped_data = [ukr_result['mean_net_reactions'][ukr_result['phase'] == phase] for phase in ukr_result['phase'].unique()]

In [21]:
# one way ANOVA
ukr_f_statistic, ukr_p_value = f_oneway(*ukr_grouped_data)

# print the results
print(f'F-statistic: {ukr_f_statistic}')
print(f'P-value: {ukr_p_value}\n')

model = ols('mean_net_reactions ~ C(phase)', ukr_result).fit()
# Perform ANOVA and print the results
aov_table = sm.stats.anova_lm(model, typ=2)
print(aov_table)

F-statistic: 2.5898922160044053
P-value: 0.02699588480911408

                sum_sq    df         F    PR(>F)
C(phase)  1.181957e+11   6.0  2.589892  0.026996
Residual  4.487668e+11  59.0       NaN       NaN


### Topic 5: Economy 

In [22]:
eco_df = df[(df['category'] == "Economy") & (df['phase'] == "phase 3")]

eco_df['positive_reactions'] = eco_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

eco_df['negative_reactions'] = eco_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

eco_df['positive_reactions'].sum(), eco_df['negative_reactions'].sum(), (eco_df['positive_reactions'].sum() + eco_df['negative_reactions'].sum())

print("positive %", eco_df['positive_reactions'].sum() / (eco_df['positive_reactions'].sum() + eco_df['negative_reactions'].sum()) * 100)
print("negative %", eco_df['negative_reactions'].sum() / (eco_df['positive_reactions'].sum() + eco_df['negative_reactions'].sum()) * 100)

positive % 66.1767605811374
negative % 33.82323941886259


In [23]:
eco_df = df[df['category'] == "Economy"]
eco_df['week_year'] = eco_df['post_datetime'].dt.strftime('%Y-%W')
eco_df['weekday_number'] = eco_df['post_datetime'].dt.weekday

eco_df['positive_reactions'] = eco_df['reactions'].apply(lambda x: x.get('👍', 0) + x.get('😁', 0) + x.get('🔥', 0) + x.get('❤', 0) + x.get('🎉', 0) + x.get('🤣', 0) 
                                                        + x.get('🙏', 0) if pd.notna(x) else 0)

eco_df['negative_reactions'] = eco_df['reactions'].apply(lambda x: x.get('👎', 0) + x.get('🤡', 0) + x.get('🤬', 0) + x.get('💩', 0) + x.get('😢', 0) + x.get('🤮', 0) 
                                                         if pd.notna(x) else 0)

eco_df['net_reactions'] = eco_df['positive_reactions'] - eco_df['negative_reactions']

eco_result = eco_df.groupby(['phase', 'week_year']).agg(net_reactions=('net_reactions', 'sum'),
                                                    days_captured=('weekday_number', 
                                                                   lambda x: x.max() - x.min() + 1)).reset_index()

# result_int_df.to_csv("check_int.csv", index=False)
eco_result['mean_net_reactions'] = round(eco_result['net_reactions'] / eco_result['days_captured']).astype(int)
eco_result = eco_result[1:]
eco_result = eco_result[~((eco_result['phase'] == "phase 6") & (eco_result['week_year'] == '2023-00'))]

eco_result = eco_result.reset_index(drop=True)

eco_grouped_data = [eco_result['mean_net_reactions'][eco_result['phase'] == phase] for phase in eco_result['phase'].unique()]

In [24]:
# one way ANOVA
eco_f_statistic, eco_p_value = f_oneway(*eco_grouped_data)

# print the results
print(f'F-statistic: {eco_f_statistic}')
print(f'P-value: {eco_p_value}\n')

model = ols('mean_net_reactions ~ C(phase)', eco_result).fit()
# Perform ANOVA and print the results
aov_table = sm.stats.anova_lm(model, typ=2)
print(aov_table)

F-statistic: 34.20228248685103
P-value: 1.8311312814366938e-17

                sum_sq    df          F        PR(>F)
C(phase)  7.952138e+10   6.0  34.202282  1.831131e-17
Residual  2.286281e+10  59.0        NaN           NaN


In [25]:
# contrast 1: 0 vs rest
print("Contrast 1: 0 vs rest\n")

stat_test(eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 0"])], 
     eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 1", "phase 2", "phase 3", "phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6
print("Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6\n")

stat_test(eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 1", "phase 2", "phase 3"])], 
     eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 4", "phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 3: phase1, phase2 vs phase3
print("Contrast 3: phase1, phase2 vs phase3\n")

stat_test(eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 1", "phase 2"])],
        eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 3"])])

print("=====================================================================================================")

# contrast 4: phase4 vs phase5, phase6
print("Contrast 4: phase4 vs phase5, phase6\n")

stat_test(eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 4"])],
        eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 5", "phase 6"])])

print("=====================================================================================================")

# contrast 5: phase1 vs phase2
print("Contrast 5: phase1 vs phase2\n")

stat_test(eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 1"])],
        eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 2"])])

print("=====================================================================================================")

# contrast 6: phase5 vs phase6
print("Contrast 6: phase5 vs phase6\n")

stat_test(eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 5"])],
        eco_result['mean_net_reactions'][eco_result['phase'].isin(["phase 6"])])

print("=====================================================================================================")

Contrast 1: 0 vs rest

Levene p-value: 0.1588173651246088
Equal variance t-test stat: -1.9364526906583743
Equal variance t-test p-value: 0.05723033253531658
Degrees of freedom (equal variance): 64
Fail to reject the null hypothesis - no significant difference between groups
Contrast 2: phase1, phase2, phase3 vs phase4, phase5, phase6

Levene p-value: 0.0014210955456469211
Unequal variance t-test stat: 3.5680005666482755
Unequal variance t-test p-value: 0.0017528955584100172
degree of freedom (unequal variance): 21.635984611453853
Reject the null hypothesis - significant difference between groups
Contrast 3: phase1, phase2 vs phase3

Levene p-value: 0.060583244569754564
Equal variance t-test stat: 2.494059843501682
Equal variance t-test p-value: 0.021504471013300493
Degrees of freedom (equal variance): 20
Reject the null hypothesis - significant difference between groups
Contrast 4: phase4 vs phase5, phase6

Levene p-value: 0.18612105749220412
Equal variance t-test stat: 1.5036176051394