In [28]:
import csv
import json
import os
import random

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer as CV
import nltk
import kenlm
import re

from scipy.stats import ttest_ind, ttest_rel

import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [2]:
sfp_data = pd.read_csv("../sample_data/sfp_data_v4.csv")

In [3]:
td_data = pd.read_csv("../sample_data/td_data_v4.csv")

In [4]:
sfp_authors = sfp_data['author'].unique()
td_authors = td_data['author'].unique()

sfp_posts = sfp_data.loc[sfp_data['subreddit'] == 'SandersForPresident']
td_posts = td_data.loc[td_data['subreddit'] == 'The_Donald']

sfp_min_times = [min(sfp_posts.loc[sfp_posts['author'] == auth]['created_utc']) for auth in sfp_authors]
td_min_times = [min(td_posts.loc[td_posts['author'] == auth]['created_utc']) for auth in td_authors]

In [5]:
print(sfp_data.shape)
print(td_data.shape)
print(len(sfp_authors))
print(len(td_authors))
print(sfp_posts.shape)
print(td_posts.shape)

(4868749, 5)
(4003940, 5)
2000
2000
(78037, 5)
(55521, 5)


In [13]:
min_overall_time = 1420131600
max_overall_time = 1464753600

In [16]:
sfp_politics = sfp_data.loc[sfp_data['subreddit'] == 'politics']
td_politics = td_data.loc[td_data['subreddit'] == 'politics']

sfp_politics = sfp_politics.loc[sfp_politics['created_utc'] >= min_overall_time]
sfp_politics = sfp_politics.loc[sfp_politics['created_utc'] < max_overall_time]

td_politics = td_politics.loc[td_politics['created_utc'] >= min_overall_time]
td_politics = td_politics.loc[td_politics['created_utc'] < max_overall_time]

print(sfp_politics.shape)
print(td_politics.shape)

(131076, 5)
(42352, 5)


In [17]:
sfp_politics_before = pd.DataFrame([], columns = sfp_politics.columns)
sfp_politics_after = pd.DataFrame([], columns = sfp_politics.columns)

for i in range(len(sfp_authors)):
    curr_auth_posts = sfp_politics.loc[sfp_politics['author'] == sfp_authors[i]]
    
    if len(curr_auth_posts) > 0:
        sfp_politics_before = \
sfp_politics_before.append(curr_auth_posts.loc[curr_auth_posts['created_utc'] < sfp_min_times[i]])
        
        sfp_politics_after = \
sfp_politics_after.append(curr_auth_posts.loc[curr_auth_posts['created_utc'] > sfp_min_times[i]])
    

In [18]:
td_politics_before = pd.DataFrame([], columns = td_politics.columns)
td_politics_after = pd.DataFrame([], columns = td_politics.columns)

for i in range(len(td_authors)):
    curr_auth_posts = td_politics.loc[td_politics['author'] == td_authors[i]]
    
    if len(curr_auth_posts) > 0:
        td_politics_before = \
td_politics_before.append(curr_auth_posts.loc[curr_auth_posts['created_utc'] < td_min_times[i]])
        
        td_politics_after = \
td_politics_after.append(curr_auth_posts.loc[curr_auth_posts['created_utc'] > td_min_times[i]])
    

In [19]:
sfp_politics_before_text = sfp_politics_before.loc[sfp_politics_before['body'].notna()]['body'].values
sfp_politics_after_text = sfp_politics_after.loc[sfp_politics_after['body'].notna()]['body'].values

td_politics_before_text = td_politics_before.loc[td_politics_before['body'].notna()]['body'].values
td_politics_after_text = td_politics_after.loc[td_politics_after['body'].notna()]['body'].values

sfp_posts_text = sfp_posts.loc[sfp_posts['body'].notna()]['body'].values
td_posts_text = td_posts.loc[td_posts['body'].notna()]['body'].values

In [20]:
def tokenize_data(data, deleted_str = '[deleted]', removed_str = '[removed]', verbose=True):
    if(verbose):
        print("Number of entries in original dataset: " + str(len(data)))
    data1 = [i for i in data if 'thank you for participating' not in i.lower()]
    if(verbose):
        print("Number of moderator posts removed from dataset: " + str(len([i for i in data if 'thank you for participating' in i.lower()])))
    data1 = [i for i in data1 if i != deleted_str and i != removed_str]
    if(verbose):
        print("Number of user-deleted posts removed from dataset: " + str(len([i for i in data if i == deleted_str])))
        print("Number of mod/admin-deleted posts removed from dataset: " + str(len([i for i in data if i == removed_str])))
    
        print("Number of entries in modified dataset: " + str(len(data1)))
    
    # removing hyperlinks
        print("Number of comments with a hyperlink in modified dataset: " + str(len([i for i in data1 if "http" in i])))
    
    data1 = [re.sub(r'https?:\/\/[\S]+', ' ', i, flags=re.MULTILINE) for i in data1]
    
    # common escape sequences
    data1 = [re.sub(r'\n', '', i) for i in data1]
    data1 = [re.sub(r'&gt;', '', i) for i in data1]
    
    # convert to lowercase
    data1 = [i.lower() for i in data1]
    
    # keep user and subreddit tags
    if(verbose):
        print("Number of comments that mention a user in the modified dataset: " + str(len([i for i in data1 if "/u/" in i])))
        print("Number of comments that mention a subreddit in the modified dataset: " + str(len([i for i in data1 if "/r/" in i])))
    
    data1 = [re.sub(r'/u/', '_user_', i) for i in data1]
    data1 = [re.sub(r'/r/', '_subreddit_', i) for i in data1]
    
    data_tokenized = [re.findall(r'\w+', i.lower()) for i in data1]
    
    if(verbose):
        print("Number of total tokens: " + str(sum([len(d) for d in data_tokenized])))
    
    return [(' '.join(i) + '\n') for i in data_tokenized]


In [23]:
text_arrs = [sfp_politics_before_text,
             sfp_politics_after_text,
             td_politics_before_text,
             td_politics_after_text,
             sfp_posts_text,
             td_posts_text]

test_arrs = []

for i in range(len(text_arrs)):
    lm_test_arr = tokenize_data(text_arrs[i])
    print()
    test_arrs.append(lm_test_arr)

Number of entries in original dataset: 28504
Number of moderator posts removed from dataset: 0
Number of user-deleted posts removed from dataset: 0
Number of mod/admin-deleted posts removed from dataset: 0
Number of entries in modified dataset: 28504
Number of comments with a hyperlink in modified dataset: 1523
Number of comments that mention a user in the modified dataset: 79
Number of comments that mention a subreddit in the modified dataset: 232
Number of total tokens: 1226203

Number of entries in original dataset: 102572
Number of moderator posts removed from dataset: 52
Number of user-deleted posts removed from dataset: 1
Number of mod/admin-deleted posts removed from dataset: 0
Number of entries in modified dataset: 102519
Number of comments with a hyperlink in modified dataset: 6326
Number of comments that mention a user in the modified dataset: 201
Number of comments that mention a subreddit in the modified dataset: 731
Number of total tokens: 4335851

Number of entries in ori

In [24]:
for i in range(len(test_arrs)):
    print(len(test_arrs[i]))
    print(test_arrs[i][:10])
    print("\n\n")

28504
['what state do you live in\n', 'awesome\n', 'a link to the actual bill\n', 'can you provide a source proving that planned parenthood is using donations to some kind of real benefit how are they getting around the law are they not just donating to the scientific community from the article it turns out nucatola wasn t discussing the illegal black market sale of fetal organs but instead the perfectly legal donation of the organs to biomedical research laboratories that use the organs to help save lives the price range of the organs described by nucatola is in reality the reimbursed expenses accrued in the delivery of the specimens\n', 'which was a total misrepresentation of the question he brought up reassignment himself and then went on to describe what the military does standard garbage response\n', 'what if hillary gets the nomination despite the popular vote being solidly behind sanders if the democratic base is convinced the party went against the will of the people in favor o

In [25]:
for i in range(len(test_arrs)):
    print(len(test_arrs[i]))

28504
102519
23854
18497
78036
55512


In [26]:
filenames = ['politics_train.arpa',
             'sfp_politics_before.arpa', 
             'sfp_politics_after.arpa',
             'td_politics_before.arpa',
             'td_politics_after.arpa',
             'sfp_posts.arpa',
             'td_posts.arpa']

In [27]:
language_models = []

for i in range(len(filenames)):
    curr_lm = os.path.join(os.path.dirname('Language models.ipynb'), '..', 'language_models', filenames[i])
    curr_model = kenlm.LanguageModel(curr_lm)
    print('{0}-gram model'.format(curr_model.order))
    language_models.append(curr_model)

2-gram model
2-gram model
2-gram model
2-gram model
2-gram model
2-gram model
2-gram model


In [19]:
def bootstrap(text1, text2, scoring_func, num_iters = 1000, sample_size = 1000):
    lower_size = min(len(text1), len(text2))
    text1_new = np.random.choice(text1, lower_size, replace=False)
    text2_new = np.random.choice(text2, lower_size, replace=False)
    
    scores_all = [scoring_func(s) for s in (text1 + text2)]
    bootstrap1 = []
    bootstrap2 = []
    random.seed(10)
    
    for i in range(num_iters):
        scores1 = random.sample(scores_all, sample_size)
        scores2 = random.sample(scores_all, sample_size)
        
        bootstrap1.append(sum(scores1)/sample_size)
        bootstrap2.append(sum(scores2)/sample_size)
        
    return (bootstrap1, bootstrap2)  

In [20]:
def calculate_values(text1, text2, scoring_func, num_iters = 1000, sample_size = 1000):
    random.seed(10)
    text1_sample = random.sample(text1, sample_size)
    text2_sample = random.sample(text2, sample_size)
    scores1 = [scoring_func(s) for s in text1_sample]
    scores2 = [scoring_func(s) for s in text2_sample]

    mean1 = sum(scores1)/len(scores1)
    mean2 = sum(scores2)/len(scores2)
    print(mean1)
    print(mean2)
    print()
    
    bootstrap1, bootstrap2 = bootstrap(text1, text2, scoring_func, num_iters)
    model_diff = mean1 - mean2
    print(model_diff)
    print()
    
    bootstrap_diffs = [bootstrap1[i] - bootstrap2[i] for i in range(len(bootstrap1))]
    print(sum([i > model_diff for i in bootstrap_diffs])/len(bootstrap_diffs))

In [None]:
def calculate_values_paired(text1, text2, scoring_func):
    random.seed(10)
    text1_sample = random.sample(text1, sample_size)
    text2_sample = random.sample(text2, sample_size)
    scores1 = [scoring_func(s) for s in text1_sample]
    scores2 = [scoring_func(s) for s in text2_sample]

    mean1 = sum(scores1)/len(scores1)
    mean2 = sum(scores2)/len(scores2)
    print(mean1)
    print(mean2)
    print()
    
    
    bootstrap1, bootstrap2 = bootstrap(text1, text2, scoring_func, num_iters)
    model_diff = mean1 - mean2
    print(model_diff)
    print()
    
    bootstrap_diffs = [bootstrap1[i] - bootstrap2[i] for i in range(len(bootstrap1))]
    print(sum([i > model_diff for i in bootstrap_diffs])/len(bootstrap_diffs))

In [29]:
def calculate_values_ind(text1, text2, scoring_func):
    scores1 = [scoring_func(s) for s in text1]
    scores2 = [scoring_func(s) for s in text2]

    mean1 = sum(scores1)/len(scores1)
    mean2 = sum(scores2)/len(scores2)
    print(mean1)
    print(mean2)
    print()
    
    print(ttest_ind(scores1, scores2))

In [70]:
# sfp_politics_before, td_politics_before, sfp_politics_before
calculate_values(test_arrs[0], test_arrs[2], language_models[1].perplexity, num_iters=1000, sample_size = 20000)

466.0915190667158
464.0644101356655

2.027108931050293

0.512


In [71]:
# sfp_politics_before, td_politics_before, td_politics_before
calculate_values(test_arrs[0], test_arrs[2], language_models[3].perplexity, num_iters=1000, sample_size = 20000)

415.26142824930616
390.3400073401792

24.921420909126937

0.438


In [72]:
# sfp_politics_after, td_politics_after, sfp_politics_after
calculate_values(test_arrs[1], test_arrs[3], language_models[2].perplexity, num_iters=1000, sample_size = 20000)

348.7865394483283
516.1244845746966

-167.33794512636825

0.877


In [73]:
# sfp_politics_after, td_politics_after, td_politics_after
calculate_values(test_arrs[1], test_arrs[3], language_models[4].perplexity, num_iters=1000, sample_size = 20000)

352.5069705611644
414.18445782481314

-61.67748726364874

0.82


In [74]:
# sfp_politics_before, td_politics_before, sfp_posts
calculate_values(test_arrs[0], test_arrs[2], language_models[5].perplexity, num_iters=1000, sample_size = 20000)

452.27702955300754
442.24635122921

10.030678323797531

0.478


In [75]:
# sfp_politics_before, td_politics_before, td_posts
calculate_values(test_arrs[0], test_arrs[2], language_models[6].perplexity, num_iters=1000, sample_size = 20000)

411.0995839632289
390.2742375488254

20.825346414403498

0.376


In [76]:
# sfp_politics_after, td_politics_after, sfp_posts
calculate_values(test_arrs[1], test_arrs[3], language_models[5].perplexity, num_iters=1000, sample_size = 20000)

361.5910337341523
526.1723508792212

-164.5813171450689

0.92


In [77]:
# sfp_politics_after, td_politics_after, td_posts
calculate_values(test_arrs[1], test_arrs[3], language_models[6].perplexity, num_iters=1000, sample_size = 20000)

357.7513617089054
376.5504233294564

-18.799061620551015

0.607


In [78]:
# sfp_politics_before, td_politics_before, politics
calculate_values(test_arrs[0], test_arrs[2], language_models[0].perplexity, num_iters=1000, sample_size = 20000)

386.99331096744527
395.1014610810287

-8.108150113583406

0.531


In [79]:
# sfp_politics_after, td_politics_after, politics
calculate_values(test_arrs[1], test_arrs[3], language_models[0].perplexity, num_iters=1000, sample_size = 20000)

349.8563032214515
439.71758078008185

-89.86127755863032

0.93


In [80]:
# sfp_posts, td_posts, sfp_posts
calculate_values(test_arrs[4], test_arrs[5], language_models[5].perplexity, num_iters=1000, sample_size = 20000)

372.3425881771748
1437.7450103397887

-1065.402422162614

0.998


In [81]:
# sfp_posts, td_posts, td_posts
calculate_values(test_arrs[4], test_arrs[5], language_models[6].perplexity, num_iters=1000, sample_size = 20000)

383.91765532531605
658.0044753733365

-274.08682004802046

0.963


In [82]:
# td_posts, sfp_posts, td_posts
calculate_values(test_arrs[5], test_arrs[4], language_models[6].perplexity, num_iters=1000, sample_size = 20000)

654.1731912362627
437.5470999445293

216.62609129173342

0.069


In [83]:
# sfp_posts, td_posts, politics
calculate_values(test_arrs[4], test_arrs[5], language_models[0].perplexity, num_iters=1000, sample_size = 20000)

393.96497182012
987.3871161287846

-593.4221443086645

0.999


In [84]:
# sfp_politics_before, sfp_politics_after, politics
calculate_values(test_arrs[0], test_arrs[1], language_models[0].perplexity, num_iters=1000, sample_size = 20000)

386.99331096744527
357.0470253137992

29.946285653646044

0.321


In [85]:
# td_politics_before, td_politics_after, politics
calculate_values(test_arrs[2], test_arrs[3], language_models[0].perplexity, num_iters=1000, sample_size = 20000)

380.8497134050213
452.1804452783587

-71.33073187333741

0.849


## Same but with cross entropy instead of perplexity

In [21]:
# sfp_politics_before, td_politics_before, sfp_politics_before
calculate_values(test_arrs[0], test_arrs[2], language_models[1].score, num_iters=1000, sample_size = 20000)

-40.99284445625543
-38.161164723038674

-2.8316797332167596

0.975


In [22]:
# sfp_politics_before, td_politics_before, td_politics_before
calculate_values(test_arrs[0], test_arrs[2], language_models[3].score, num_iters=1000, sample_size = 20000)

-41.45484392162562
-38.48047024730444

-2.9743736743211784

0.98


In [23]:
# sfp_politics_after, td_politics_after, sfp_politics_after
calculate_values(test_arrs[1], test_arrs[3], language_models[2].score, num_iters=1000, sample_size = 20000)

-38.13838319438696
-33.79882725139856

-4.339555942988397

0.998


In [24]:
# sfp_politics_after, td_politics_after, td_politics_after
calculate_values(test_arrs[1], test_arrs[3], language_models[4].score, num_iters=1000, sample_size = 20000)

-39.60858447471857
-34.6449614354372

-4.963623039281366

0.999


In [25]:
# sfp_politics_before, td_politics_before, sfp_posts
calculate_values(test_arrs[0], test_arrs[2], language_models[5].score, num_iters=1000, sample_size = 20000)

-42.761745394754406
-39.87905867060423

-2.882686724150176

0.971


In [26]:
# sfp_politics_before, td_politics_before, td_posts
calculate_values(test_arrs[0], test_arrs[2], language_models[6].score, num_iters=1000, sample_size = 20000)

-43.46981461167336
-40.32892970715761

-3.140884904515744

0.981


In [27]:
# sfp_politics_after, td_politics_after, sfp_posts
calculate_values(test_arrs[1], test_arrs[3], language_models[5].score, num_iters=1000, sample_size = 20000)

-39.028087789463996
-34.663414007985594

-4.364673781478402

0.998


In [28]:
# sfp_politics_after, td_politics_after, td_posts
calculate_values(test_arrs[1], test_arrs[3], language_models[6].score, num_iters=1000, sample_size = 20000)

-40.23067969937325
-34.85549478787184

-5.375184911501407

0.999


In [29]:
# sfp_politics_before, td_politics_before, politics
calculate_values(test_arrs[0], test_arrs[2], language_models[0].score, num_iters=1000, sample_size = 20000)

-43.261180846309664
-40.2992213262558

-2.961959520053867

0.973


In [30]:
# sfp_politics_after, td_politics_after, politics
calculate_values(test_arrs[1], test_arrs[3], language_models[0].score, num_iters=1000, sample_size = 20000)

-39.98813622492552
-35.2850863899827

-4.703049834942817

0.999


In [31]:
# sfp_posts, td_posts, sfp_posts
calculate_values(test_arrs[4], test_arrs[5], language_models[5].score, num_iters=1000, sample_size = 20000)

-36.33988372894525
-33.53964806175232

-2.8002356671929363

0.91


In [32]:
# sfp_posts, td_posts, td_posts
calculate_values(test_arrs[4], test_arrs[5], language_models[6].score, num_iters=1000, sample_size = 20000)

-37.99267998012304
-31.907608811330796

-6.085071168792247

0.988


In [33]:
# td_posts, sfp_posts, td_posts
calculate_values(test_arrs[5], test_arrs[4], language_models[6].score, num_iters=1000, sample_size = 20000)

-32.96674963059425
-37.55648960086107

4.58973997026682

0.02


In [34]:
# sfp_posts, td_posts, politics
calculate_values(test_arrs[4], test_arrs[5], language_models[0].score, num_iters=1000, sample_size = 20000)

-38.140959322738645
-34.11710799072981

-4.023851332008839

0.963


In [35]:
# sfp_politics_before, sfp_politics_after, politics
calculate_values(test_arrs[0], test_arrs[1], language_models[0].score, num_iters=1000, sample_size = 20000)

-43.261180846309664
-40.16015071448088

-3.101030131828786

0.981


In [36]:
# td_politics_before, td_politics_after, politics
calculate_values(test_arrs[2], test_arrs[3], language_models[0].score, num_iters=1000, sample_size = 20000)

-40.26308662309646
-35.172616477310655

-5.090470145785808

0.999


Almost all of these are p < 0.05, but the bootstrapping method seems suspect, so I should use something better. 

## Use a t-test

In [30]:
# sfp_politics_before, td_politics_before, sfp_politics_before
calculate_values_ind(test_arrs[0], test_arrs[2], language_models[1].score)

-120.963218138956
-110.61032555850322

Ttest_indResult(statistic=-6.732302343462776, pvalue=1.6871667718307063e-11)


In [31]:
# sfp_politics_before, td_politics_before, td_politics_before
calculate_values_ind(test_arrs[0], test_arrs[2], language_models[3].score)

-121.42584818385806
-110.8403906114565

Ttest_indResult(statistic=-6.838922919323715, pvalue=8.066378947210291e-12)


In [32]:
# sfp_politics_after, td_politics_after, sfp_politics_after
calculate_values_ind(test_arrs[1], test_arrs[3], language_models[2].score)

-117.70578388786899
-86.75577316455869

Ttest_indResult(statistic=-24.07462982723636, pvalue=9.230155122576456e-128)


In [33]:
# sfp_politics_after, td_politics_after, td_politics_after
calculate_values_ind(test_arrs[1], test_arrs[3], language_models[4].score)

-120.69819819193371
-87.87735906627917

Ttest_indResult(statistic=-24.849185357163726, pvalue=5.800221922592684e-136)


In [34]:
# sfp_politics_before, td_politics_before, sfp_posts
calculate_values_ind(test_arrs[0], test_arrs[2], language_models[5].score)

-122.944396003466
-113.06373171211838

Ttest_indResult(statistic=-6.210748957124003, pvalue=5.312781221067689e-10)


In [35]:
# sfp_politics_before, td_politics_before, td_posts
calculate_values_ind(test_arrs[0], test_arrs[2], language_models[6].score)

-124.33040786749721
-113.49665718767463

Ttest_indResult(statistic=-6.729717322636551, pvalue=1.717386727537896e-11)


In [36]:
# sfp_politics_after, td_politics_after, sfp_posts
calculate_values_ind(test_arrs[1], test_arrs[3], language_models[5].score)

-119.72738866990963
-88.33224752282248

Ttest_indResult(statistic=-23.88363855931886, pvalue=8.876011556885794e-126)


In [37]:
# sfp_politics_after, td_politics_after, td_posts
calculate_values_ind(test_arrs[1], test_arrs[3], language_models[6].score)

-122.24164702453253
-88.22415055580034

Ttest_indResult(statistic=-25.310515175826023, pvalue=5.710629924179657e-141)


In [39]:
# sfp_politics_before, td_politics_before, politics
calculate_values_ind(test_arrs[0], test_arrs[2], language_models[0].score)

-124.30374640226364
-113.96454048976321

Ttest_indResult(statistic=-6.480470301748532, pvalue=9.224664970309833e-11)


In [40]:
# sfp_politics_after, td_politics_after, politics
calculate_values_ind(test_arrs[1], test_arrs[3], language_models[0].score)

-121.96106240087934
-89.50404411971637

Ttest_indResult(statistic=-24.448181531396976, pvalue=1.1006954582946825e-131)


In [41]:
# sfp_posts, td_posts, sfp_posts
calculate_values_ind(test_arrs[4], test_arrs[5], language_models[5].score)

-97.99008184064631
-71.35517761280909

Ttest_indResult(statistic=-33.507022626772184, pvalue=3.996217477103932e-245)


In [42]:
# sfp_posts, td_posts, td_posts
calculate_values_ind(test_arrs[4], test_arrs[5], language_models[6].score)

-101.066193753447
-68.05382477948535

Ttest_indResult(statistic=-42.33169226761963, pvalue=0.0)


In [43]:
# td_posts, sfp_posts, td_posts
calculate_values_ind(test_arrs[5], test_arrs[4], language_models[6].score)

-68.05382477948535
-101.066193753447

Ttest_indResult(statistic=42.33169226761963, pvalue=0.0)


In [44]:
# sfp_posts, td_posts, politics
calculate_values_ind(test_arrs[4], test_arrs[5], language_models[0].score)

-101.69964701879812
-71.97838298174779

Ttest_indResult(statistic=-37.22797900401066, pvalue=8.603074743452662e-302)


In [45]:
# sfp_politics_before, sfp_politics_after, politics
calculate_values_ind(test_arrs[0], test_arrs[1], language_models[0].score)

-124.30374640226364
-121.96106240087934

Ttest_indResult(statistic=-2.0451283099572826, pvalue=0.04084420323411929)


In [46]:
# td_politics_before, td_politics_after, politics
calculate_values_ind(test_arrs[2], test_arrs[3], language_models[0].score)

-113.96454048976321
-89.50404411971637

Ttest_indResult(statistic=-14.599797337934946, pvalue=3.689276439978331e-48)


In [49]:
# sanity check
calculate_values_ind(random.sample(test_arrs[0], 1000), random.sample(test_arrs[0], 1000), language_models[0].score)

-126.20280684804916
-116.40277046489716

Ttest_indResult(statistic=-1.2691939738407458, pvalue=0.2045197137905985)


## Average over users first