In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from tqdm import tqdm_notebook
from IPython.display import display, Markdown, Latex
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "sans-serif",
    "font.sans-serif": ["Helvetica"]})
%matplotlib inline
print_all_flag=0
significant=[]
mydpi=600
b=False
s=18

orange, purple, elm, grey ='#FFCC00', '#666599', '#217C7E', '#DCDCDC'

__author__ = 'HK Dambanemuya, Haomin Lin'
__version__ = 'Python3'

In [2]:
# Platform values found in the 'Platform' column.
sources = [
    'news',
    'blogs',
    'discussions',
]

# Groups of LIWC score columns analysed together in each section below.
emotions = [
    'affect',
    'posemo',
    'negemo',
]
sentiments = [
    'anx',
    'anger',
    'sad',
]
cognitive_processes = [
    'certain',
    'tentat',
    'discrep',
]
time_orientation = [
    'focuspast',
    'focuspresent',
    'focusfuture',
]

In [3]:
# Read the LIWC scores, discard columns 'B' and 'C', and give the lettered
# metadata columns descriptive names.
liwc_complete = (
    pd.read_csv("liwc_results.csv")
    .drop(columns=['B', 'C'])
    .rename(columns={
        "A": "Date",
        "D": "Topic",
        "E": "Platform",
        "F": "Source",
        "G": "Location",
    })
)
# Recode location 'GB' as 'UK' so later cells can filter on 'UK'.
liwc_complete.loc[liwc_complete['Location'].eq('GB'), 'Location'] = 'UK'
liwc_complete.head()

Unnamed: 0,Date,Topic,Platform,Source,Location,Analytic,Clout,Authentic,Tone,affect,posemo,negemo,anx,anger,sad,discrep,tentat,certain,focuspast,focuspresent,focusfuture,work,home,money,death
0,2020-02-15,"COVID, medical supplies",news,barnesville-enterprise,US,92.84,80.96,7.44,19.14,7.43,3.42,3.86,1.49,1.63,0.15,1.93,2.08,0.59,3.86,6.69,0.59,4.16,0.45,0.45,0.3
1,2020-02-17,"COVID, medical supplies",news,people,CN,99.0,68.92,17.67,60.88,3.09,2.47,0.62,0.0,0.62,0.0,0.62,0.62,0.62,1.85,4.32,2.47,12.35,0.62,0.0,0.62
2,2020-02-15,"COVID, medical supplies",news,ecns,CN,98.51,69.46,24.85,31.06,2.6,1.45,1.14,0.21,0.52,0.1,0.42,1.14,0.31,4.05,3.63,0.83,8.72,0.62,2.8,0.93
3,2020-02-17,"COVID, medical supplies",news,independent,NG,98.14,60.95,6.77,13.28,2.78,0.93,1.85,0.0,0.93,0.93,0.0,1.85,0.0,0.93,6.48,0.93,5.56,0.0,0.93,1.85
4,2020-02-16,"COVID, medical supplies",news,yahoo,US,91.82,52.88,30.77,38.57,0.72,0.72,0.0,0.0,0.0,0.0,0.0,2.16,0.72,2.88,9.35,0.0,3.6,0.0,1.44,0.0


In [35]:
def get_mean_std(dimensions, liwc, liwc_compare):
    """Print one ``mean(std)`` row per measure for two groups of documents.

    For every measure in ``dimensions`` the daily scores of both groups are
    obtained from ``get_daily_measure_scores``; their mean and standard
    deviation are rounded to three decimals and printed side by side.
    Rows whose measure is listed in the notebook-level ``significant`` list
    are wrapped in ANSI escape codes so they stand out.

    Parameters
    ----------
    dimensions : iterable of str
        Names of the score columns to summarise.
    liwc, liwc_compare : pandas.DataFrame
        The two groups being compared (first and second output column).
    """
    def _summarise(daily_scores):
        # (mean, std) of the per-day values, rounded to 3 decimals.
        values = daily_scores['value']
        return round(values.mean(), 3), round(values.std(), 3)

    for measure in dimensions:
        mean_liwc, std_liwc = _summarise(get_daily_measure_scores(measure, liwc))
        mean_compare, std_compare = _summarise(get_daily_measure_scores(measure, liwc_compare))

        # Highlight only measures flagged by the latest ANOVA run.
        highlighted = measure in significant
        prefix = '\033[91m' if highlighted else ''
        suffix = '\033[0m' if highlighted else ''

        row = f"   {measure:<8}\t{mean_liwc}({std_liwc})\t{mean_compare}({std_compare})"
        print(prefix + row + suffix)

In [5]:
def get_daily_measure_scores(measure, liwc, group_field=None):
    """Average one score column per calendar day for a group of documents.

    Parameters
    ----------
    measure : str
        Name of the score column to average.
    liwc : pandas.DataFrame
        Documents of a single group; must contain ``'Date'``, ``measure`` and
        the grouping column.
    group_field : str, optional
        Column whose first unique value labels the group. When omitted, the
        notebook-level ``field`` variable is used, so existing two-argument
        calls behave as before.

    Returns
    -------
    pandas.DataFrame
        One row per distinct date, in ascending date order, with columns
        ``'index'`` (0..n-1), ``'treatments'`` (the group label repeated) and
        ``'value'`` (the daily mean of ``measure``). Missing values are
        replaced by 0.

    Raises
    ------
    IndexError
        If ``liwc`` has no rows (there is no group label to read).
    """
    if group_field is None:
        # Fall back to the global set by the notebook before each comparison.
        group_field = field
    treatment = liwc[group_field].unique()[0]

    # A single grouped aggregation replaces re-filtering the whole frame once
    # per date; groupby sorts the dates, matching the previous ordering.
    daily_means = liwc.groupby('Date', sort=True)[measure].mean()

    data_df = pd.DataFrame({
        'index': range(len(daily_means)),
        'treatments': [treatment] * len(daily_means),
        # Strip the Date index so values are placed positionally.
        'value': daily_means.to_numpy(),
    })
    # Days on which every score is missing yield NaN; report them as 0.
    return data_df.fillna(0)

In [6]:
def get_anova_results_daily_aggregate(dimensions, liwc, liwc_compare, alpha=0.05):
    """Run a one-way ANOVA on daily mean scores of two groups, per measure.

    For every measure the daily scores of both groups (from
    ``get_daily_measure_scores``) are stacked, an OLS model
    ``value ~ C(treatments)`` is fitted and a type-2 ANOVA table is computed.
    A measure whose p-value is below ``alpha`` is recorded in the
    notebook-level ``significant`` list and its table is printed. When the
    notebook-level ``print_all_flag`` is truthy every table is printed, but
    only genuinely significant measures are recorded.

    Parameters
    ----------
    dimensions : iterable of str
        Names of the score columns to test.
    liwc, liwc_compare : pandas.DataFrame
        The two groups being compared.
    alpha : float, default 0.05
        Significance threshold applied to ``PR(>F)`` of the treatment row.
    """
    for measure in dimensions:
        anova_analysis_df = get_daily_measure_scores(measure, liwc)
        anova_compare_df = get_daily_measure_scores(measure, liwc_compare)
        anova_df = pd.concat([anova_analysis_df, anova_compare_df],
                             ignore_index=True)
        # Ordinary Least Squares (OLS) model
        model = ols('value ~ C(treatments)', data=anova_df).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)

        # Row 0 of the table is the C(treatments) effect.
        is_significant = anova_table['PR(>F)'].iloc[0] < alpha
        # Record significance independently of the print-everything switch,
        # and only once, so re-running a cell does not pile up duplicates.
        if is_significant and measure not in significant:
            significant.append(measure)

        if is_significant or print_all_flag:
            print ('\n',measure)
            print ("===================================")
            print (anova_table)

## Social distancing vs. all others

In [7]:
# Global read by get_daily_measure_scores: the first unique value of this
# column becomes the group ("treatments") label. Topic comparison -> 'Topic'.
field = 'Topic'

In [8]:
# Documents whose topic mentions recommendations or restrictions form the
# "social distancing" group; everything else is the comparison group.
# na=False sends rows without a topic to the comparison group instead of
# producing a mask that cannot be used for indexing.
is_social_distancing = liwc_complete['Topic'].str.contains(
    'recommendations|restrictions', na=False)

# Explicit copies: the relabelling below must never write through to
# liwc_complete.
liwc = liwc_complete[is_social_distancing].copy()
liwc['Topic'] = 'social distancing'
drop_index = liwc.index
liwc_compare = liwc_complete[~is_social_distancing].copy()
liwc_compare['Topic'] = 'all other topics'
print("social distancing: {}, others: {}".format(liwc.shape[0], liwc_compare.shape[0]))

social distancing: 711883, others: 435143


In [9]:
# Start from a clean slate so re-running this cell (or running it after a
# later comparison) never carries over previously flagged measures; the other
# ANOVA cells in this notebook do the same.
significant = []

# One ANOVA section per group of measures: social distancing vs. other topics.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc, liwc_compare)

### Emotions


 negemo
               sum_sq      df     F  PR(>F)
C(treatments) 0.926   1.000   4.408 0.037  
Residual      42.658  203.000 nan   nan    


### Sentiments


 anger
               sum_sq      df      F  PR(>F)
C(treatments) 0.136   1.000   12.935 0.000  
Residual      2.139   203.000 nan    nan    


### Cognitive Processes


 tentat
               sum_sq      df      F  PR(>F)
C(treatments) 4.841   1.000   19.211 0.000  
Residual      51.156  203.000 nan    nan    


### Time Orientation


 focusfuture
               sum_sq      df     F  PR(>F)
C(treatments) 0.827   1.000   9.726 0.002  
Residual      17.260  203.000 nan   nan    


In [10]:
# Header row: the label of each group, taken from the `field` column.
print("\t      {:>5}\t{:<5}".format(
    liwc[field].unique()[0],
    liwc_compare[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc, liwc_compare)

	      social distancing	all other topics
Emotions
   affect  	3.212(0.683)	3.28(0.672)
   posemo  	1.598(0.506)	1.524(0.511)
[1m   negemo  	1.569(0.601)	1.704(0.304)[0m
Sentiments
   anx     	0.462(0.107)	0.487(0.161)
[1m   anger   	0.284(0.101)	0.336(0.104)[0m
   sad     	0.279(0.074)	0.26(0.076)
Cognitive Processes
   certain 	0.889(0.275)	0.871(0.238)
[1m   tentat  	1.872(0.443)	2.182(0.544)[0m
   discrep 	0.98(0.28)	0.905(0.287)
Time Orientation
   focuspast	3.555(0.757)	3.373(1.123)
   focuspresent	7.087(1.243)	7.174(1.139)
[1m   focusfuture	1.064(0.344)	0.936(0.243)[0m


### US vs. All others

In [11]:
# Global read by get_daily_measure_scores: from here on groups are labelled
# by the first unique value of their 'Location' column.
field = 'Location'

In [12]:
# US documents vs. every other location; `liwc` is the social-distancing
# subset built in the topic-split cell.
liwc_analysis = liwc.loc[liwc['Location'].eq('US')]
drop_index = liwc_analysis.index
# Everything that is not in the analysis group, relabelled as one group.
liwc_other = liwc.drop(index=drop_index).assign(Location='all other locations')
print("US: {}, others: {}".format(len(liwc_analysis), len(liwc_other)))

US: 500096, others: 211787


In [13]:
# Forget measures flagged by the previous comparison.
significant = []

# One ANOVA section per group of measures: US vs. all other locations.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc_analysis, liwc_other)

### Emotions


 posemo
               sum_sq      df     F  PR(>F)
C(treatments) 0.826   1.000   3.960 0.048  
Residual      34.191  164.000 nan   nan    


### Sentiments


 sad
               sum_sq      df     F  PR(>F)
C(treatments) 0.036   1.000   6.597 0.011  
Residual      0.883   164.000 nan   nan    


### Cognitive Processes


 tentat
               sum_sq      df     F  PR(>F)
C(treatments) 1.576   1.000   9.879 0.002  
Residual      26.157  164.000 nan   nan    


### Time Orientation


 focuspresent
               sum_sq      df     F  PR(>F)
C(treatments) 8.373   1.000   6.676 0.011  
Residual      205.696 164.000 nan   nan    


In [14]:
# Header row: the label of each group, taken from the `field` column.
print("\t\t{}\t\t{}".format(
    liwc_analysis[field].unique()[0],
    liwc_other[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc_analysis, liwc_other)

		US		all other locations
Emotions
   affect  	3.208(0.561)	3.127(0.61)
[1m   posemo  	1.655(0.493)	1.514(0.416)[0m
   negemo  	1.505(0.222)	1.57(0.619)
Sentiments
   anx     	0.469(0.109)	0.466(0.11)
   anger   	0.294(0.101)	0.265(0.091)
[1m   sad     	0.273(0.063)	0.302(0.083)[0m
Cognitive Processes
   certain 	0.94(0.23)	0.881(0.255)
[1m   tentat  	1.961(0.408)	1.766(0.39)[0m
   discrep 	1.006(0.242)	1.0(0.288)
Time Orientation
   focuspast	3.528(0.721)	3.67(0.752)
[1m   focuspresent	7.363(0.967)	6.914(1.258)[0m
   focusfuture	1.117(0.251)	1.063(0.373)


### UK vs. All others

In [15]:
# UK documents vs. every other location within the social-distancing subset.
liwc_analysis = liwc.loc[liwc['Location'].eq('UK')]
drop_index = liwc_analysis.index
# Everything that is not in the analysis group, relabelled as one group.
liwc_other = liwc.drop(index=drop_index).assign(Location='all other locations')
print("UK: {}, others: {}".format(len(liwc_analysis), len(liwc_other)))

UK: 55510, others: 656373


In [16]:
# Forget measures flagged by the previous comparison.
significant = []

# One ANOVA section per group of measures: UK vs. all other locations.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc_analysis, liwc_other)

### Emotions

### Sentiments


 anx
               sum_sq      df      F  PR(>F)
C(treatments) 0.378   1.000   16.067 0.000  
Residual      3.742   159.000 nan    nan    

 anger
               sum_sq      df     F  PR(>F)
C(treatments) 0.050   1.000   4.805 0.030  
Residual      1.652   159.000 nan   nan    

 sad
               sum_sq      df      F  PR(>F)
C(treatments) 0.471   1.000   51.085 0.000  
Residual      1.466   159.000 nan    nan    


### Cognitive Processes


 certain
               sum_sq      df     F  PR(>F)
C(treatments) 0.574   1.000   7.958 0.005  
Residual      11.477  159.000 nan   nan    

 tentat
               sum_sq      df      F  PR(>F)
C(treatments) 1.807   1.000   10.050 0.002  
Residual      28.592  159.000 nan    nan    

 discrep
               sum_sq      df      F  PR(>F)
C(treatments) 3.181   1.000   32.076 0.000  
Residual      15.769  159.000 nan    nan    


### Time Orientation


 focuspresent
               sum_sq      df      F  PR(>F)
C(treatments) 39.771  1.000   30.139 0.000  
Residual      209.812 159.000 nan    nan    

 focusfuture
               sum_sq      df     F  PR(>F)
C(treatments) 0.800   1.000   6.217 0.014  
Residual      20.457  159.000 nan   nan    


In [17]:
# Header row: the label of each group, taken from the `field` column.
print("\t\t{}\t\t{}".format(
    liwc_analysis[field].unique()[0],
    liwc_other[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc_analysis, liwc_other)

		UK		all other locations
Emotions
   affect  	3.279(0.548)	3.194(0.704)
   posemo  	1.614(0.469)	1.592(0.513)
   negemo  	1.625(0.283)	1.556(0.61)
Sentiments
[1m   anx     	0.551(0.191)	0.453(0.115)[0m
[1m   anger   	0.25(0.1)	0.286(0.103)[0m
[1m   sad     	0.38(0.119)	0.271(0.073)[0m
Cognitive Processes
[1m   certain 	1.003(0.257)	0.882(0.278)[0m
[1m   tentat  	2.069(0.385)	1.855(0.452)[0m
[1m   discrep 	1.247(0.361)	0.964(0.273)[0m
Time Orientation
   focuspast	3.719(0.818)	3.542(0.757)
[1m   focuspresent	8.037(1.023)	7.036(1.238)[0m
[1m   focusfuture	1.201(0.382)	1.059(0.34)[0m


### US & UK vs. All others

In [18]:
# Pool US and UK documents into one analysis group. The pooled rows get a
# single 'US & UK' location label: get_daily_measure_scores and the table
# header use the first unique Location value as the group name, which would
# otherwise read as just one of the two countries.
is_us_or_uk = liwc['Location'].isin(['US', 'UK'])
liwc_analysis = liwc[is_us_or_uk].assign(Location='US & UK')
drop_index = liwc_analysis.index
# Everything that is not in the analysis group, relabelled as one group.
liwc_other = liwc.drop(drop_index)
liwc_other['Location'] = 'all other locations'
print("US & UK: {}, others: {}".format(liwc_analysis.shape[0], liwc_other.shape[0]))

US & UK: 555606, others: 156277


In [19]:
# Forget measures flagged by the previous comparison.
significant = []

# One ANOVA section per group of measures: US & UK vs. all other locations.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc_analysis, liwc_other)

### Emotions


 posemo
               sum_sq      df     F  PR(>F)
C(treatments) 1.089   1.000   5.360 0.022  
Residual      33.512  165.000 nan   nan    


### Sentiments


 anx
               sum_sq      df     F  PR(>F)
C(treatments) 0.055   1.000   4.412 0.037  
Residual      2.058   165.000 nan   nan    


### Cognitive Processes


 certain
               sum_sq      df     F  PR(>F)
C(treatments) 0.261   1.000   4.282 0.040  
Residual      10.064  165.000 nan   nan    

 tentat
               sum_sq      df      F  PR(>F)
C(treatments) 3.544   1.000   22.355 0.000  
Residual      26.158  165.000 nan    nan    

 discrep
               sum_sq      df     F  PR(>F)
C(treatments) 0.287   1.000   4.305 0.040  
Residual      10.993  165.000 nan   nan    


### Time Orientation


 focuspresent
               sum_sq      df      F  PR(>F)
C(treatments) 22.961  1.000   17.486 0.000  
Residual      216.660 165.000 nan    nan    


In [20]:
# Header row: the label of each group, taken from the `field` column.
print("\t\t{}\t\t{}".format(
    liwc_analysis[field].unique()[0],
    liwc_other[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc_analysis, liwc_other)

		US		all other locations
Emotions
   affect  	3.22(0.541)	3.064(0.645)
[1m   posemo  	1.651(0.489)	1.49(0.407)[0m
   negemo  	1.522(0.208)	1.529(0.639)
Sentiments
[1m   anx     	0.48(0.104)	0.444(0.119)[0m
   anger   	0.291(0.099)	0.268(0.093)
   sad     	0.283(0.069)	0.283(0.081)
Cognitive Processes
[1m   certain 	0.94(0.238)	0.861(0.256)[0m
[1m   tentat  	1.98(0.4)	1.689(0.396)[0m
[1m   discrep 	1.023(0.255)	0.94(0.261)[0m
Time Orientation
   focuspast	3.548(0.686)	3.645(0.806)
[1m   focuspresent	7.4(0.975)	6.658(1.299)[0m
   focusfuture	1.11(0.281)	1.037(0.357)


### US vs. UK


In [21]:
# Direct US vs. UK comparison: one subset of `liwc` per country.
liwc_us, liwc_uk = (
    liwc[liwc['Location'].eq(country)] for country in ('US', 'UK')
)
print("US: {}, UK: {}".format(len(liwc_us), len(liwc_uk)))

US: 500096, UK: 55510


In [22]:
# Forget measures flagged by the previous comparison.
significant = []

# One ANOVA section per group of measures: US vs. UK.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc_us, liwc_uk)

### Emotions


 negemo
               sum_sq      df     F  PR(>F)
C(treatments) 0.549   1.000   8.660 0.004  
Residual      9.695   153.000 nan   nan    


### Sentiments


 anx
               sum_sq      df      F  PR(>F)
C(treatments) 0.259   1.000   11.183 0.001  
Residual      3.547   153.000 nan    nan    

 anger
               sum_sq      df     F  PR(>F)
C(treatments) 0.073   1.000   7.204 0.008  
Residual      1.548   153.000 nan   nan    

 sad
               sum_sq      df      F  PR(>F)
C(treatments) 0.441   1.000   51.304 0.000  
Residual      1.314   153.000 nan    nan    


### Cognitive Processes


 discrep
               sum_sq      df      F  PR(>F)
C(treatments) 2.231   1.000   24.360 0.000  
Residual      14.009  153.000 nan    nan    


### Time Orientation


 focuspresent
               sum_sq      df      F  PR(>F)
C(treatments) 17.486  1.000   17.731 0.000  
Residual      150.891 153.000 nan    nan    


In [23]:
# Header row: the label of each group, taken from the `field` column.
print("\t\t{}\t\t{}".format(
    liwc_us[field].unique()[0],
    liwc_uk[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc_us, liwc_uk)

		US		UK
Emotions
   affect  	3.208(0.561)	3.279(0.548)
   posemo  	1.655(0.493)	1.614(0.469)
[1m   negemo  	1.505(0.222)	1.625(0.283)[0m
Sentiments
[1m   anx     	0.469(0.109)	0.551(0.191)[0m
[1m   anger   	0.294(0.101)	0.25(0.1)[0m
[1m   sad     	0.273(0.063)	0.38(0.119)[0m
Cognitive Processes
   certain 	0.94(0.23)	1.003(0.257)
   tentat  	1.961(0.408)	2.069(0.385)
[1m   discrep 	1.006(0.242)	1.247(0.361)[0m
Time Orientation
   focuspast	3.528(0.721)	3.719(0.818)
[1m   focuspresent	7.363(0.967)	8.037(1.023)[0m
   focusfuture	1.117(0.251)	1.201(0.382)


### *US vs. UK, separated by platform*

### News

In [24]:
# Restrict both country subsets to documents published on the 'news' platform.
liwc_us_platform, liwc_uk_platform = (
    country_docs[country_docs['Platform'].eq('news')]
    for country_docs in (liwc_us, liwc_uk)
)
print("US (news): {}, UK (news): {}".format(len(liwc_us_platform), len(liwc_uk_platform)))

US (news): 331250, UK (news): 42834


In [25]:
# Forget measures flagged by the previous comparison.
significant = []

# One ANOVA section per group of measures: US vs. UK on the current platform.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc_us_platform, liwc_uk_platform)

### Emotions


 affect
               sum_sq      df     F  PR(>F)
C(treatments) 1.227   1.000   3.923 0.049  
Residual      46.909  150.000 nan   nan    

 negemo
               sum_sq      df      F  PR(>F)
C(treatments) 1.673   1.000   25.612 0.000  
Residual      9.796   150.000 nan    nan    


### Sentiments


 anx
               sum_sq      df      F  PR(>F)
C(treatments) 0.380   1.000   15.386 0.000  
Residual      3.702   150.000 nan    nan    

 sad
               sum_sq      df      F  PR(>F)
C(treatments) 0.565   1.000   61.526 0.000  
Residual      1.378   150.000 nan    nan    


### Cognitive Processes


 discrep
               sum_sq      df      F  PR(>F)
C(treatments) 1.940   1.000   23.480 0.000  
Residual      12.393  150.000 nan    nan    


### Time Orientation


 focuspresent
               sum_sq      df      F  PR(>F)
C(treatments) 14.782  1.000   18.281 0.000  
Residual      121.294 150.000 nan    nan    


In [26]:
# Header row: the label of each group, taken from the `field` column.
print("\t\t{}\t\t{}".format(
    liwc_us_platform[field].unique()[0],
    liwc_uk_platform[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc_us_platform, liwc_uk_platform)

		US		UK
Emotions
[1m   affect  	3.109(0.565)	3.289(0.553)[0m
   posemo  	1.626(0.489)	1.603(0.493)
[1m   negemo  	1.434(0.236)	1.644(0.276)[0m
Sentiments
[1m   anx     	0.461(0.113)	0.561(0.196)[0m
   anger   	0.264(0.09)	0.255(0.094)
[1m   sad     	0.27(0.067)	0.392(0.121)[0m
Cognitive Processes
   certain 	0.917(0.238)	0.982(0.261)
   tentat  	1.873(0.318)	1.976(0.318)
[1m   discrep 	0.98(0.223)	1.207(0.346)[0m
Time Orientation
   focuspast	3.629(0.586)	3.816(0.749)
[1m   focuspresent	7.242(0.781)	7.867(1.018)[0m
   focusfuture	1.105(0.254)	1.193(0.385)


### Blogs

In [27]:
# Restrict both country subsets to documents published on the 'blogs' platform.
liwc_us_platform, liwc_uk_platform = (
    country_docs[country_docs['Platform'].eq('blogs')]
    for country_docs in (liwc_us, liwc_uk)
)
print("US (blogs): {}, UK (blogs): {}".format(len(liwc_us_platform), len(liwc_uk_platform)))

US (blogs): 143880, UK (blogs): 10377


In [28]:
# Forget measures flagged by the previous comparison.
significant = []

# One ANOVA section per group of measures: US vs. UK on the current platform.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc_us_platform, liwc_uk_platform)

### Emotions


 affect
               sum_sq      df     F  PR(>F)
C(treatments) 2.098   1.000   6.304 0.013  
Residual      45.933  138.000 nan   nan    


### Sentiments


 anger
               sum_sq      df      F  PR(>F)
C(treatments) 0.489   1.000   21.083 0.000  
Residual      3.203   138.000 nan    nan    

 sad
               sum_sq      df      F  PR(>F)
C(treatments) 0.414   1.000   37.088 0.000  
Residual      1.541   138.000 nan    nan    


### Cognitive Processes


 tentat
               sum_sq      df     F  PR(>F)
C(treatments) 0.792   1.000   4.233 0.042  
Residual      25.815  138.000 nan   nan    

 discrep
               sum_sq      df     F  PR(>F)
C(treatments) 0.766   1.000   7.068 0.009  
Residual      14.964  138.000 nan   nan    


### Time Orientation


 focuspast
               sum_sq      df      F  PR(>F)
C(treatments) 15.759  1.000   26.799 0.000  
Residual      81.151  138.000 nan    nan    

 focusfuture
               sum_sq      df     F  PR(>F)
C(treatments) 0.516   1.000   4.555 0.035  
Residual      15.632  138.000 nan   nan    


In [29]:
# Header row: the label of each group, taken from the `field` column.
print("\t\t{}\t\t{}".format(
    liwc_us_platform[field].unique()[0],
    liwc_uk_platform[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc_us_platform, liwc_uk_platform)

		US		UK
Emotions
[1m   affect  	3.43(0.399)	3.185(0.726)[0m
   posemo  	1.756(0.429)	1.612(0.585)
   negemo  	1.625(0.251)	1.531(0.322)
Sentiments
   anx     	0.487(0.103)	0.508(0.213)
[1m   anger   	0.352(0.17)	0.234(0.129)[0m
[1m   sad     	0.283(0.062)	0.392(0.139)[0m
Cognitive Processes
   certain 	1.0(0.204)	1.038(0.271)
[1m   tentat  	2.097(0.356)	1.947(0.505)[0m
[1m   discrep 	1.038(0.216)	1.186(0.421)[0m
Time Orientation
[1m   focuspast	3.22(0.574)	3.892(0.937)[0m
   focuspresent	7.532(1.046)	7.736(1.25)
[1m   focusfuture	1.133(0.212)	1.255(0.436)[0m


### Discussions

In [30]:
# Restrict both country subsets to documents from the 'discussions' platform.
liwc_us_platform = liwc_us[liwc_us['Platform']=='discussions']
liwc_uk_platform = liwc_uk[liwc_uk['Platform']=='discussions']
# The label previously said "blogs" (copied from the blogs cell) although the
# counts are for discussions.
print("US (discussions): {}, UK (discussions): {}".format(liwc_us_platform.shape[0], liwc_uk_platform.shape[0]))

US (blogs): 24966, UK (blogs): 2299


In [31]:
# Forget measures flagged by the previous comparison.
significant = []

# One ANOVA section per group of measures: US vs. UK on the current platform.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    display(Markdown('### ' + section_title))
    get_anova_results_daily_aggregate(section_measures, liwc_us_platform, liwc_uk_platform)

### Emotions


 posemo
               sum_sq      df     F  PR(>F)
C(treatments) 1.684   1.000   7.533 0.007  
Residual      27.494  123.000 nan   nan    


### Sentiments


 anger
               sum_sq      df     F  PR(>F)
C(treatments) 0.282   1.000   4.104 0.045  
Residual      8.461   123.000 nan   nan    


### Cognitive Processes


 certain
               sum_sq      df      F  PR(>F)
C(treatments) 1.650   1.000   10.861 0.001  
Residual      18.686  123.000 nan    nan    

 tentat
               sum_sq      df      F  PR(>F)
C(treatments) 27.338  1.000   44.136 0.000  
Residual      76.186  123.000 nan    nan    

 discrep
               sum_sq      df      F  PR(>F)
C(treatments) 18.320  1.000   84.409 0.000  
Residual      26.696  123.000 nan    nan    


### Time Orientation


 focuspast
               sum_sq      df      F  PR(>F)
C(treatments) 13.613  1.000   11.347 0.001  
Residual      147.569 123.000 nan    nan    

 focuspresent
               sum_sq      df      F  PR(>F)
C(treatments) 284.302 1.000   71.062 0.000  
Residual      492.097 123.000 nan    nan    

 focusfuture
               sum_sq      df      F  PR(>F)
C(treatments) 4.925   1.000   24.158 0.000  
Residual      25.077  123.000 nan    nan    


In [36]:
# Header row: the label of each group, taken from the `field` column.
print("\t\t{}\t\t{}".format(
    liwc_us_platform[field].unique()[0],
    liwc_uk_platform[field].unique()[0],
))

# mean(std) table, one section per group of measures.
for section_title, section_measures in (
    ('Emotions', emotions),
    ('Sentiments', sentiments),
    ('Cognitive Processes', cognitive_processes),
    ('Time Orientation', time_orientation),
):
    print(section_title)
    get_mean_std(section_measures, liwc_us_platform, liwc_uk_platform)

		US		UK
Emotions
   affect  	3.59(0.557)	3.826(0.915)
[91m   posemo  	1.64(0.496)	1.876(0.438)[0m
   negemo  	1.905(0.323)	1.924(0.834)
Sentiments
   anx     	0.526(0.125)	0.655(0.691)
[91m   anger   	0.401(0.133)	0.305(0.375)[0m
   sad     	0.305(0.098)	0.35(0.252)
Cognitive Processes
[91m   certain 	1.14(0.29)	1.373(0.498)[0m
[91m   tentat  	2.512(0.532)	3.461(1.046)[0m
[91m   discrep 	1.314(0.358)	2.091(0.586)[0m
Time Orientation
[91m   focuspast	3.297(0.92)	2.627(1.304)[0m
[91m   focuspresent	8.875(1.708)	11.935(2.352)[0m
[91m   focusfuture	1.286(0.311)	1.688(0.596)[0m
