In [1]:
import numpy as np
import pandas as pd
import csv

### Import the ZuCo dataset

In [2]:
# Word-level ZuCo table. Its 'Word' column is the key the lexicon merges join
# on further down in this notebook.
zuco_ds = pd.read_csv(filepath_or_buffer='ZuCo_words_dataset.csv')

### Load the per-emotion intensity scores from the NRC Emotion Intensity Lexicon

In [3]:
# Read one NRC Emotion Intensity Lexicon CSV per emotion. The files are read in
# the order listed and each resulting frame is bound to its <emotion>_lex name.
(
    anger_lex,
    anticipation_lex,
    disgust_lex,
    fear_lex,
    joy_lex,
    sadness_lex,
    surprise_lex,
    trust_lex,
) = map(
    pd.read_csv,
    (
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-anger-scores.csv',
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-anticipation-scores.csv',
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-disgust-scores.csv',
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-fear-scores.csv',
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-joy-scores.csv',
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-sadness-scores.csv',
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-surprise-scores.csv',
        'NRC_Emotion_Intensity_Lexicon/NRC-Emotion-Intensity-trust-scores.csv',
    ),
)

### Intersect each emotion lexicon with the ZuCo word dataset

In [4]:
# For every emotion: inner-join the ZuCo table with that emotion's lexicon on
# 'Word' (so only words present in both survive), then drop the 'Word' key so
# that only the remaining columns of the two inputs are kept.
(
    anger_ds,
    anticipation_ds,
    disgust_ds,
    fear_ds,
    joy_ds,
    sadness_ds,
    surprise_ds,
    trust_ds,
) = (
    zuco_ds.merge(lexicon, how='inner', on=['Word']).drop(columns=['Word'])
    for lexicon in (
        anger_lex,
        anticipation_lex,
        disgust_lex,
        fear_lex,
        joy_lex,
        sadness_lex,
        surprise_lex,
        trust_lex,
    )
)

             Word          MPS         TRT          GD         FFD  \
0          losing  3328.035714  117.071429  101.928571   96.714286   
1            whip  3253.875000  159.875000  147.625000  137.500000   
2            gore  3347.736332  161.481481  125.296296  116.444444   
3       socialist  3307.029412  174.470588  163.941176  140.588235   
4        politics  3240.321705  134.046512  108.209302  105.534884   
5           death  3369.560606  137.113636  122.659091  108.477273   
6           cross  3317.326923  148.346154  120.038462  108.500000   
7       revulsion  3624.174242  291.727273  188.818182  155.363636   
8       brutality  3475.090909  154.000000  145.000000  118.909091   
9         unhappy  3373.400000  175.000000  125.066667  119.733333   
10        hanging  3301.987805  150.634146  127.536585  121.219512   
11        poverty  3295.652174  161.260870  123.652174  115.043478   
12          fight  3213.884615  109.000000  102.076923  102.076923   
13         strike  3

### Apply feature scaling

In [20]:
from sklearn.preprocessing import StandardScaler
# Single scaler object reused for every per-emotion dataset below; the
# arguments spell out StandardScaler's default configuration.
sc = StandardScaler(copy=True, with_mean=True, with_std=True)

In [21]:
# Standardize each per-emotion dataset. The same scaler object `sc` is fit
# again on every dataset in turn (in the order listed), and each *_ds name is
# rebound to the scaler's output for that dataset.
(
    anger_ds,
    anticipation_ds,
    disgust_ds,
    fear_ds,
    joy_ds,
    sadness_ds,
    surprise_ds,
    trust_ds,
) = [
    sc.fit_transform(dataset)
    for dataset in (
        anger_ds,
        anticipation_ds,
        disgust_ds,
        fear_ds,
        joy_ds,
        sadness_ds,
        surprise_ds,
        trust_ds,
    )
]

### Fit an ordinary least squares (OLS) linear regression for each emotion

In [22]:
import statsmodels.api as sm

In [23]:
# Re-attach column labels to the scaled anger data, then regress the emotion
# score on the four ZuCo measures.
anger_ds = pd.DataFrame(data=anger_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = anger_ds.loc[:, 'SCORE']
X = anger_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.04
Model:,OLS,Adj. R-squared (uncentered):,-0.054
Method:,Least Squares,F-statistic:,0.4224
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.792
Time:,19:20:09,Log-Likelihood:,-62.944
No. Observations:,45,AIC:,133.9
Df Residuals:,41,BIC:,141.1
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,0.0396,0.177,0.223,0.825,-0.319,0.398
TRT,0.1558,0.227,0.686,0.497,-0.303,0.615
GD,0.2064,0.409,0.505,0.616,-0.619,1.032
FFD,-0.3015,0.355,-0.849,0.401,-1.019,0.416

0,1,2,3
Omnibus:,4.715,Durbin-Watson:,1.889
Prob(Omnibus):,0.095,Jarque-Bera (JB):,2.384
Skew:,0.292,Prob(JB):,0.304
Kurtosis:,2.035,Cond. No.,5.73


In [24]:
# Re-attach column labels to the scaled anticipation data, then regress the
# emotion score on the four ZuCo measures.
anticipation_ds = pd.DataFrame(data=anticipation_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = anticipation_ds.loc[:, 'SCORE']
X = anticipation_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.008
Model:,OLS,Adj. R-squared (uncentered):,-0.042
Method:,Least Squares,F-statistic:,0.1547
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.96
Time:,19:20:09,Log-Likelihood:,-117.45
No. Observations:,83,AIC:,242.9
Df Residuals:,79,BIC:,252.6
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,-0.0426,0.115,-0.370,0.712,-0.272,0.187
TRT,0.0411,0.146,0.281,0.779,-0.250,0.332
GD,-0.0488,0.190,-0.257,0.798,-0.427,0.330
FFD,-0.0527,0.170,-0.311,0.757,-0.390,0.285

0,1,2,3
Omnibus:,1.232,Durbin-Watson:,1.863
Prob(Omnibus):,0.54,Jarque-Bera (JB):,1.265
Skew:,-0.211,Prob(JB):,0.531
Kurtosis:,2.567,Cond. No.,3.19


In [25]:
# Re-attach column labels to the scaled disgust data, then regress the emotion
# score on the four ZuCo measures.
disgust_ds = pd.DataFrame(data=disgust_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = disgust_ds.loc[:, 'SCORE']
X = disgust_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.119
Model:,OLS,Adj. R-squared (uncentered):,-0.049
Method:,Least Squares,F-statistic:,0.7098
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.594
Time:,19:20:09,Log-Likelihood:,-33.888
No. Observations:,25,AIC:,75.78
Df Residuals:,21,BIC:,80.65
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,0.2029,0.239,0.849,0.406,-0.294,0.700
TRT,0.3022,0.367,0.824,0.419,-0.460,1.065
GD,-0.2406,0.628,-0.383,0.705,-1.546,1.065
FFD,0.1607,0.496,0.324,0.749,-0.871,1.192

0,1,2,3
Omnibus:,3.359,Durbin-Watson:,1.839
Prob(Omnibus):,0.186,Jarque-Bera (JB):,2.043
Skew:,0.474,Prob(JB):,0.36
Kurtosis:,1.969,Cond. No.,6.59


In [26]:
# Re-attach column labels to the scaled fear data, then regress the emotion
# score on the four ZuCo measures.
fear_ds = pd.DataFrame(data=fear_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = fear_ds.loc[:, 'SCORE']
X = fear_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.09
Model:,OLS,Adj. R-squared (uncentered):,0.019
Method:,Least Squares,F-statistic:,1.268
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.295
Time:,19:20:09,Log-Likelihood:,-75.434
No. Observations:,55,AIC:,158.9
Df Residuals:,51,BIC:,166.9
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,0.2669,0.149,1.794,0.079,-0.032,0.566
TRT,-0.3137,0.223,-1.406,0.166,-0.761,0.134
GD,-0.2554,0.373,-0.685,0.497,-1.004,0.493
FFD,0.4573,0.347,1.318,0.193,-0.239,1.154

0,1,2,3
Omnibus:,4.375,Durbin-Watson:,1.928
Prob(Omnibus):,0.112,Jarque-Bera (JB):,2.079
Skew:,0.143,Prob(JB):,0.354
Kurtosis:,2.092,Cond. No.,6.02


In [27]:
# Re-attach column labels to the scaled joy data, then regress the emotion
# score on the four ZuCo measures.
joy_ds = pd.DataFrame(data=joy_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = joy_ds.loc[:, 'SCORE']
X = joy_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.028
Model:,OLS,Adj. R-squared (uncentered):,-0.008
Method:,Least Squares,F-statistic:,0.7701
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.547
Time:,19:20:09,Log-Likelihood:,-154.51
No. Observations:,110,AIC:,317.0
Df Residuals:,106,BIC:,327.8
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,-0.0791,0.098,-0.806,0.422,-0.274,0.116
TRT,0.0363,0.130,0.278,0.781,-0.222,0.295
GD,-0.1084,0.182,-0.595,0.553,-0.470,0.253
FFD,-0.0673,0.172,-0.392,0.696,-0.408,0.273

0,1,2,3
Omnibus:,2.653,Durbin-Watson:,1.95
Prob(Omnibus):,0.265,Jarque-Bera (JB):,2.536
Skew:,0.303,Prob(JB):,0.281
Kurtosis:,2.568,Cond. No.,3.77


In [28]:
# Re-attach column labels to the scaled sadness data, then regress the emotion
# score on the four ZuCo measures.
sadness_ds = pd.DataFrame(data=sadness_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = sadness_ds.loc[:, 'SCORE']
X = sadness_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.063
Model:,OLS,Adj. R-squared (uncentered):,-0.018
Method:,Least Squares,F-statistic:,0.7776
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.545
Time:,19:20:09,Log-Likelihood:,-69.311
No. Observations:,50,AIC:,146.6
Df Residuals:,46,BIC:,154.3
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,0.1306,0.143,0.911,0.367,-0.158,0.419
TRT,0.0966,0.207,0.466,0.643,-0.321,0.514
GD,-0.0890,0.359,-0.248,0.805,-0.811,0.634
FFD,0.2082,0.384,0.542,0.591,-0.565,0.981

0,1,2,3
Omnibus:,10.392,Durbin-Watson:,2.169
Prob(Omnibus):,0.006,Jarque-Bera (JB):,3.095
Skew:,0.188,Prob(JB):,0.213
Kurtosis:,1.84,Cond. No.,5.66


In [29]:
# Re-attach column labels to the scaled surprise data, then regress the emotion
# score on the four ZuCo measures.
surprise_ds = pd.DataFrame(data=surprise_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = surprise_ds.loc[:, 'SCORE']
X = surprise_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.065
Model:,OLS,Adj. R-squared (uncentered):,-0.078
Method:,Least Squares,F-statistic:,0.4545
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.768
Time:,19:20:09,Log-Likelihood:,-41.554
No. Observations:,30,AIC:,91.11
Df Residuals:,26,BIC:,96.71
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,-0.0748,0.203,-0.368,0.716,-0.493,0.343
TRT,-0.0644,0.291,-0.221,0.827,-0.663,0.534
GD,0.2920,0.288,1.015,0.320,-0.300,0.884
FFD,-0.2919,0.257,-1.134,0.267,-0.821,0.237

0,1,2,3
Omnibus:,1.187,Durbin-Watson:,2.556
Prob(Omnibus):,0.553,Jarque-Bera (JB):,1.094
Skew:,0.309,Prob(JB):,0.579
Kurtosis:,2.297,Cond. No.,2.86


In [30]:
# Re-attach column labels to the scaled trust data, then regress the emotion
# score on the four ZuCo measures.
trust_ds = pd.DataFrame(data=trust_ds, columns=['MPS', 'TRT', 'GD', 'FFD', 'SCORE'])
y = trust_ds.loc[:, 'SCORE']
X = trust_ds.loc[:, ['MPS', 'TRT', 'GD', 'FFD']]

# X holds only the four predictors: no constant column is added, so the fit has
# no intercept term (the summary reports uncentered R-squared).
# NOTE(review): add sm.add_constant(X) if an explicit intercept is wanted.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SCORE,R-squared (uncentered):,0.005
Model:,OLS,Adj. R-squared (uncentered):,-0.02
Method:,Least Squares,F-statistic:,0.1918
Date:,"Sat, 31 Oct 2020",Prob (F-statistic):,0.942
Time:,19:20:09,Log-Likelihood:,-236.57
No. Observations:,167,AIC:,481.1
Df Residuals:,163,BIC:,493.6
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,0.0239,0.081,0.294,0.769,-0.137,0.185
TRT,0.0193,0.104,0.185,0.854,-0.187,0.226
GD,0.0077,0.159,0.048,0.962,-0.307,0.323
FFD,0.0406,0.141,0.288,0.774,-0.239,0.320

0,1,2,3
Omnibus:,1.166,Durbin-Watson:,1.855
Prob(Omnibus):,0.558,Jarque-Bera (JB):,1.27
Skew:,0.173,Prob(JB):,0.53
Kurtosis:,2.751,Cond. No.,4.04
