# VAD Multiple Linear Regression Analysis

### Import libraries and files

In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import statsmodels.api as sm

### Load data from CSV files

In [20]:
# NRC VAD lexicon score tables, one file per dimension (arousal, valence).
# Later cells join them to the word data on the 'Word' column.
vad_arousal = pd.read_csv(
    r'NRC_VAD_Lexicon/NRC-VAD-Lexicon-a-scores.csv'
)
vad_valence = pd.read_csv(
    r'NRC_VAD_Lexicon/NRC-VAD-Lexicon-v-scores.csv'
)

# Word-level table; later cells read its 'Word', 'MPS', 'TRT', 'GD' and 'FFD'
# columns.
zuco_cs = pd.read_csv(
    r'ZuCo_words_dataset.csv'
)

# Sentence-level table; later cells read the VAD columns, the discrete emotion
# columns and the gaze-feature columns from it.
sentences_cs = pd.read_csv(
    r'Emotion_Sentences_Cross_Analysis.csv'
)

# NOTE(review): an earlier revision excluded row 430 of zuco_cs at this point
# (zuco_cs.drop(index=430)); that step is currently disabled - confirm whether
# the row should stay in the analysis.

In [21]:
# Attach the lexicon arousal score to every ZuCo word. The inner join keeps
# only words present in both tables; rows are then ordered by ascending
# Arousal, re-indexed from 0, and the 'Word' join key is dropped.
# NOTE(review): the sort only changes row order. OLS coefficients do not depend
# on it, but order-based diagnostics in the regression summaries (e.g.
# Durbin-Watson) do - confirm the ordering is intended.
zuco_ar_cs = (
    zuco_cs
    .merge(vad_arousal, how='inner', on=['Word'])
    .sort_values(by=['Arousal'])
    .reset_index(drop=True)
    .drop(columns=['Word'])
)

# Same pipeline for the valence score.
zuco_va_cs = (
    zuco_cs
    .merge(vad_valence, how='inner', on=['Word'])
    .sort_values(by=['Valence'])
    .reset_index(drop=True)
    .drop(columns=['Word'])
)

## Multiple Linear Regression between VAD and Gaze Features on words in the dataset

### Analysis: Arousal as the dependent variable; MPS, TRT, GD, FFD as independent variables

In [23]:
# Multiple linear regression on the word-level data: Arousal explained by the
# four gaze features.
gaze_features = ['MPS', 'TRT', 'GD', 'FFD']
target = 'Arousal'

# Keep only the modelling columns (regressors first, response last).
zuco_ar_cs = pd.DataFrame(zuco_ar_cs, columns=gaze_features + [target])
y = zuco_ar_cs[target]

# statsmodels' OLS does not add an intercept on its own, so prepend a constant
# column; without one the fit would be forced through the origin.
# NOTE(review): the output saved with this cell reports "R-squared (uncentered)"
# and no `const` row, so it appears to predate this add_constant call - re-run
# the cell to refresh it.
x = sm.add_constant(zuco_ar_cs[gaze_features])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Arousal,R-squared (uncentered):,0.007
Model:,OLS,Adj. R-squared (uncentered):,0.003
Method:,Least Squares,F-statistic:,1.698
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,0.148
Time:,17:01:45,Log-Likelihood:,-1418.4
No. Observations:,1002,AIC:,2845.0
Df Residuals:,998,BIC:,2864.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,-0.0318,0.032,-0.987,0.324,-0.095,0.031
TRT,-0.0134,0.044,-0.305,0.761,-0.100,0.073
GD,0.1036,0.058,1.788,0.074,-0.010,0.217
FFD,-0.0272,0.048,-0.564,0.573,-0.122,0.067

0,1,2,3
Omnibus:,33.917,Durbin-Watson:,0.013
Prob(Omnibus):,0.0,Jarque-Bera (JB):,36.685
Skew:,0.462,Prob(JB):,1.08e-08
Kurtosis:,2.845,Cond. No.,3.49


### Analysis: Valence as the dependent variable; MPS, TRT, GD, FFD as independent variables

In [24]:
# Multiple linear regression on the word-level data: Valence explained by the
# four gaze features.
gaze_features = ['MPS', 'TRT', 'GD', 'FFD']
target = 'Valence'

# Keep only the modelling columns (regressors first, response last).
zuco_va_cs = pd.DataFrame(zuco_va_cs, columns=gaze_features + [target])
y = zuco_va_cs[target]

# statsmodels' OLS does not add an intercept on its own, so prepend a constant
# column; without one the fit would be forced through the origin.
# NOTE(review): the output saved with this cell reports "R-squared (uncentered)"
# and no `const` row, so it appears to predate this add_constant call - re-run
# the cell to refresh it.
x = sm.add_constant(zuco_va_cs[gaze_features])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Valence,R-squared (uncentered):,0.005
Model:,OLS,Adj. R-squared (uncentered):,0.001
Method:,Least Squares,F-statistic:,1.292
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,0.271
Time:,17:01:45,Log-Likelihood:,-1419.2
No. Observations:,1002,AIC:,2846.0
Df Residuals:,998,BIC:,2866.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MPS,-0.0561,0.032,-1.742,0.082,-0.119,0.007
TRT,0.0036,0.044,0.081,0.936,-0.083,0.090
GD,0.0200,0.058,0.345,0.730,-0.094,0.134
FFD,-0.0568,0.048,-1.177,0.240,-0.151,0.038

0,1,2,3
Omnibus:,69.896,Durbin-Watson:,0.011
Prob(Omnibus):,0.0,Jarque-Bera (JB):,83.497
Skew:,-0.666,Prob(JB):,7.39e-19
Kurtosis:,3.476,Cond. No.,3.49


## Multiple Linear Regression between VAD emotions and Discrete emotions in sentences

### Analysis: Anger as the dependent variable; Arousal, Valence, Dominance as independent variables

In [25]:
# Regress the sentence-level Anger score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Anger'

# Working frame holding just the regressors and the response.
vad_anger_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_anger_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_anger_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Anger,R-squared (uncentered):,0.208
Model:,OLS,Adj. R-squared (uncentered):,0.202
Method:,Least Squares,F-statistic:,33.85
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,1.89e-19
Time:,17:01:45,Log-Likelihood:,72.658
No. Observations:,390,AIC:,-139.3
Df Residuals:,387,BIC:,-127.4
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,0.7029,0.132,5.337,0.000,0.444,0.962
Valence,-0.4506,0.088,-5.099,0.000,-0.624,-0.277
Dominance,0.0659,0.121,0.546,0.585,-0.172,0.303

0,1,2,3
Omnibus:,272.531,Durbin-Watson:,1.73
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2875.741
Skew:,2.943,Prob(JB):,0.0
Kurtosis:,14.93,Cond. No.,16.1


### Analysis: Anticipation as the dependent variable; Arousal, Valence, Dominance as independent variables

In [26]:
# Regress the sentence-level Anticipation score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Anticipation'

# Working frame holding just the regressors and the response.
vad_anticipation_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_anticipation_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_anticipation_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Anticipation,R-squared (uncentered):,0.319
Model:,OLS,Adj. R-squared (uncentered):,0.314
Method:,Least Squares,F-statistic:,60.56
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,4.0200000000000005e-32
Time:,17:01:45,Log-Likelihood:,-150.29
No. Observations:,390,AIC:,306.6
Df Residuals:,387,BIC:,318.5
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,0.2925,0.233,1.254,0.211,-0.166,0.751
Valence,0.2654,0.157,1.696,0.091,-0.042,0.573
Dominance,-0.1083,0.214,-0.506,0.613,-0.529,0.312

0,1,2,3
Omnibus:,131.642,Durbin-Watson:,2.073
Prob(Omnibus):,0.0,Jarque-Bera (JB):,327.157
Skew:,1.683,Prob(JB):,9.09e-72
Kurtosis:,5.967,Cond. No.,16.1


### Analysis: Disgust as the dependent variable; Arousal, Valence, Dominance as independent variables

In [27]:
# Regress the sentence-level Disgust score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Disgust'

# Working frame holding just the regressors and the response.
vad_disgust_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_disgust_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_disgust_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Disgust,R-squared (uncentered):,0.108
Model:,OLS,Adj. R-squared (uncentered):,0.101
Method:,Least Squares,F-statistic:,15.55
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,1.44e-09
Time:,17:01:45,Log-Likelihood:,164.69
No. Observations:,390,AIC:,-323.4
Df Residuals:,387,BIC:,-311.5
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,0.3024,0.104,2.907,0.004,0.098,0.507
Valence,-0.1467,0.070,-2.103,0.036,-0.284,-0.010
Dominance,0.0037,0.095,0.039,0.969,-0.184,0.191

0,1,2,3
Omnibus:,299.198,Durbin-Watson:,1.911
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3166.85
Skew:,3.374,Prob(JB):,0.0
Kurtosis:,15.221,Cond. No.,16.1


### Analysis: Fear as the dependent variable; Arousal, Valence, Dominance as independent variables

In [28]:
# Regress the sentence-level Fear score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Fear'

# Working frame holding just the regressors and the response.
vad_fear_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_fear_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_fear_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Fear,R-squared (uncentered):,0.261
Model:,OLS,Adj. R-squared (uncentered):,0.255
Method:,Least Squares,F-statistic:,45.56
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,3.1e-25
Time:,17:01:45,Log-Likelihood:,-50.424
No. Observations:,390,AIC:,106.8
Df Residuals:,387,BIC:,118.7
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,1.0231,0.181,5.666,0.000,0.668,1.378
Valence,-0.7579,0.121,-6.256,0.000,-0.996,-0.520
Dominance,0.2222,0.166,1.342,0.180,-0.103,0.548

0,1,2,3
Omnibus:,228.991,Durbin-Watson:,1.9
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1755.55
Skew:,2.447,Prob(JB):,0.0
Kurtosis:,12.17,Cond. No.,16.1


### Analysis: Joy as the dependent variable; Arousal, Valence, Dominance as independent variables

In [29]:
# Regress the sentence-level Joy score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Joy'

# Working frame holding just the regressors and the response.
vad_joy_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_joy_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_joy_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Joy,R-squared (uncentered):,0.378
Model:,OLS,Adj. R-squared (uncentered):,0.373
Method:,Least Squares,F-statistic:,78.4
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,1.21e-39
Time:,17:01:45,Log-Likelihood:,-181.75
No. Observations:,390,AIC:,369.5
Df Residuals:,387,BIC:,381.4
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,0.7686,0.253,3.040,0.003,0.271,1.266
Valence,0.9058,0.170,5.339,0.000,0.572,1.239
Dominance,-1.0946,0.232,-4.720,0.000,-1.551,-0.639

0,1,2,3
Omnibus:,103.497,Durbin-Watson:,1.986
Prob(Omnibus):,0.0,Jarque-Bera (JB):,204.845
Skew:,1.433,Prob(JB):,3.3e-45
Kurtosis:,5.097,Cond. No.,16.1


### Analysis: Sadness as the dependent variable; Arousal, Valence, Dominance as independent variables

In [30]:
# Regress the sentence-level Sadness score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Sadness'

# Working frame holding just the regressors and the response.
vad_sadness_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_sadness_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_sadness_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Sadness,R-squared (uncentered):,0.216
Model:,OLS,Adj. R-squared (uncentered):,0.21
Method:,Least Squares,F-statistic:,35.61
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,2.41e-20
Time:,17:01:45,Log-Likelihood:,-33.498
No. Observations:,390,AIC:,73.0
Df Residuals:,387,BIC:,84.89
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,1.4101,0.173,8.156,0.000,1.070,1.750
Valence,-0.2859,0.116,-2.465,0.014,-0.514,-0.058
Dominance,-0.6308,0.159,-3.978,0.000,-0.943,-0.319

0,1,2,3
Omnibus:,242.482,Durbin-Watson:,1.869
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1736.82
Skew:,2.681,Prob(JB):,0.0
Kurtosis:,11.839,Cond. No.,16.1


### Analysis: Surprise as the dependent variable; Arousal, Valence, Dominance as independent variables

In [31]:
# Regress the sentence-level Surprise score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Surprise'

# Working frame holding just the regressors and the response.
vad_surprise_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_surprise_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_surprise_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Surprise,R-squared (uncentered):,0.119
Model:,OLS,Adj. R-squared (uncentered):,0.112
Method:,Least Squares,F-statistic:,17.42
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,1.25e-10
Time:,17:01:45,Log-Likelihood:,231.99
No. Observations:,390,AIC:,-458.0
Df Residuals:,387,BIC:,-446.1
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,0.3375,0.088,3.856,0.000,0.165,0.510
Valence,-0.0248,0.059,-0.422,0.673,-0.140,0.091
Dominance,-0.1623,0.080,-2.022,0.044,-0.320,-0.005

0,1,2,3
Omnibus:,325.601,Durbin-Watson:,2.091
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4755.512
Skew:,3.646,Prob(JB):,0.0
Kurtosis:,18.475,Cond. No.,16.1


### Analysis: Trust as the dependent variable; Arousal, Valence, Dominance as independent variables

In [32]:
# Regress the sentence-level Trust score on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'Trust'

# Working frame holding just the regressors and the response.
vad_trust_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
y = vad_trust_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_trust_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Trust,R-squared (uncentered):,0.501
Model:,OLS,Adj. R-squared (uncentered):,0.497
Method:,Least Squares,F-statistic:,129.5
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,4.37e-58
Time:,17:01:45,Log-Likelihood:,-355.99
No. Observations:,390,AIC:,718.0
Df Residuals:,387,BIC:,729.9
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,-0.8159,0.395,-2.064,0.040,-1.593,-0.039
Valence,-0.1317,0.265,-0.497,0.620,-0.653,0.390
Dominance,1.7367,0.363,4.791,0.000,1.024,2.449

0,1,2,3
Omnibus:,98.412,Durbin-Watson:,1.764
Prob(Omnibus):,0.0,Jarque-Bera (JB):,201.544
Skew:,1.333,Prob(JB):,1.72e-44
Kurtosis:,5.301,Cond. No.,16.1


## Multiple Linear Regression between VAD and Gaze Features in sentences

### Analysis: MPS as the dependent variable; Arousal, Valence, Dominance as independent variables

In [33]:
# Regress the sentence-level MPS gaze feature on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'MPS'

# Working frame with the regressors and the response; rows where the response
# is missing are discarded (the predictors are not checked for missing values).
vad_MPS_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
vad_MPS_cs = vad_MPS_cs.dropna(subset=[target])
y = vad_MPS_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_MPS_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,MPS,R-squared (uncentered):,0.987
Model:,OLS,Adj. R-squared (uncentered):,0.987
Method:,Least Squares,F-statistic:,8294.0
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,3.02e-302
Time:,17:01:45,Log-Likelihood:,-2363.2
No. Observations:,322,AIC:,4732.0
Df Residuals:,319,BIC:,4744.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,2260.6477,261.900,8.632,0.000,1745.378,2775.917
Valence,2125.9801,178.509,11.910,0.000,1774.776,2477.184
Dominance,1394.2851,243.018,5.737,0.000,916.165,1872.405

0,1,2,3
Omnibus:,25.717,Durbin-Watson:,1.879
Prob(Omnibus):,0.0,Jarque-Bera (JB):,31.325
Skew:,0.637,Prob(JB):,1.58e-07
Kurtosis:,3.843,Cond. No.,15.7


### Analysis: TRT as the dependent variable; Arousal, Valence, Dominance as independent variables

In [34]:
# Regress the sentence-level TRT gaze feature on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'TRT'

# Working frame with the regressors and the response; rows where the response
# is missing are discarded (the predictors are not checked for missing values).
vad_TRT_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
vad_TRT_cs = vad_TRT_cs.dropna(subset=[target])
y = vad_TRT_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_TRT_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,TRT,R-squared (uncentered):,0.849
Model:,OLS,Adj. R-squared (uncentered):,0.848
Method:,Least Squares,F-statistic:,597.9
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,1.46e-130
Time:,17:01:45,Log-Likelihood:,-2737.5
No. Observations:,322,AIC:,5481.0
Df Residuals:,319,BIC:,5492.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,1099.0607,837.458,1.312,0.190,-548.578,2746.700
Valence,2051.2700,570.806,3.594,0.000,928.250,3174.290
Dominance,1606.3802,777.081,2.067,0.040,77.530,3135.231

0,1,2,3
Omnibus:,29.679,Durbin-Watson:,1.799
Prob(Omnibus):,0.0,Jarque-Bera (JB):,35.114
Skew:,0.785,Prob(JB):,2.37e-08
Kurtosis:,3.389,Cond. No.,15.7


### Analysis: GD as the dependent variable; Arousal, Valence, Dominance as independent variables

In [35]:
# Regress the sentence-level GD gaze feature on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'GD'

# Working frame with the regressors and the response; rows where the response
# is missing are discarded (the predictors are not checked for missing values).
vad_GD_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
vad_GD_cs = vad_GD_cs.dropna(subset=[target])
y = vad_GD_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_GD_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,GD,R-squared (uncentered):,0.846
Model:,OLS,Adj. R-squared (uncentered):,0.844
Method:,Least Squares,F-statistic:,583.4
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,3.98e-129
Time:,17:01:45,Log-Likelihood:,-2662.7
No. Observations:,322,AIC:,5331.0
Df Residuals:,319,BIC:,5343.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,1060.2456,663.716,1.597,0.111,-245.567,2366.059
Valence,1634.2122,452.384,3.612,0.000,744.179,2524.246
Dominance,1073.3795,615.864,1.743,0.082,-138.289,2285.048

0,1,2,3
Omnibus:,30.409,Durbin-Watson:,1.848
Prob(Omnibus):,0.0,Jarque-Bera (JB):,36.275
Skew:,0.805,Prob(JB):,1.33e-08
Kurtosis:,3.334,Cond. No.,15.7


### Analysis: FFD as the dependent variable; Arousal, Valence, Dominance as independent variables

In [36]:
# Regress the sentence-level FFD gaze feature on the three VAD dimensions.
predictors = ['Arousal', 'Valence', 'Dominance']
target = 'FFD'

# Working frame with the regressors and the response; rows where the response
# is missing are discarded (the predictors are not checked for missing values).
vad_FFD_cs = pd.DataFrame(sentences_cs, columns=predictors + [target])
vad_FFD_cs = vad_FFD_cs.dropna(subset=[target])
y = vad_FFD_cs[target]

# OLS fits no intercept unless a constant column is supplied, so add one.
# NOTE(review): the saved output shows "R-squared (uncentered)" and no `const`
# row, so it appears to predate add_constant - re-run to refresh.
x = sm.add_constant(vad_FFD_cs[predictors])

model = sm.OLS(y, x).fit()
model.summary()

0,1,2,3
Dep. Variable:,FFD,R-squared (uncentered):,0.845
Model:,OLS,Adj. R-squared (uncentered):,0.844
Method:,Least Squares,F-statistic:,581.9
Date:,"Thu, 05 Nov 2020",Prob (F-statistic):,5.710000000000001e-129
Time:,17:01:45,Log-Likelihood:,-2632.2
No. Observations:,322,AIC:,5270.0
Df Residuals:,319,BIC:,5282.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Arousal,1096.9394,603.805,1.817,0.070,-91.004,2284.883
Valence,1463.1739,411.549,3.555,0.000,653.480,2272.868
Dominance,894.9052,560.273,1.597,0.111,-207.392,1997.202

0,1,2,3
Omnibus:,29.53,Durbin-Watson:,1.869
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34.987
Skew:,0.791,Prob(JB):,2.53e-08
Kurtosis:,3.328,Cond. No.,15.7
