# VAD Multiple Linear Regression Analysis

### Import libraries and files

In [1]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Make the parent directory importable so that the project-local
# BackwardElimination module (imported below as `be`) can be found.
import sys
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '../')
import BackwardElimination as be

### Take data from CSV files

In [2]:
# Read the two CSV inputs from the sibling Lexicons directory.
# NOTE(review): judging by the file names, the first is a word-level table and
# the second a sentence-level one — confirm against the data files.
zuco_cs = pd.read_csv(filepath_or_buffer=r'../Lexicons/ZuCo_words_dataset.csv')
sentences_cs = pd.read_csv(
    filepath_or_buffer=r'../Lexicons/Emotion_Sentences_Cross_Analysis.csv'
)

# Plain-text dump of the sentence table (pandas truncates long frames itself).
print(sentences_cs)

      Arousal   Valence  Dominance  Anger  Anticipation  Disgust   Fear  \
0    0.415830  0.240984   0.316366  0.349         0.000    0.000  0.000   
1    0.147271  0.077473   0.095612  0.000         0.000    0.000  1.072   
2    0.271725  0.304864   0.311004  0.000         0.609    0.297  0.000   
3    0.269869  0.380215   0.313269  0.000         0.000    0.250  0.000   
4    0.425873  0.382730   0.371271  0.000         0.000    0.000  0.000   
..        ...       ...        ...    ...           ...      ...    ...   
385  0.116048  0.175611   0.164414  0.000         0.000    0.000  0.000   
386  0.476419  0.616485   0.524507  0.000         0.555    0.000  0.094   
387  0.321070  0.237527   0.340684  0.000         0.000    0.000  0.942   
388  0.067249  0.157618   0.104977  0.000         0.406    0.000  0.000   
389  0.551638  0.571384   0.477003  0.000         0.531    0.000  0.038   

       Joy  Sadness  Surprise  Trust       MPS       TRT        GD       FFD  
0    0.000    0.953 

## Multiple Linear Regression between VAD and Gaze Features on sentences

### Analysis: Arousal as dependent and MPS, TRT, GD and FFD as independent

In [3]:
# Regress Arousal on the four gaze features of each sentence.
gaze_features = ['MPS', 'TRT', 'GD', 'FFD']

# Keep only the columns this model needs, then drop every row that is missing
# at least one gaze feature. A single dropna(subset=...) replaces the four
# chained notnull() filters and yields the same rows.
GF_AR_cs = pd.DataFrame(sentences_cs, columns=['Arousal'] + gaze_features)
GF_AR_cs = GF_AR_cs.dropna(subset=gaze_features)

x = GF_AR_cs[gaze_features]
y = GF_AR_cs['Arousal']

# add_constant prepends the intercept column ('const') to the predictors.
x = sm.add_constant(x)
# NOTE(review): backWardEliminationMLR lives in ../BackwardElimination.py and is
# not visible here; presumably it iteratively removes the predictor with the
# highest p-value and returns the final fitted OLS result — confirm.
model = be.backWardEliminationMLR(x, y)
model.summary()

const    0.063829
MPS      0.504383
TRT      0.389606
GD       0.043415
FFD      0.000005
dtype: float64
 
const    0.008603
TRT      0.307776
GD       0.043408
FFD      0.000003
dtype: float64
 
const    0.005311
GD       0.005911
FFD      0.000001
dtype: float64
 


0,1,2,3
Dep. Variable:,Arousal,R-squared:,0.604
Model:,OLS,Adj. R-squared:,0.601
Method:,Least Squares,F-statistic:,242.9
Date:,"Mon, 16 Nov 2020",Prob (F-statistic):,7.77e-65
Time:,15:53:48,Log-Likelihood:,266.46
No. Observations:,322,AIC:,-526.9
Df Residuals:,319,BIC:,-515.6
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0326,0.012,2.807,0.005,0.010,0.055
GD,-0.8136,0.294,-2.771,0.006,-1.391,-0.236
FFD,1.4638,0.298,4.916,0.000,0.878,2.050

0,1,2,3
Omnibus:,6.984,Durbin-Watson:,1.842
Prob(Omnibus):,0.03,Jarque-Bera (JB):,9.979
Skew:,0.124,Prob(JB):,0.00681
Kurtosis:,3.826,Cond. No.,78.6


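After backward elimination, MPS and TRT are removed from the model (their p-values are above the significance level), while GD and FFD remain statistically significant predictors of the dependent variable Arousal (R² = 0.604). The output therefore does not support the conclusion that Arousal is unrelated to all four independent variables MPS, TRT, GD and FFD: only MPS and TRT show no significant relationship.

Significance level (SL) = 0.05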

### Analysis: Valence as dependent and MPS, TRT, GD and FFD as independent

In [4]:
# Regress Valence on the four gaze features of each sentence.
gaze_features = ['MPS', 'TRT', 'GD', 'FFD']

# Keep only the columns this model needs, then drop every row that is missing
# at least one gaze feature. A single dropna(subset=...) replaces the four
# chained notnull() filters and yields the same rows.
GF_VA_cs = pd.DataFrame(sentences_cs, columns=['Valence'] + gaze_features)
GF_VA_cs = GF_VA_cs.dropna(subset=gaze_features)

x = GF_VA_cs[gaze_features]
y = GF_VA_cs['Valence']

# add_constant prepends the intercept column ('const') to the predictors.
x = sm.add_constant(x)
# NOTE(review): backWardEliminationMLR lives in ../BackwardElimination.py and is
# not visible here; presumably it iteratively removes the predictor with the
# highest p-value and returns the final fitted OLS result — confirm.
model = be.backWardEliminationMLR(x, y)
model.summary()

const    0.001208
MPS      0.152804
TRT      0.659614
GD       0.090889
FFD      0.000061
dtype: float64
 
const    0.000565
MPS      0.120005
GD       0.036479
FFD      0.000045
dtype: float64
 
const    0.000022
GD       0.024473
FFD      0.000018
dtype: float64
 


0,1,2,3
Dep. Variable:,Valence,R-squared:,0.59
Model:,OLS,Adj. R-squared:,0.587
Method:,Least Squares,F-statistic:,229.3
Date:,"Mon, 16 Nov 2020",Prob (F-statistic):,1.93e-62
Time:,15:53:48,Log-Likelihood:,248.84
No. Observations:,322,AIC:,-491.7
Df Residuals:,319,BIC:,-480.4
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0528,0.012,4.305,0.000,0.029,0.077
GD,-0.7009,0.310,-2.260,0.024,-1.311,-0.091
FFD,1.3684,0.314,4.351,0.000,0.750,1.987

0,1,2,3
Omnibus:,11.118,Durbin-Watson:,1.775
Prob(Omnibus):,0.004,Jarque-Bera (JB):,14.32
Skew:,0.299,Prob(JB):,0.000777
Kurtosis:,3.842,Cond. No.,78.6


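After backward elimination, TRT and MPS are removed from the model (their p-values are above the significance level), while GD and FFD remain statistically significant predictors of the dependent variable Valence (R² = 0.590). The output therefore does not support the conclusion that Valence is unrelated to all four independent variables MPS, TRT, GD and FFD: only MPS and TRT show no significant relationship.

Significance level (SL) = 0.05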

### Analysis: Dominance as dependent and MPS, TRT, GD and FFD as independent

In [5]:
# Regress Dominance on the four gaze features of each sentence.
gaze_features = ['MPS', 'TRT', 'GD', 'FFD']

# Keep only the columns this model needs, then drop every row that is missing
# at least one gaze feature. A single dropna(subset=...) replaces the four
# chained notnull() filters and yields the same rows.
GF_DO_cs = pd.DataFrame(sentences_cs, columns=['Dominance'] + gaze_features)
GF_DO_cs = GF_DO_cs.dropna(subset=gaze_features)

x = GF_DO_cs[gaze_features]
y = GF_DO_cs['Dominance']

# add_constant prepends the intercept column ('const') to the predictors.
x = sm.add_constant(x)
# NOTE(review): backWardEliminationMLR lives in ../BackwardElimination.py and is
# not visible here; presumably it iteratively removes the predictor with the
# highest p-value and returns the final fitted OLS result — confirm.
model = be.backWardEliminationMLR(x, y)
model.summary()

const    0.022252
MPS      0.750640
TRT      0.688100
GD       0.101602
FFD      0.000066
dtype: float64
 
const    0.000036
TRT      0.632957
GD       0.101287
FFD      0.000053
dtype: float64
 
const    0.000023
GD       0.038803
FFD      0.000035
dtype: float64
 


0,1,2,3
Dep. Variable:,Dominance,R-squared:,0.597
Model:,OLS,Adj. R-squared:,0.594
Method:,Least Squares,F-statistic:,235.9
Date:,"Mon, 16 Nov 2020",Prob (F-statistic):,1.29e-63
Time:,15:53:48,Log-Likelihood:,287.87
No. Observations:,322,AIC:,-569.7
Df Residuals:,319,BIC:,-558.4
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0467,0.011,4.299,0.000,0.025,0.068
GD,-0.5699,0.275,-2.075,0.039,-1.110,-0.030
FFD,1.1698,0.279,4.199,0.000,0.622,1.718

0,1,2,3
Omnibus:,10.802,Durbin-Watson:,1.723
Prob(Omnibus):,0.005,Jarque-Bera (JB):,15.57
Skew:,0.247,Prob(JB):,0.000416
Kurtosis:,3.958,Cond. No.,78.6


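After backward elimination, MPS and TRT are removed from the model (their p-values are above the significance level), while GD and FFD remain statistically significant predictors of the dependent variable Dominance (R² = 0.597). The output therefore does not support the conclusion that Dominance is unrelated to all four independent variables MPS, TRT, GD and FFD: only MPS and TRT show no significant relationship.

Significance level (SL) = 0.05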