# Sentiment Analysis **3-class**

In [0]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd


# F1 scores for the 3-class sentiment task: three recorded values per method.
data11 = {
    'LEXICON_BASED': [57, 51, 54],
    'BOW': [54, 54, 49],
    'TF-IDF': [55, 55, 54],
    'Skip-Thought': [58, 57, 57],
    'Word2vec': [59, 62, 61],
    'GloVe': [58, 61, 61],
    'FastText': [63, 65, 63],
    'Meta-Embeddings': [61, 65, 63],
    'BERT': [67, 67, 67],
    'ELMo': [63, 64, 63],
}
df11 = pd.DataFrame(data11)

# Wide -> long: one row per (row index, method) pair so the formula API can
# treat `embeddings` as the grouping factor.
df11_melt = pd.melt(
    df11.reset_index(),
    id_vars=['index'],
    value_vars=list(data11),
    var_name='embeddings',
    value_name='F1_SCORE',
)

print(df11_melt)

# Regress F1_SCORE on the `embeddings` grouping column; the fitted model
# supplies the residuals for the normality check and the ANOVA table below.
model11 = ols(formula='F1_SCORE ~ embeddings', data=df11_melt).fit()


  import pandas.util.testing as tm


    index       embeddings  F1_SCORE
0       0    LEXICON_BASED        57
1       1    LEXICON_BASED        51
2       2    LEXICON_BASED        54
3       0              BOW        54
4       1              BOW        54
5       2              BOW        49
6       0           TF-IDF        55
7       1           TF-IDF        55
8       2           TF-IDF        54
9       0     Skip-Thought        58
10      1     Skip-Thought        57
11      2     Skip-Thought        57
12      0         Word2vec        59
13      1         Word2vec        62
14      2         Word2vec        61
15      0            GloVe        58
16      1            GloVe        61
17      2            GloVe        61
18      0         FastText        63
19      1         FastText        65
20      2         FastText        63
21      0  Meta-Embeddings        61
22      1  Meta-Embeddings        65
23      2  Meta-Embeddings        63
24      0             BERT        67
25      1             BERT        67
2

## Checking whether the ANOVA assumptions are satisfied



## **1)Normality**
### Shapiro-Wilk Test 

#### **Null Hypothesis**  :   **Data is drawn from Normal Distribution**

In [0]:
from scipy import stats

# Shapiro-Wilk test applied to the residuals of the fitted OLS model.
shapiro_outcome = stats.shapiro(model11.resid)
w, pvalue = shapiro_outcome
print('Shapiro Test')
print(w, pvalue)


Shapiro Test
0.9641076922416687 0.39264270663261414


### Since p-value > 0.05, we fail to reject the null hypothesis: the residuals are consistent with a normal distribution.


## **2)Homogeneity of Variances**
### Levene's Test

#### **Null Hypothesis** : **Samples from populations have equal variances.**

In [0]:
from scipy import stats

# Equal-variance check across the method columns of df11. Levene's test is
# the one executed here; the Bartlett call that previously sat in this cell
# was disabled.
print("Levene Test")
w, pvalue = stats.levene(*(df11[column] for column in df11.columns))
print(w, pvalue)



Levene Test
0.6705653021442497 0.7257543817033438


### Since p-value > 0.05, we fail to reject the null hypothesis and conclude that the embeddings have equal variances.



## If the assumptions hold, a one-way ANOVA test is conducted

In [0]:
# One-way ANOVA table derived from the fitted 3-class model.
table11 = sm.stats.anova_lm(model11)
print("Anova Table for Sentiment-Analysis 3 class", table11, sep="\n")




Anova Table for Sentiment-Analysis 3 class
              df  sum_sq    mean_sq          F        PR(>F)
embeddings   9.0   635.2  70.577778  24.337165  6.976054e-09
Residual    20.0    58.0   2.900000        NaN           NaN


### If ANOVA returns a **statistically significant result**, a post-hoc **Tukey HSD test** is run to confirm where the differences occurred between groups.

### Since **p-value : 6.97e-09** (p-value < 0.01), there are **statistically significant differences** between the groups


In [0]:
# Section banner, padded above and below by blank lines.
print("\n", "---------------Sentiment Analysis 3-class-----------------", "\n", sep="\n")

# Tukey HSD pairwise comparison of F1 scores between embedding groups,
# with the family-wise error rate set to 0.01.
m_comp = pairwise_tukeyhsd(
    endog=df11_melt['F1_SCORE'],
    groups=df11_melt['embeddings'],
    alpha=0.01,
)

print(m_comp)




---------------Sentiment Analysis 3-class-----------------


          Multiple Comparison of Means - Tukey HSD, FWER=0.01          
     group1          group2     meandiff p-adj   lower    upper  reject
-----------------------------------------------------------------------
           BERT             BOW -14.6667  0.001 -20.6508 -8.6825   True
           BERT            ELMo  -3.6667 0.2634  -9.6508  2.3175  False
           BERT        FastText  -3.3333 0.3782  -9.3175  2.6508  False
           BERT           GloVe     -7.0  0.002 -12.9841 -1.0159   True
           BERT   LEXICON_BASED    -13.0  0.001 -18.9841 -7.0159   True
           BERT Meta-Embeddings     -4.0 0.1765  -9.9841  1.9841  False
           BERT    Skip-Thought  -9.6667  0.001 -15.6508 -3.6825   True
           BERT          TF-IDF -12.3333  0.001 -18.3175 -6.3492   True
           BERT        Word2vec  -6.3333 0.0058 -12.3175 -0.3492   True
            BOW            ELMo     11.0  0.001   5.0159 16.9841   True
 

# Sentiment Analysis 2-class


In [0]:
# Two class
# only over-sampling
# NOTE(review): the third Skip-Thought value (24) is far below the other two
# (78, 79) -- confirm it is not a data-entry slip before trusting the tests.
data21 = {
    'LEXICON_BASED': [72, 60, 68],
    'BOW': [75, 73, 74],
    'TF-IDF': [74, 72, 73],
    'Skip-Thought': [78, 79, 24],
    'Word2vec': [83, 85, 84],
    'GloVe': [86, 87, 87],
    'FastText': [86, 85, 86],
    'Meta-Embeddings': [83, 86, 86],
    'BERT': [88, 88, 88],
    'ELMo': [84, 83, 84],
}
df21 = pd.DataFrame(data21)

# Wide -> long so that `embeddings` becomes the grouping column.
df21_melt = pd.melt(
    df21.reset_index(),
    id_vars=['index'],
    value_vars=list(data21),
    var_name='embeddings',
    value_name='F1_SCORE',
)

print(df21_melt)

from scipy import stats

# Fit F1_SCORE against the `embeddings` grouping column for the 2-class task.
model21 = ols(formula='F1_SCORE ~ embeddings', data=df21_melt).fit()

# Shapiro-Wilk test on the model residuals.
w, pvalue = stats.shapiro(model21.resid)
print('Shapiro Test')
print(w, pvalue)

# Levene's test across the method columns (the Bartlett call that used to be
# here was disabled).
print("Levene Test")
w, pvalue = stats.levene(*(df21[column] for column in df21.columns))
print(w, pvalue)




    index       embeddings  F1_SCORE
0       0    LEXICON_BASED        72
1       1    LEXICON_BASED        60
2       2    LEXICON_BASED        68
3       0              BOW        75
4       1              BOW        73
5       2              BOW        74
6       0           TF-IDF        74
7       1           TF-IDF        72
8       2           TF-IDF        73
9       0     Skip-Thought        78
10      1     Skip-Thought        79
11      2     Skip-Thought        24
12      0         Word2vec        83
13      1         Word2vec        85
14      2         Word2vec        84
15      0            GloVe        86
16      1            GloVe        87
17      2            GloVe        87
18      0         FastText        86
19      1         FastText        85
20      2         FastText        86
21      0  Meta-Embeddings        83
22      1  Meta-Embeddings        86
23      2  Meta-Embeddings        86
24      0             BERT        88
25      1             BERT        88
2

In [0]:
from scipy import stats

# One-way ANOVA table for the 2-class task.
table21 = sm.stats.anova_lm(model21)
print("Anova Table for Sentiment-Analysis 2 class")
print(table21)


print("\n")
print("----------------Sentiment Analysis 2-class---------------")
print("\n")
# Tukey HSD pairwise comparison at a family-wise error rate of 0.05.
m_comp = pairwise_tukeyhsd(endog=df21_melt['F1_SCORE'], groups=df21_melt['embeddings'], alpha=0.05)

print(m_comp)

# Kruskal-Wallis H-test: the rank-based alternative to one-way ANOVA that the
# notebook text names as the fallback for this task. It was described but
# never executed, so it is run here, after the existing output, on the same
# per-method columns that the Levene test uses.
h_stat, kw_pvalue = stats.kruskal(*(df21[column] for column in df21.columns))
print("\n")
print("Kruskal-Wallis H-test")
print(h_stat, kw_pvalue)

Anova Table for Sentiment-Analysis 2 class
              df       sum_sq     mean_sq         F    PR(>F)
embeddings   9.0  2482.966667  275.885185  2.666416  0.032496
Residual    20.0  2069.333333  103.466667       NaN       NaN


----------------Sentiment Analysis 2-class---------------


          Multiple Comparison of Means - Tukey HSD, FWER=0.05          
     group1          group2     meandiff p-adj   lower    upper  reject
-----------------------------------------------------------------------
           BERT             BOW    -14.0 0.7663 -43.4098 15.4098  False
           BERT            ELMo  -4.3333    0.9 -33.7432 25.0765  False
           BERT        FastText  -2.3333    0.9 -31.7432 27.0765  False
           BERT           GloVe  -1.3333    0.9 -30.7432 28.0765  False
           BERT   LEXICON_BASED -21.3333 0.2932 -50.7432  8.0765  False
           BERT Meta-Embeddings     -3.0    0.9 -32.4098 26.4098  False
           BERT    Skip-Thought -27.6667 0.0762 -57.0765  1.7

## Since the ANOVA assumptions were not satisfied according to the Shapiro-Wilk test (p-value < 0.01), we use the **Kruskal-Wallis H-test** to check for **statistically significant** differences between the results


## Kruskal-Wallis H-test
### **Null Hypothesis**  : All groups are drawn from the same distribution, i.e. there is no statistically significant difference between the medians of the three or more groups.

# Emotion Identification 5-class


In [0]:

# F1 scores for the 5-class emotion task: three recorded values per method.
data31 = {
    'LEXICON_BASED': [30, 30, 31],
    'BOW': [32, 31, 32],
    'TF-IDF': [32, 33, 34],
    'Skip-Thought': [35, 36, 36],
    'Word2vec': [33, 39, 42],
    'GloVe': [30, 35, 40],
    'FastText': [36, 43, 44],
    'Meta-Embeddings': [34, 42, 42],
    'BERT': [33, 37, 38],
    'ELMo': [41, 44, 44],
}
df31 = pd.DataFrame(data31)

# Wide -> long so that `embeddings` becomes the grouping column.
df31_melt = pd.melt(
    df31.reset_index(),
    id_vars=['index'],
    value_vars=list(data31),
    var_name='embeddings',
    value_name='F1_SCORE',
)

print(df31_melt)

from scipy import stats

# Fit F1_SCORE against the `embeddings` grouping column for the 5-class task.
model31 = ols(formula='F1_SCORE ~ embeddings', data=df31_melt).fit()

# Shapiro-Wilk test on the model residuals.
w, pvalue = stats.shapiro(model31.resid)
print('Shapiro Test')
print(w, pvalue)

# Levene's test across the method columns.
print("Levene Test")
w, pvalue = stats.levene(*(df31[column] for column in df31.columns))
print(w, pvalue)




    index       embeddings  F1_SCORE
0       0    LEXICON_BASED        30
1       1    LEXICON_BASED        30
2       2    LEXICON_BASED        31
3       0              BOW        32
4       1              BOW        31
5       2              BOW        32
6       0           TF-IDF        32
7       1           TF-IDF        33
8       2           TF-IDF        34
9       0     Skip-Thought        35
10      1     Skip-Thought        36
11      2     Skip-Thought        36
12      0         Word2vec        33
13      1         Word2vec        39
14      2         Word2vec        42
15      0            GloVe        30
16      1            GloVe        35
17      2            GloVe        40
18      0         FastText        36
19      1         FastText        43
20      2         FastText        44
21      0  Meta-Embeddings        34
22      1  Meta-Embeddings        42
23      2  Meta-Embeddings        42
24      0             BERT        33
25      1             BERT        37
2

## Anova Assumptions were satisfied


In [0]:
# One-way ANOVA table for the 5-class emotion task.
table31 = sm.stats.anova_lm(model31)
print("Anova Table for Emotion-identification 5 class", table31, sep="\n")

# Section banner, padded above and below by blank lines.
print("\n", "------------------EMOTION IDENTIFICATION 5 CLASS ---------------------", "\n", sep="\n")

# Tukey HSD pairwise comparison at a family-wise error rate of 0.01.
m_comp = pairwise_tukeyhsd(
    endog=df31_melt['F1_SCORE'],
    groups=df31_melt['embeddings'],
    alpha=0.01,
)

print(m_comp)


Anova Table for Emotion-identification 5 class
              df      sum_sq    mean_sq         F    PR(>F)
embeddings   9.0  447.633333  49.737037  5.058004  0.001238
Residual    20.0  196.666667   9.833333       NaN       NaN


------------------EMOTION IDENTIFICATION 5 CLASS ---------------------


          Multiple Comparison of Means - Tukey HSD, FWER=0.01          
     group1          group2     meandiff p-adj   lower    upper  reject
-----------------------------------------------------------------------
           BERT             BOW  -4.3333 0.7626 -15.3526  6.6859  False
           BERT            ELMo      7.0 0.2252  -4.0193 18.0193  False
           BERT        FastText      5.0 0.6218  -6.0193 16.0193  False
           BERT           GloVe     -1.0    0.9 -12.0193 10.0193  False
           BERT   LEXICON_BASED  -5.6667 0.4808 -16.6859  5.3526  False
           BERT Meta-Embeddings   3.3333    0.9  -7.6859 14.3526  False
           BERT    Skip-Thought  -0.3333    0.9 -1

## Since ANOVA returns a **statistically significant result** (p-value = 0.001238), the post-hoc Tukey HSD test is conducted.

# Emotion Identification 4-class


In [0]:

# F1 scores for the 4-class emotion task: three recorded values per method.
# NOTE(review): the output saved with this cell lists LEXICON_BASED as
# 48, 40, 49, which does not match the values below -- confirm which set is
# intended and re-run the notebook so code and outputs agree.
data41 = {
    'LEXICON_BASED': [40, 40, 40],
    'BOW': [42, 40, 42],
    'TF-IDF': [43, 41, 43],
    'Skip-Thought': [52, 47, 48],
    'Word2vec': [52, 52, 54],
    'GloVe': [45, 51, 51],
    'FastText': [51, 55, 55],
    'Meta-Embeddings': [49, 54, 57],
    'BERT': [41, 44, 44],
    'ELMo': [57, 57, 59],
}
df41 = pd.DataFrame(data41)

# Wide -> long so that `embeddings` becomes the grouping column.
df41_melt = pd.melt(
    df41.reset_index(),
    id_vars=['index'],
    value_vars=list(data41),
    var_name='embeddings',
    value_name='F1_SCORE',
)

print(df41_melt)

from scipy import stats

# Fit F1_SCORE against the `embeddings` grouping column for the 4-class task.
model41 = ols(formula='F1_SCORE ~ embeddings', data=df41_melt).fit()

# Shapiro-Wilk test on the model residuals.
w, pvalue = stats.shapiro(model41.resid)
print('Shapiro Test')
print(w, pvalue)

# Levene's test across the method columns.
print("Levene Test")
w, pvalue = stats.levene(*(df41[column] for column in df41.columns))
print(w, pvalue)



    index       embeddings  F1_SCORE
0       0    LEXICON_BASED        48
1       1    LEXICON_BASED        40
2       2    LEXICON_BASED        49
3       0              BOW        42
4       1              BOW        40
5       2              BOW        42
6       0           TF-IDF        43
7       1           TF-IDF        41
8       2           TF-IDF        43
9       0     Skip-Thought        52
10      1     Skip-Thought        47
11      2     Skip-Thought        48
12      0         Word2vec        52
13      1         Word2vec        52
14      2         Word2vec        54
15      0            GloVe        45
16      1            GloVe        51
17      2            GloVe        51
18      0         FastText        51
19      1         FastText        55
20      2         FastText        55
21      0  Meta-Embeddings        49
22      1  Meta-Embeddings        54
23      2  Meta-Embeddings        57
24      0             BERT        41
25      1             BERT        44
2

In [0]:
# One-way ANOVA table for the 4-class emotion task.
table41 = sm.stats.anova_lm(model41)
print("Anova Table for Emotion-identification 4 class", table41, sep="\n")

# Section banner, padded above and below by blank lines.
print("\n", "---------------------EMOTION IDENTIFICATION 4 Class-------------------", "\n", sep="\n")

# Tukey HSD pairwise comparison at a family-wise error rate of 0.01.
m_comp = pairwise_tukeyhsd(
    endog=df41_melt['F1_SCORE'],
    groups=df41_melt['embeddings'],
    alpha=0.01,
)

print(m_comp)


Anova Table for Emotion-identification 4 class
              df      sum_sq    mean_sq          F    PR(>F)
embeddings   9.0  836.700000  92.966667  12.677273  0.000002
Residual    20.0  146.666667   7.333333        NaN       NaN


---------------------EMOTION IDENTIFICATION 4 Class-------------------


          Multiple Comparison of Means - Tukey HSD, FWER=0.01          
     group1          group2     meandiff p-adj   lower    upper  reject
-----------------------------------------------------------------------
           BERT             BOW  -1.6667    0.9 -11.1826  7.8493  False
           BERT            ELMo  14.6667  0.001   5.1507 24.1826   True
           BERT        FastText  10.6667 0.0032   1.1507 20.1826   True
           BERT           GloVe      6.0 0.2329   -3.516  15.516  False
           BERT   LEXICON_BASED   2.6667    0.9  -6.8493 12.1826  False
           BERT Meta-Embeddings  10.3333 0.0045   0.8174 19.8493   True
           BERT    Skip-Thought      6.0 0.2329

# Hate Speech results


In [0]:

# F1 scores for the hate-speech task: three recorded values per method.
data51 = {
    'LEXICON_BASED': [56, 58, 58],
    'BOW': [50, 51, 49],
    'TF-IDF': [50, 51, 49],
    'Skip-Thought': [58, 61, 56],
    'Word2vec': [54, 57, 58],
    'GloVe': [51, 60, 62],
    'FastText': [54, 59, 62],
    'Meta-Embeddings': [53, 60, 63],
    'BERT': [53, 55, 54],
    'ELMo': [61, 64, 64],
}
df51 = pd.DataFrame(data51)

# Wide -> long so that `embeddings` becomes the grouping column.
df51_melt = pd.melt(
    df51.reset_index(),
    id_vars=['index'],
    value_vars=list(data51),
    var_name='embeddings',
    value_name='F1_SCORE',
)

print(df51_melt)

from scipy import stats

# Fit F1_SCORE against the `embeddings` grouping column for the hate-speech task.
model51 = ols(formula='F1_SCORE ~ embeddings', data=df51_melt).fit()

# Shapiro-Wilk test on the model residuals.
w, pvalue = stats.shapiro(model51.resid)
print('Shapiro Test')
print(w, pvalue)

# Levene's test across the method columns.
print("Levene Test")
w, pvalue = stats.levene(*(df51[column] for column in df51.columns))
print(w, pvalue)




    index       embeddings  F1_SCORE
0       0    LEXICON_BASED        56
1       1    LEXICON_BASED        58
2       2    LEXICON_BASED        58
3       0              BOW        50
4       1              BOW        51
5       2              BOW        49
6       0           TF-IDF        50
7       1           TF-IDF        51
8       2           TF-IDF        49
9       0     Skip-Thought        58
10      1     Skip-Thought        61
11      2     Skip-Thought        56
12      0         Word2vec        54
13      1         Word2vec        57
14      2         Word2vec        58
15      0            GloVe        51
16      1            GloVe        60
17      2            GloVe        62
18      0         FastText        54
19      1         FastText        59
20      2         FastText        62
21      0  Meta-Embeddings        53
22      1  Meta-Embeddings        60
23      2  Meta-Embeddings        63
24      0             BERT        53
25      1             BERT        55
2

In [0]:
# One-way ANOVA table for the hate-speech task.
table51 = sm.stats.anova_lm(model51)
print("Anova Table for HateSpeech results", table51, sep="\n")

# Section banner, padded above and below by blank lines.
print("\n", "-----------------HATE SPEECH RESULTS---------------------", "\n", sep="\n")

# Tukey HSD pairwise comparison at a family-wise error rate of 0.01.
m_comp = pairwise_tukeyhsd(
    endog=df51_melt['F1_SCORE'],
    groups=df51_melt['embeddings'],
    alpha=0.01,
)

print(m_comp)



Anova Table for HateSpeech results
              df      sum_sq    mean_sq         F    PR(>F)
embeddings   9.0  438.966667  48.774074  5.134113  0.001131
Residual    20.0  190.000000   9.500000       NaN       NaN


-----------------HATE SPEECH RESULTS---------------------


          Multiple Comparison of Means - Tukey HSD, FWER=0.01          
     group1          group2     meandiff p-adj   lower    upper  reject
-----------------------------------------------------------------------
           BERT             BOW     -4.0 0.8184 -14.8309  6.8309  False
           BERT            ELMo      9.0 0.0466  -1.8309 19.8309  False
           BERT        FastText   4.3333 0.7467  -6.4976 15.1642  False
           BERT           GloVe   3.6667   0.89  -7.1642 14.4976  False
           BERT   LEXICON_BASED   3.3333    0.9  -7.4976 14.1642  False
           BERT Meta-Embeddings   4.6667 0.6751  -6.1642 15.4976  False
           BERT    Skip-Thought   4.3333 0.7467  -6.4976 15.1642  False
   

# Sarcasm results

In [0]:

# F1 scores for the sarcasm task: three recorded values per method.
# This table has no LEXICON_BASED column, unlike the other tasks.
data61 = {
    'BOW': [48, 49, 47],
    'TF-IDF': [49, 51, 50],
    'Skip-Thought': [53, 54, 54],
    'Word2vec': [44, 52, 52],
    'GloVe': [41, 53, 52],
    'FastText': [42, 52, 53],
    'Meta-Embeddings': [42, 50, 53],
    'BERT': [50, 52, 53],
    'ELMo': [58, 60, 58],
}
df61 = pd.DataFrame(data61)

# Wide -> long so that `embeddings` becomes the grouping column.
df61_melt = pd.melt(
    df61.reset_index(),
    id_vars=['index'],
    value_vars=list(data61),
    var_name='embeddings',
    value_name='F1_SCORE',
)

print(df61_melt)

from scipy import stats

# Fit F1_SCORE against the `embeddings` grouping column for the sarcasm task.
model61 = ols(formula='F1_SCORE ~ embeddings', data=df61_melt).fit()

# Shapiro-Wilk test on the model residuals.
w, pvalue = stats.shapiro(model61.resid)
print('Shapiro Test')
print(w, pvalue)

# Levene's test across the method columns.
print("Levene Test")
w, pvalue = stats.levene(*(df61[column] for column in df61.columns))
print(w, pvalue)




    index       embeddings  F1_SCORE
0       0              BOW        48
1       1              BOW        49
2       2              BOW        47
3       0           TF-IDF        49
4       1           TF-IDF        51
5       2           TF-IDF        50
6       0     Skip-Thought        53
7       1     Skip-Thought        54
8       2     Skip-Thought        54
9       0         Word2vec        44
10      1         Word2vec        52
11      2         Word2vec        52
12      0            GloVe        41
13      1            GloVe        53
14      2            GloVe        52
15      0         FastText        42
16      1         FastText        52
17      2         FastText        53
18      0  Meta-Embeddings        42
19      1  Meta-Embeddings        50
20      2  Meta-Embeddings        53
21      0             BERT        50
22      1             BERT        52
23      2             BERT        53
24      0             ELMo        58
25      1             ELMo        60
2

In [0]:
# One-way ANOVA table for the sarcasm task.
table61 = sm.stats.anova_lm(model61)
print("Anova Table for Sarcasm results", table61, sep="\n")

Anova Table for Sarcasm results
              df      sum_sq    mean_sq         F    PR(>F)
embeddings   8.0  286.074074  35.759259  2.282506  0.069593
Residual    18.0  282.000000  15.666667       NaN       NaN


In [0]:
# Section banner, padded above and below by blank lines.
print("\n", "-----------------SARCASM RESULTS-----------------", "\n", sep="\n")

# Tukey HSD pairwise comparison at a family-wise error rate of 0.01.
# NOTE(review): the ANOVA output recorded for this task shows PR(>F) of about
# 0.0696, i.e. not significant at 0.05 -- confirm a post-hoc test is intended.
m_comp = pairwise_tukeyhsd(
    endog=df61_melt['F1_SCORE'],
    groups=df61_melt['embeddings'],
    alpha=0.01,
)

print(m_comp)



-----------------SARCASM RESULTS-----------------


          Multiple Comparison of Means - Tukey HSD, FWER=0.01          
     group1          group2     meandiff p-adj   lower    upper  reject
-----------------------------------------------------------------------
           BERT             BOW  -3.6667    0.9 -17.5626 10.2292  False
           BERT            ELMo      7.0 0.4661  -6.8959 20.8959  False
           BERT        FastText  -2.6667    0.9 -16.5626 11.2292  False
           BERT           GloVe     -3.0    0.9 -16.8959 10.8959  False
           BERT Meta-Embeddings  -3.3333    0.9 -17.2292 10.5626  False
           BERT    Skip-Thought      2.0    0.9 -11.8959 15.8959  False
           BERT          TF-IDF  -1.6667    0.9 -15.5626 12.2292  False
           BERT        Word2vec  -2.3333    0.9 -16.2292 11.5626  False
            BOW            ELMo  10.6667 0.0741  -3.2292 24.5626  False
            BOW        FastText      1.0    0.9 -12.8959 14.8959  False
          