# Datafram + module import 

In [4]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import texthero as hero

In [6]:
df = pd.read_csv('data/Emotion_final.csv')

# Analyse

In [7]:
df

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
21454,Melissa stared at her friend in dism,fear
21455,Successive state elections have seen the gover...,fear
21456,Vincent was irritated but not dismay,fear
21457,Kendall-Hume turned back to face the dismayed ...,fear


- Les différents sentiments dans le df : 
  - Sadness 
  - Anger 
  - Love 
  - Surprise 
  - Fear 
  - Happy 

In [8]:
smileys = """:-) :) :o) :] :3 :c) :> =] 8) =)  
             :D 8-D 8D x-D xD X-D XD =-D =D =-3 =3 B^D""".split()

smyle_list = []
for i in df.Text  :
    for smyle in smileys :
        if smyle in i :
            smyle_list.append(i)

smyle_list

[]

- Le texte semble plutot propre, aucun smile 

In [9]:
digits = "0 1 2 3 4 5 6 7 8 9".split()

digits_list = []
for i in df.Text  :
    for smyle in digits :
        if smyle in i :
            digits_list.append(i)

digits_list

['However , in 1878 , deaf education in Greenock faced a crisis because there was no-one able to continue to teach the children , and one parent was not happy about his child being taught in sign langu',
 'However , in 1878 , deaf education in Greenock faced a crisis because there was no-one able to continue to teach the children , and one parent was not happy about his child being taught in sign langu',
 'However , in 1878 , deaf education in Greenock faced a crisis because there was no-one able to continue to teach the children , and one parent was not happy about his child being taught in sign langu',
 "He 's a very contented baby , a joy to his parents and big sister , and today at nine weeks old ( 40+3 ) , he smiled at us for the first t",
 "He 's a very contented baby , a joy to his parents and big sister , and today at nine weeks old ( 40+3 ) , he smiled at us for the first t",
 "He 's a very contented baby , a joy to his parents and big sister , and today at nine weeks old ( 40

In [10]:
len(digits_list)

144

- Le datafram contient 144 lignes avec des digits (à prendre en compte)

In [11]:
digits = "{ } / \\ ".split()

others = []
for i in df.Text  :
    for smyle in digits :
        if smyle in i :
            others.append(i)

others

[]

# Cleaning (preprocessing) + visualisation 

In [None]:
df['clean_text'] = hero.clean(df['Text'])
df['tfidf_clean_text'] = hero.tfidf(df['clean_text'])

In [None]:
df

In [None]:
hero.wordcloud(df.clean_text, max_words=100,)

In [None]:
from texthero import stopwords
default_stopwords = stopwords.DEFAULT
custom_stopwords = default_stopwords.union(set(["feel","im","feeling","like","really",'know','time','get','little','bit','would','want','think']))
df['clean_text'] = hero.remove_stopwords(df['clean_text'], custom_stopwords)

In [None]:
hero.wordcloud(df.clean_text, max_words=100,)

In [None]:
NUM_TOP_WORDS = 10
df.groupby('Emotion')['clean_text'].apply(lambda x: hero.top_words(x)[:NUM_TOP_WORDS])

In [None]:
df['pca_tfidf_clean_text'] = hero.pca(df['tfidf_clean_text'])

In [None]:
df

In [None]:
hero.scatterplot(df, col='pca_tfidf_clean_text', color='Emotion', title="Emotion space")

In [None]:
# basic = tf idf 
# word embbeding avec gloves 
# bert 

# Separation jeu de train et de test + entrainement model

In [None]:
df

In [None]:
df.Emotion.unique()

In [None]:
df.loc[df.Emotion == "sadness", 'Emotion'] = 1 
df.loc[df.Emotion == "anger", 'Emotion'] = 2
df.loc[df.Emotion == "love", 'Emotion'] = 3
df.loc[df.Emotion == "surprise", 'Emotion'] = 4 
df.loc[df.Emotion == "fear", 'Emotion'] = 5
df.loc[df.Emotion == "happy", 'Emotion'] = 6 

df.Emotion = np.asarray(df.Emotion).astype('int64').reshape((-1,1))

In [None]:
df.Emotion.unique()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(df.Text, df.Emotion, train_size=0.85, random_state=2, shuffle=True)
X_test, X_valid, y_test, y_valid = train_test_split(X_train, y_train, train_size=0.5, random_state=2, shuffle=True)

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa # F1_score
from keras.models import Sequential
from keras import layers

# example of l2 on a dense layer
from keras.regularizers import l2

from keras.utils import np_utils


tf.random.set_seed(2)

In [None]:
vectorizer = CountVectorizer(max_df=0.7, min_df=2)
vectorizer.fit(df.clean_text)


X_train_t = vectorizer.transform(X_train)
X_test_t = vectorizer.transform(X_test)
X_val_t = vectorizer.transform(X_valid)


# Transformer les y en int pour faire cette partie : 

dummy_y_train = np_utils.to_categorical(y_train)
dummy_y_test = np_utils.to_categorical(y_test)
dummy_y_val = np_utils.to_categorical(y_valid)


In [None]:
X_train_t.shape

In [None]:
X_test_t.shape


In [None]:
X_test.shape

In [None]:
X_test.shape

In [None]:
X_test_t.reshape((-1,1))

In [None]:
print(dummy_y_val)

In [None]:
df.info()

In [None]:
df

In [None]:
model = Sequential()

initializer = tf.keras.initializers.HeNormal()

model.add(layers.Dense(62, input_dim=X_train_t.shape[1],kernel_initializer=initializer, activation="relu"))
model.add(layers.BatchNormalization())
model.add(layers.AlphaDropout(10))
model.add(layers.Dense(21, input_dim=X_train_t.shape[1],kernel_initializer=initializer, activation="relu"))
model.add(layers.BatchNormalization())
model.add(layers.AlphaDropout(10))
model.add(layers.Dense(6, activation="softmax"))

model.compile(
    loss="binary_crossentropy",
    metrics=[tfa.metrics.F1Score(num_classes=2)],
    optimizer=tf.keras.optimizers.Adam(learning_rate=41e-5)
)

# model.summary()

history= model.fit(
    X_train_t,
    dummy_y_train,
    epochs=100,
    batch_size=64,
    callbacks=tf.keras.callbacks.EarlyStopping(patience=3, monitor="val_loss"),
    validation_data=(X_test_t, dummy_y_test)
)

In [None]:
print(f"Train {model.evaluate(X_train_t, dummy_y_train, batch_size=64)} ")
print('-')
print("Test:",model.evaluate(X_test_t, dummy_y_test, batch_size=64))
print(100*'-')
print("Validation:",model.evaluate(X_val_t, dummy_y_val, batch_size=64))
print(100*'-')