# Essais d'architectures de réseaux profonds pour analyse de sentiment sur tweets

#### Import et préparation de données : 

In [1]:
import time
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
import tensorflow.keras as keras
import re
import string
import contractions
import nltk

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [2]:
cd C:\\Users\\Wince\\Downloads\\OC\\Projet_7\\data

C:\Users\Wince\Downloads\OC\Projet_7\data


In [3]:
twit_df = pd.read_csv("training.1600000.processed.noemoticon.csv", header=None,
                      names=["sentiment", "twit_id", "datetime", "query", "user_id","text"],
                      encoding = "ISO-8859-1")
twit_df

Unnamed: 0,sentiment,twit_id,datetime,query,user_id,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."
...,...,...,...,...,...,...
1599995,4,2193601966,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,AmandaMarie1028,Just woke up. Having no school is the best fee...
1599996,4,2193601969,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,TheWDBoards,TheWDB.com - Very cool to hear old Walt interv...
1599997,4,2193601991,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,bpbabe,Are you ready for your MoJo Makeover? Ask me f...
1599998,4,2193602064,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,tinydiamondz,Happy 38th Birthday to my boo of alll time!!! ...


In [4]:
twit_df.loc[:,'sentiment'] = twit_df.sentiment.apply(lambda x : 1 if x > 2 else 0)
twit_df = twit_df.loc[:,['text', 'sentiment']]

In [5]:

def string_cleanup(text):
    output = contractions.fix(text)
    output = re.sub(r'http?://\S+', '', output, flags=re.MULTILINE)
    output = re.sub(r'@\w+', '', output, flags=re.MULTILINE)
    output = [char for char in output if char not in string.punctuation]
    output = ''.join(output)
    return output

twit_df.loc[:,'cleaned_text'] = twit_df.text.apply(string_cleanup)

In [6]:
tokenizer = nltk.RegexpTokenizer(r'\w+')
twit_df.loc[:, 'tokens'] = twit_df.loc[
    :, 'cleaned_text'].apply(lambda x: tokenizer.tokenize(x.lower()))

In [7]:
nltk.download('wordnet')
from nltk.stem.wordnet import WordNetLemmatizer
wnl = WordNetLemmatizer()
def lemmatize(tokenlist): 
    out = [wnl.lemmatize(word) for word in tokenlist]
    return out
        
twit_df.loc[:, 'lems'] = twit_df.loc[
    :, 'tokens'].apply(lemmatize)
twit_df.loc[:, 'lems_sequence'] = twit_df.lems.apply(' '.join)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Wince\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Les essais se conduiront sur la variable sélectionnée ci-dessous : 

In [8]:
selected_column = 'cleaned_text'

Nettoyage argot et jargon avant vectorisation : 

On va regarder s'il existe des termes à nettoyer parmi les plus fréquents : 

In [9]:
wordfreq = {}
for corpus in twit_df.loc[:, 'tokens']:
    for token in corpus:
        if token not in wordfreq.keys():
            wordfreq[token] = 1
        else:
            wordfreq[token] += 1

In [10]:
n_features = 1000
most_frequent = sorted(wordfreq, key=wordfreq.get, reverse=True)[:n_features]
word_counts = [wordfreq[i] for i in sorted(
    wordfreq, key=wordfreq.get, reverse=True)][:n_features]
word_counts = pd.concat([pd.Series(most_frequent),
                        pd.Series(word_counts)], axis=1)
word_counts = word_counts.rename(columns={0: 'word', 1: 'count'})
word_counts

Unnamed: 0,word,count
0,i,985578
1,to,615938
2,the,520361
3,is,391503
4,a,377702
...,...,...
995,deal,1773
996,l,1772
997,ohh,1768
998,eh,1768


In [11]:
# import bibtexparser
# from bibtexparser.bparser import BibTexParser
# btp = BibTexParser()
# with open('C:/Users/Wince/Downloads/OC/Projet_7/Modele_avance/slang_dict_en.bib') as bibtex_file:
#     bib_database = bibtexparser.load(bibtex_file, parser = btp)

# print(bib_database)

Train/test Split façon Keras (on mélange et on splitte sur les indexs) : 

In [12]:
 twit_df = twit_df.sample(frac=1)

validation_split = 0.2

num_validation_samples = int(validation_split * len(twit_df))
train_samples = twit_df[:-num_validation_samples][
    twit_df.columns[~twit_df.columns.str.contains('sentiment')]]
val_samples = twit_df[-num_validation_samples:][
    twit_df.columns[~twit_df.columns.str.contains('sentiment')]]
train_labels = twit_df[:-num_validation_samples].sentiment
val_labels = twit_df[-num_validation_samples:].sentiment

#### Vectorisation avec l'outil Keras et création de la couche d'embedding GloVe : 

Voyons la longueur du commentaire le plus long : 

In [13]:
len(max(twit_df.tokens, key=len))

117

On va donc construire nos tenseurs en paddant notre séquence à 120 mots (ça laisse une petite marge pour l'inférence :)

In [15]:
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

start_time = time.time()
vectorizer = TextVectorization(max_tokens=50000, output_sequence_length=120)
text_ds = tf.data.Dataset.from_tensor_slices(train_samples.loc[:,selected_column]).batch(128)
vectorizer.adapt(text_ds)
print("--- Done in %s seconds ---" % (time.time() - start_time))
def _get_vocabulary():
    keyz, valuez = vectorizer._index_lookup_layer._table_handler.data()
    return [x.decode('latin-1') for _,x in sorted(zip(valuez, keyz))]
vocab = _get_vocabulary()
my_word_index = dict(zip(vocab, range(len(vocab))))

--- Done in 8.947515726089478 seconds ---


On charge notre dictionnaire GloVe puis on crée le noyau de notre embedding :

In [16]:
cd C:/Users/Wince/Downloads/OC/Projet_7/Modele_avance/

C:\Users\Wince\Downloads\OC\Projet_7\Modele_avance


In [17]:
start_time = time.time()
glove_dict = {}
with open("glove.twitter.27B.200d.txt", 'r', encoding="ISO-8859-1") as f:
    for line in f:
        word, values = line.split(maxsplit=1)
        vector = np.fromstring(values, "f", sep=" ")
        glove_dict[word] = vector
print("---Glove vectors loaded in %s seconds ---" % (time.time() - start_time))

  vector = np.fromstring(values, "f", sep=" ")


---Glove vectors loaded in 81.9329743385315 seconds ---


In [18]:
len(glove_dict)

1118763

In [19]:

def create_embedding_matrix(num_tokens = len(vocab)+2,
                            embedding_dim = 200,
                            my_word_index=my_word_index,
                            dictionary=glove_dict):
    start_time = time.time()
    hits = 0
    misses = 0
    embedding_matrix = np.zeros((num_tokens, embedding_dim))
    for word, i in my_word_index.items():
        embedding_vector = dictionary.get(word)
        if embedding_vector is not None and len(embedding_vector)!=0:
            embedding_matrix[i] = embedding_vector
            hits += 1
        else:
            misses += 1
    print("Converted %d words (%d misses)" % (hits, misses))
    print("--- Done in %s seconds ---" % (time.time() - start_time))
    return embedding_matrix

embedding_matrix = create_embedding_matrix()

Converted 40883 words (9116 misses)
--- Done in 0.08707880973815918 seconds ---


In [20]:
from tensorflow.keras.layers import Embedding

embedding_layer = Embedding(len(vocab)+2, 200,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False)

Formatage des données d'entraînement/validation : 

In [22]:
x_train = vectorizer(np.array([[s] for s in train_samples.loc[:,selected_column]])).numpy()
x_val = vectorizer(np.array([[s] for s in val_samples.loc[:,selected_column]])).numpy()

y_train = np.array(train_labels)
y_val = np.array(val_labels)

In [23]:
y_val

array([1, 1, 0, ..., 0, 1, 0], dtype=int64)

In [24]:
x_train.shape

(1280000, 120)

In [25]:
x_val[:1600].shape

(1600, 120)

#### Création et évaluation d'un modèle Baseline : 

Vu la taille des données, on utilisera un classifieur SGD en perte log plutôt qu'une régression logistique (le problème résolu est le même mais la méthode est plus adaptée). 

In [31]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix
cv = CountVectorizer(max_features=50000)
X_train = cv.fit_transform(train_samples.loc[:,selected_column])
X_test = cv.transform(val_samples.loc[:,selected_column])
sgd = SGDClassifier(loss='log')
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test[:1600])
confusion = confusion_matrix(y_val[:1600], y_pred)

In [32]:
pd.DataFrame(confusion.ravel().reshape(1,-1), columns=["tn", "fp", "fn", "tp"])

Unnamed: 0,tn,fp,fn,tp
0,617,175,152,656


In [33]:
sklearn.metrics.roc_auc_score(y_val[:1600], y_pred)

0.7954607960796078

In [34]:
sklearn.metrics.accuracy_score(y_val[:1600], y_pred)

0.795625

On entraîne aussi un classifieur naif Bayésien : 

In [35]:
from sklearn.naive_bayes import MultinomialNB

nb = MultinomialNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test[:1600])
confusion = confusion_matrix(y_val[:1600], y_pred)

In [36]:
pd.DataFrame(confusion.ravel().reshape(1,-1), columns=["tn", "fp", "fn", "tp"])

Unnamed: 0,tn,fp,fn,tp
0,636,156,183,625


In [37]:
sklearn.metrics.roc_auc_score(y_val[:1600], y_pred)

0.7882725772577258

In [38]:
sklearn.metrics.accuracy_score(y_val[:1600], y_pred)

0.788125

#### Modèle basique (1 couche lstm) : 

In [54]:
from tensorflow.keras import layers
model = keras.Sequential()

model.add(embedding_layer)
model.add(layers.LSTM(128, return_sequences=True))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 200)         10000200  
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 128)         168448    
_________________________________________________________________
dense_1 (Dense)              (None, None, 1)           129       
Total params: 10,168,777
Trainable params: 168,577
Non-trainable params: 10,000,200
_________________________________________________________________


In [55]:
model.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"]
)

model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1fb1b5a2eb0>

In [56]:
simple_bd = keras.Sequential()
simple_bd.add(embedding_layer)
simple_bd.add(layers.Bidirectional(layers.LSTM(64, return_sequences=True)))
simple_bd.add(layers.Dense(1, activation='sigmoid'))

simple_bd.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 200)         10000200  
_________________________________________________________________
bidirectional (Bidirectional (None, None, 128)         135680    
_________________________________________________________________
dense_2 (Dense)              (None, None, 1)           129       
Total params: 10,136,009
Trainable params: 135,809
Non-trainable params: 10,000,200
_________________________________________________________________


In [58]:
simple_bd.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"]
)
simple_bd.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1fb0d6e1af0>

On rend trainable la couche d'embedding sur le 1er modèle pour voir si cela permet de gagner un peu d'exactitude : 

In [None]:
# model.layers[0].trainable = True
# model.compile(
#     loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"]
# )
# model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

#### Modèle basique avec embedding fasttext : 

In [60]:
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

start_time = time.time()
vectorizer = TextVectorization(max_tokens=50000, output_sequence_length=120)
text_ds = tf.data.Dataset.from_tensor_slices(train_samples.loc[:,'cleaned_text']).batch(128)
vectorizer.adapt(text_ds)
print("--- Done in %s seconds ---" % (time.time() - start_time))
def _get_vocabulary():
    keyz, valuez = vectorizer._index_lookup_layer._table_handler.data()
    return [x.decode('latin-1') for _,x in sorted(zip(valuez, keyz))]
vocab = _get_vocabulary()
my_word_index = dict(zip(vocab, range(len(vocab))))

--- Done in 9.87449026107788 seconds ---


In [61]:
start_time = time.time()
ft_dict = {}
with open('fasttext_english_twitter_100D.vec', 'r', encoding="ISO-8859-1") as f:
    for line in f:
        word, values = line.split(maxsplit=1)
        vector = np.fromstring(values, "f", sep=" ")
        ft_dict[word] = vector
print("---Fasttext vectors loaded in %s seconds ---" % (time.time() - start_time))

  vector = np.fromstring(values, "f", sep=" ")


---Fasttext vectors loaded in 24.463834762573242 seconds ---


In [62]:
fasttext_matrix = create_embedding_matrix(num_tokens = len(vocab)+2,
                                          embedding_dim = 100,
                                          my_word_index=my_word_index,
                                          dictionary=ft_dict)

Converted 40476 words (9523 misses)
--- Done in 0.06606030464172363 seconds ---


In [63]:
from tensorflow.keras.layers import Embedding

fasttext_embedding_layer = Embedding(
    len(vocab)+2,
    100,
    embeddings_initializer=keras.initializers.Constant(fasttext_matrix),
    trainable=False,
)

In [64]:
x_train.shape

(1280000, 120)

In [65]:
fasttext_single_layer = keras.Sequential()

fasttext_single_layer.add(fasttext_embedding_layer)
fasttext_single_layer.add(layers.LSTM(128, return_sequences=True))
fasttext_single_layer.add(layers.Dense(1, activation='sigmoid'))

fasttext_single_layer.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 100)         5000100   
_________________________________________________________________
lstm_3 (LSTM)                (None, None, 128)         117248    
_________________________________________________________________
dense_3 (Dense)              (None, None, 1)           129       
Total params: 5,117,477
Trainable params: 117,377
Non-trainable params: 5,000,100
_________________________________________________________________


In [67]:
fasttext_single_layer.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"]
)
fasttext_single_layer.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1fb053769d0>

Puis on tune la couche d'embedding : 

In [68]:
# fasttext_single_layer.layers[0].trainable = True
# fasttext_single_layer.compile(
#     loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc",'AUC']
# )
# fasttext_single_layer.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

on définit d'emblée notre couche d'embedding comme trainable pour les modèles suivants : 

In [69]:
trainable_fasttext_embedding = Embedding(
    len(vocab)+2,
    100,
    embeddings_initializer=keras.initializers.Constant(fasttext_matrix),
    trainable=True,
)

#### Essais d'une architecture combinée (LSTM + ConvNet) : 

In [70]:
from tensorflow.keras import layers

int_sequences_input = keras.Input(shape=(None,), dtype="int64")
embedded_sequences = trainable_fasttext_embedding(int_sequences_input)
x = layers.Conv1D(120, 5, activation="relu")(embedded_sequences)
x = layers.MaxPooling1D(5)(x)
x = layers.LSTM(112, return_sequences=True)(x)
x = layers.Conv1D(112, 3, activation="relu")(x)
x = layers.MaxPooling1D(3)(x)
x = layers.Conv1D(108, 3, activation="relu",input_shape=(None,108))(x)
x = layers.Dropout(0.2)(x)
x = layers.GlobalMaxPooling1D()(x)
preds = layers.Dense(1, activation="sigmoid")(x)
model_pilot = keras.Model(int_sequences_input, preds)
model_pilot.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding_2 (Embedding)      (None, None, 100)         5000100   
_________________________________________________________________
conv1d (Conv1D)              (None, None, 120)         60120     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, None, 120)         0         
_________________________________________________________________
lstm_4 (LSTM)                (None, None, 112)         104384    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, None, 112)         37744     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, None, 112)         0     

In [71]:
model_pilot.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"]
)
history = model_pilot.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))
history.history.keys()

Epoch 1/3
Epoch 2/3
Epoch 3/3


dict_keys(['loss', 'acc', 'val_loss', 'val_acc'])

### Essais d'architectures orientées n-grammes : 
On teste d'abord un Bi-LSTM alimenté par des couches convolutionnelles concaténées afin de mieux capturer le poids des n-grammes : 

In [72]:
from tensorflow.keras import layers
from tensorflow.keras.layers import Concatenate
import keras.backend as K


int_sequences_input = keras.Input(shape=(None,), dtype="int64")
embedded_sequences = trainable_fasttext_embedding(int_sequences_input)
lamb = layers.Lambda(lambda x: K.permute_dimensions(x,(0,2,1)))(embedded_sequences)
first = layers.Conv1D(120, 5, activation="relu")(embedded_sequences)
first = layers.MaxPooling1D(5)(first)
second = layers.Conv1D(120, 4, activation="relu")(embedded_sequences)
second = layers.MaxPooling1D(4)(first)
third = layers.Conv1D(120, 3, activation="relu")(embedded_sequences)
third = layers.MaxPooling1D(3)(first)
fourth = layers.Conv1D(120, 2, activation="relu")(embedded_sequences)
fourth = layers.MaxPooling1D(2)(first)
merged = Concatenate(axis=1)([lamb, first, second, third, fourth])
x = layers.Dense(240, activation="tanh")(merged)
x = layers.Dropout(0.2)(x)
x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
x = layers.Conv1D(148, 3, activation="relu")(x)
x = layers.Dropout(0.2)(x)
x = layers.MaxPooling1D(3)(x)
x = layers.Conv1D(144, 2, activation="relu")(x)
x = layers.Dropout(0.2)(x)
x = layers.GlobalMaxPooling1D()(x)
preds = layers.Dense(1, activation="sigmoid")(x)
mixed_model = keras.Model(int_sequences_input, preds)
mixed_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 100)    5000100     input_2[0][0]                    
__________________________________________________________________________________________________
conv1d_3 (Conv1D)               (None, None, 120)    60120       embedding_2[1][0]                
__________________________________________________________________________________________________
max_pooling1d_2 (MaxPooling1D)  (None, None, 120)    0           conv1d_3[0][0]                   
____________________________________________________________________________________________

In [73]:
mixed_model.layers[1].trainable = True

In [74]:
mixed_model.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"]
)
mixed_model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1fb2a8afd00>

#### Essai du merge de n-grammes sur 2 couches denses : 

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.layers import Concatenate
import keras.backend as K
from tensorflow.keras.constraints import max_norm


int_sequences_input = keras.Input(shape=(None,), dtype="int64")
embedded_sequences = trainable_fasttext_embedding(int_sequences_input)
lamb = layers.Lambda(lambda x: K.permute_dimensions(x,(0,2,1)))(embedded_sequences)
first = layers.Conv1D(120, 5, activation="relu")(embedded_sequences)
first = layers.MaxPooling1D(5)(first)
second = layers.Conv1D(120, 4, activation="relu")(embedded_sequences)
second = layers.MaxPooling1D(4)(first)
third = layers.Conv1D(120, 3, activation="relu")(embedded_sequences)
third = layers.MaxPooling1D(3)(first)
fourth = layers.Conv1D(120, 2, activation="relu")(embedded_sequences)
fourth = layers.MaxPooling1D(2)(first)
merged = Concatenate(axis=1)([lamb, first, second, third, fourth])
x = layers.Dropout(0.2)(x)
x = layers.Dense(600, activation="sigmoid")(merged)
preds = layers.Dense(1,kernel_constraint=max_norm(3), activation="sigmoid")(x)
ngrams_model = keras.Model(int_sequences_input, preds)
ngrams_model.summary()

In [None]:
ngrams_model.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=["acc"]
)
ngrams_model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

### Essais d'autres architectures avancées : 
<br>On revient à notre LSTM avec 2 couches de convolution, qu'on teste avec l'embedding GloVe : 

In [77]:
adv_glove_model = keras.Sequential()
adv_glove_model.add(embedding_layer)
adv_glove_model.add(layers.LSTM(128, return_sequences=True))
adv_glove_model.add(layers.Conv1D(128, 5, activation="relu", input_shape=(None,128)))
adv_glove_model.add(layers.MaxPooling1D(5))
adv_glove_model.add(layers.Conv1D(10, 2, activation="relu", input_shape=(None,128)))
adv_glove_model.add(layers.GlobalMaxPooling1D())
adv_glove_model.add(layers.Dropout(0.3))
adv_glove_model.add(layers.Dense(1, activation='sigmoid'))
adv_glove_model.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=['acc']
)

In [78]:
adv_glove_model.layers[0].trainable = True
adv_glove_model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 200)         10000200  
_________________________________________________________________
lstm_7 (LSTM)                (None, None, 128)         168448    
_________________________________________________________________
conv1d_11 (Conv1D)           (None, None, 128)         82048     
_________________________________________________________________
max_pooling1d_9 (MaxPooling1 (None, None, 128)         0         
_________________________________________________________________
conv1d_12 (Conv1D)           (None, None, 10)          2570      
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 10)                0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 10)              

In [80]:
adv_glove_model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1fb2e183100>

#### Modèles avec LSTM bidirectionnel : 

In [81]:
blstm_model = keras.Sequential()
blstm_model.add(embedding_layer)
blstm_model.add(layers.Bidirectional(layers.LSTM(128, return_sequences=True)))
blstm_model.add(layers.Conv1D(128, 5, activation="relu", input_shape=(None,128)))
blstm_model.add(layers.MaxPooling1D(5))
blstm_model.add(layers.Conv1D(10, 2, activation="relu", input_shape=(None,128)))
blstm_model.add(layers.MaxPooling1D(5))
blstm_model.add(layers.Dropout(0.1))
blstm_model.add(layers.Dense(1, activation='sigmoid'))

In [82]:
blstm_model.layers[0].trainable = True
blstm_model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 200)         10000200  
_________________________________________________________________
bidirectional_3 (Bidirection (None, None, 256)         336896    
_________________________________________________________________
conv1d_13 (Conv1D)           (None, None, 128)         163968    
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, None, 128)         0         
_________________________________________________________________
conv1d_14 (Conv1D)           (None, None, 10)          2570      
_________________________________________________________________
max_pooling1d_11 (MaxPooling (None, None, 10)          0         
_________________________________________________________________
dropout_6 (Dropout)          (None, None, 10)        

In [84]:
blstm_model.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=['acc']
)
blstm_model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1fb51e12100>

On essaie d'ajouter une couche LSTM : 

In [92]:
dl_blstm_model = keras.Sequential()
dl_blstm_model.add(embedding_layer)
dl_blstm_model.add(layers.Bidirectional(layers.LSTM(128, return_sequences=True,dropout=0.2, recurrent_dropout=0.2)))
dl_blstm_model.add(layers.Bidirectional(layers.LSTM(128, return_sequences=True,dropout=0.2, recurrent_dropout=0.2)))
dl_blstm_model.add(layers.Conv1D(128, 5, activation="relu", input_shape=(None,128)))
dl_blstm_model.add(layers.MaxPooling1D(5))
dl_blstm_model.add(layers.Conv1D(32, 2, activation="relu", input_shape=(None,128)))
dl_blstm_model.add(layers.MaxPooling1D(3))
dl_blstm_model.add(layers.Dropout(0.2))
dl_blstm_model.add(layers.Dense(1, activation='sigmoid'))
dl_blstm_model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 200)         10000200  
_________________________________________________________________
bidirectional_8 (Bidirection (None, None, 256)         336896    
_________________________________________________________________
bidirectional_9 (Bidirection (None, None, 256)         394240    
_________________________________________________________________
conv1d_17 (Conv1D)           (None, None, 128)         163968    
_________________________________________________________________
max_pooling1d_14 (MaxPooling (None, None, 128)         0         
_________________________________________________________________
conv1d_18 (Conv1D)           (None, None, 32)          8224      
_________________________________________________________________
max_pooling1d_15 (MaxPooling (None, None, 32)        

In [93]:
dl_blstm_model.compile(
    loss="binary_crossentropy", optimizer="rmsprop", metrics=[['acc']]
)

Pour des raisons de temps d'entraînement, la cellule ci dessous a été recopiée d'un autre notebook - il s'agit du résultat d'un entraînement du modèle ci-dessus : 

In [218]:
dl_blstm_model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x20ee9caa100>

### Conclusion des essais : 
De nombreuses architectures ont été essayées, le meilleur résultat est obtenu par le réseau le plus complexe (double couche de LSTM bidirectionnel). Le temps d'entraînement est cependant élevé, le modèle assez lourd en terme de poids et ce modèle semble atteindre son best fit assez rapidement (entre 2 et 3 epochs) malgré une régularisation optimisée par de nombreux essais. <br>On choisit donc de mettre en prod un modèle plus simple et léger pour faire la mise en prod sous Azure même si les performances de ce dernier modèle pourraient justifier. 
<br>On note aussi que rendre la couche d'embedding entraînable permet d'améliorer les résultats obtenus, et on créera donc notre modèle final avec cette propriété. 