#4. Deep learning 

## 4.2 Création d'un réseau de neurones avec Keras

### Import librairies 

In [2]:
import pandas as pd
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os
import numpy as np
import plotly.graph_objects as go

ImportError: Keras requires TensorFlow 2.2 or higher. Install TensorFlow via `pip install tensorflow`

### Lire les données

In [None]:
PATH_TO_DATA=os.path.join("..","Data")
FILE="data_2000_2020.csv"
df = pd.read_csv(os.path.join(PATH_TO_DATA,FILE))

### Séparer Y variable d'intérêt et X l'information 

In [3]:
df_X = df.drop(['winner'], axis = 1)
df_Y = df['winner'] 
df_X.shape

(110532, 275)

### Séparer entre train et test

In [4]:
x_train,x_test = train_test_split(df_X, test_size=0.33, random_state=42)
y_train, y_test = train_test_split(df_Y, test_size=0.33, random_state=42)

In [5]:
#to use loadtxt we need csv but we could have done otherwise 
x_train.to_csv('/content/dataset_x_train.csv',index=False)
x_test.to_csv('/content/dataset_x_test.csv',index=False)
y_train.to_csv('/content/dataset_y_train.csv',index=False)
y_test.to_csv('/content/dataset_y_test.csv',index=False)

Ici, nous avons recréé des csv pour pouvoir utiliser la fonction loadtxt directement. Cette étape n'était pas nécessaire si on avait utilisé une fonction qui prend en argument un dataframe. 

In [6]:
# load the dataset
x_train = loadtxt('dataset_x_train.csv', skiprows = 1,delimiter=',')
x_test = loadtxt('dataset_x_test.csv', skiprows = 1,delimiter=',')
y_train = loadtxt('dataset_y_train.csv', skiprows = 1,delimiter=',')
y_test = loadtxt('dataset_y_test.csv', skiprows = 1,delimiter=',')

## Créer les modèles


### Hyperparamètres à fixer pour les modèles:

Je vous réduis le nombre d'epochs à 20 car sinon, vous risquez de devoir patienter très longtemps.

Pour les résultats présentés dans le mémoire, nous avons pris epochs = 150.

Nous avons tester les modèles ci-dessous en modifiant les hyperparamètres de multiple fois.

In [7]:
nb_neurones = 70 #chose number of neurones in hidden layers (1rst and last layer's neurones are fixed)
ep = 20 #chose number of epochs
bs = 30 #chose batch_size 

### Modèle à 3 couches cachées 

#### Définition des couches

In [8]:
model = Sequential()
model.add(Dense(nb_neurones, input_dim=x_train.shape[1], activation='relu'))
model.add(Dense(nb_neurones, activation= 'relu'))
model.add(Dense(nb_neurones, activation = 'relu'))
model.add(Dense(1, activation='sigmoid'))

#### Choix de la fonction de perte, de l'optimiseur et de la métrique 


In [9]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

#### On entraîne le réseau sur les données 'train'



In [10]:
res=model.fit(x_train, y_train,validation_data=(x_test,y_test), epochs=ep, batch_size=bs)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


#### Graphique représentant l'évolution de l'accuracy pour les données train et les données test.  

In [11]:
size_epochs = len(res.history["accuracy"])
epochs = np.linspace(0,size_epochs,size_epochs+1)

In [12]:
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=epochs, y=res.history["accuracy"],mode='lines', name='training set' ))
fig.add_trace(go.Scatter(x=epochs, 
                         y=res.history["val_accuracy"],
                         mode='lines',
                         name='test set',
                         marker=dict(
                          color='red'
                          )))

#fig.update_layout(legend_title_text = "Accuracy evolution according to number of trees")
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Accuracy")
fig.show()

### Modèle à 5 couches cachées 

In [None]:
model1 = Sequential()
model1.add(Dense(nb_neurones, input_dim=(x_train.shape[1]), activation='relu'))
model1.add(Dense(nb_neurones, activation='relu'))
model1.add(Dense(nb_neurones, activation = 'relu'))
model1.add(Dense(nb_neurones, activation = 'relu'))
model1.add(Dense(nb_neurones, activation = 'relu'))
model1.add(Dense(1, activation='sigmoid'))

In [None]:
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
res1=model1.fit(x_train, y_train,validation_data=(x_test,y_test), epochs=150, batch_size=bs)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [None]:
size_epochs = len(res1.history["accuracy"])
epochs = np.linspace(0,size_epochs,size_epochs+1)

In [None]:
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=epochs, y=res1.history["accuracy"],mode='lines', name='training set' ))
fig.add_trace(go.Scatter(x=epochs, 
                         y=res1.history["val_accuracy"],
                         mode='lines',
                         name='test set',
                         marker=dict(
                          color='red'
                          )))

#fig.update_layout(legend_title_text = "Accuracy evolution according to number of trees")
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Accuracy")
fig.show()

### Modèle à 7 couches cachées 

In [None]:
model2 = Sequential()
model2.add(Dense(nb_neurones, input_dim=x_train.shape[1], activation='relu'))
model2.add(Dense(nb_neurones, activation='relu'))
model2.add(Dense(nb_neurones, activation = 'relu'))
model2.add(Dense(nb_neurones, activation = 'relu'))
model2.add(Dense(nb_neurones, activation = 'relu'))
model2.add(Dense(nb_neurones, activation = 'relu'))
model2.add(Dense(nb_neurones, activation = 'relu'))
model2.add(Dense(1, activation='sigmoid'))

In [None]:
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
res2=model2.fit(x_train, y_train,validation_data=(x_test,y_test), epochs=ep, batch_size=bs)

In [None]:
size_epochs = len(res2.history["accuracy"])
epochs = np.linspace(0,size_epochs,size_epochs+1)

In [None]:
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=epochs, y=res2.history["accuracy"],mode='lines', name='training set' ))
fig.add_trace(go.Scatter(x=epochs, 
                         y=res2.history["val_accuracy"],
                         mode='lines',
                         name='test set',
                         marker=dict(
                          color='red'
                          )))

#fig.update_layout(legend_title_text = "Accuracy evolution according to number of trees")
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Accuracy")
fig.show()

### Modèle à 10 couches cachées 

In [None]:
model3 = Sequential()
model3.add(Dense(nb_neurones, input_dim=x_train.shape[1], activation='relu'))
model3.add(Dense(nb_neurones, activation='relu'))
model3.add(Dense(nb_neurones, activation = 'relu'))
model3.add(Dense(nb_neurones, activation = 'relu'))
model3.add(Dense(nb_neurones, activation = 'relu'))
model3.add(Dense(nb_neurones, activation = 'relu'))
model3.add(Dense(nb_neurones, activation = 'relu'))
model3.add(Dense(1, activation='sigmoid'))

In [None]:
model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
res3=model3.fit(x_train, y_train,validation_data=(x_test,y_test), epochs=ep, batch_size=bs)

In [None]:
size_epochs = len(res3.history["accuracy"])
epochs = np.linspace(0,size_epochs,size_epochs+1)

In [None]:
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=epochs, y=res3.history["accuracy"],mode='lines', name='training set' ))
fig.add_trace(go.Scatter(x=epochs, 
                         y=res3.history["val_accuracy"],
                         mode='lines',
                         name='test set',
                         marker=dict(
                          color='red'
                          )))

#fig.update_layout(legend_title_text = "Accuracy evolution according to number of trees")
fig.update_xaxes(title_text="Epoch")
fig.update_yaxes(title_text="Accuracy")
fig.show()