# Défi INSA Toulouse 2019

## Traitement des données

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.cluster import KMeans

In [2]:
data_train = pd.read_hdf('train.hdf5')
data_valid = pd.read_hdf('validation.hdf5')

In [3]:
data_train.shape

(1677, 61440)

In [4]:
data_valid.shape

(594, 61440)

## Clustering-Based Anomaly Detection : K-Means

L'algorithme des K-Means est un algorithme de classification largement utilisé dans ce domaine. Il crée «k» des groupes similaires de points de données. Les instances de données qui ne font pas partie de ces groupes peuvent potentiellement être marquées comme des anomalies.

**K-Means**

## 1ère étude : 

In [None]:
## Faisons un kmeans sur toute les données, et on va observer leurs classes

In [5]:
donnees_train = data_train
donnees_valid = data_valid

In [6]:
liste_indice=[]
for indice in range(1,len(data_train)+1):
    liste_indice.append(indice)
    
Idft=[]
for elem in liste_indice:
    Idft.append(str(elem)+str('_train'))
    
donnees_train['Type_JeuDeDonnees'] = 'train'
donnees_train['Idft'] = Idft

liste_indice_valid =[]
for indice in range(1,len(data_valid)+1):
    liste_indice_valid.append(indice)
    
Idft_valid=[]
for elem in liste_indice_valid:
    Idft_valid.append(str(elem)+str('_valid'))

donnees_valid['Type_JeuDeDonnees'] = 'valid'
donnees_valid['Idft'] = Idft_valid

In [7]:
concat_train_valid = pd.concat([donnees_train, donnees_valid])
concat_train_valid_2 = concat_train_valid.iloc[:,:-2]

#### * 1ère étape : 

Le paramètre **random_state** de la fonction KMeans() a comme valeur par défaut None. Elle détermine la génération de nombres aléatoires pour l'initialisation du centroïde. On utilise un entier pour rendre le caractère aléatoire déterministe.

In [8]:
mod_kmeans = KMeans(n_clusters=10, random_state=0).fit(concat_train_valid_2)

In [9]:
mod_kmeans.labels_

array([0, 0, 0, ..., 0, 0, 0])

In [10]:
np.unique(mod_kmeans.labels_)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
tb_kmeans = pd.DataFrame({'Type_JeuDeDonnees': concat_train_valid['Type_JeuDeDonnees'], 'Idft': concat_train_valid['Idft'], "gpe_kmeans" : mod_kmeans.labels_})
tb_kmeans

Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans
0,train,1_train,0
1,train,2_train,0
2,train,3_train,0
3,train,4_train,0
4,train,5_train,0
...,...,...,...
589,valid,590_valid,4
590,valid,591_valid,2
591,valid,592_valid,0
592,valid,593_valid,0


In [12]:
np.unique(tb_kmeans['gpe_kmeans'])

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [13]:
compte = [len(tb_kmeans[tb_kmeans['gpe_kmeans']==k]) for k in np.unique(tb_kmeans['gpe_kmeans'])]
indice_k = [k for k in np.unique(tb_kmeans['gpe_kmeans'])]
compte_train = [len(tb_kmeans[(tb_kmeans['gpe_kmeans']==k) & (tb_kmeans['Type_JeuDeDonnees']=='train')]) for k in np.unique(tb_kmeans['gpe_kmeans'])]
compte_valid = [len(tb_kmeans[(tb_kmeans['gpe_kmeans']==k) & (tb_kmeans['Type_JeuDeDonnees']=='valid')]) for k in np.unique(tb_kmeans['gpe_kmeans'])]
pourc_seq_train = [compte_train[k]/compte[k] for k in range(0,len(np.unique(tb_kmeans['gpe_kmeans'])))]
pourc_seq_valid = [compte_valid[k]/compte[k] for k in range(0,len(np.unique(tb_kmeans['gpe_kmeans'])))]

In [14]:
summary_kmeans = pd.DataFrame({"compte_seq": compte, "Pourc_train": pourc_seq_train, "Pourc_valid": pourc_seq_valid, "gpe_kmeans": indice_k})
summary_kmeans

Unnamed: 0,compte_seq,Pourc_train,Pourc_valid,gpe_kmeans
0,2067,0.811321,0.188679,0
1,31,0.0,1.0,1
2,27,0.0,1.0,2
3,10,0.0,1.0,3
4,15,0.0,1.0,4
5,13,0.0,1.0,5
6,15,0.0,1.0,6
7,1,0.0,1.0,7
8,89,0.0,1.0,8
9,3,0.0,1.0,9


In [25]:
# On remarque que toutes les données d'apprentissage sont regroupés dans le groupe 0
# On suppose donc que les données anormales pour le jeu de données de validation sont celles pour les séquences qui appartiennent aux groupes différent 
# du groupe 0.
# Il est cependant possible que dans le groupe 0, certaines séquences du jeu de données de validation soient anormales.

In [15]:
summary_kmeans[ summary_kmeans['Pourc_valid']== min(summary_kmeans['Pourc_valid']) ].index

Int64Index([0], dtype='int64')

In [16]:
tb_kmeans_valid = tb_kmeans[ tb_kmeans['Type_JeuDeDonnees'] == 'valid' ]
index_min_gps_kmeans_valid = summary_kmeans[ summary_kmeans['Pourc_valid']== min(summary_kmeans['Pourc_valid']) ].index

In [None]:
# Ne gardons pour l'instant que les groupes différent du groupe 0 :

In [17]:
liste_indice = [gpe for gpe in np.unique(tb_kmeans['gpe_kmeans']) if gpe!=index_min_gps_kmeans_valid]
liste_indice

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [None]:
# Je vais d'abord remplacés les valeurs 0 par "normales?", et non par 0 pour indiquer que c'est une séquence normale, 
# puis remplacer les autres valeurs correspondant aux autres groupes par "anormales" avant de le remplacer par 1 pour indiquer que c'est une 
# séquence anormale, afin d'éviter que Python confonde les groupes 1 et 0 obtenus de la liste 'np.unique(tb_kmeans['gpe_kmeans'])'
# en des séquences normales ou anormales par exemple.

In [18]:
gpe_predit = tb_kmeans_valid['gpe_kmeans']
tb_kmeans_valid['gpe_predit'] = gpe_predit

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [19]:
tb_kmeans_valid['gpe_predit'] = tb_kmeans_valid['gpe_predit'].replace(index_min_gps_kmeans_valid,"normales?")
for gpe_within_0 in liste_indice:
    tb_kmeans_valid['gpe_predit'] = tb_kmeans_valid['gpe_predit'].replace(gpe_within_0,"anormales")
tb_kmeans_valid['gpe_predit'] = tb_kmeans_valid['gpe_predit'].replace("normales?",0)
tb_kmeans_valid['gpe_predit'] = tb_kmeans_valid['gpe_predit'].replace("anormales",1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a

In [20]:
tb_kmeans_valid

Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans,gpe_predit
0,valid,1_valid,0,0
1,valid,2_valid,8,1
2,valid,3_valid,0,0
3,valid,4_valid,0,0
4,valid,5_valid,6,1
...,...,...,...,...
589,valid,590_valid,4,1
590,valid,591_valid,2,1
591,valid,592_valid,0,0
592,valid,593_valid,0,0


In [21]:
print( list(tb_kmeans_valid['gpe_predit']).count(1) ) #204
tb_kmeans_valid.head()

204


Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans,gpe_predit
0,valid,1_valid,0,0
1,valid,2_valid,8,1
2,valid,3_valid,0,0
3,valid,4_valid,0,0
4,valid,5_valid,6,1


In [22]:
tb_kmeans_valid['anomaly'] = tb_kmeans_valid['gpe_predit']
pred_anomaly = pd.DataFrame(tb_kmeans_valid['anomaly'])
pred_anomaly['seqID'] = np.arange(0,len(data_valid))
pred_anomaly = pred_anomaly[['seqID', 'anomaly']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [23]:
len(pred_anomaly[pred_anomaly['anomaly']==1])

204

#### * 2ème étape : 

In [None]:
# Il est cependant possible que dans le groupe 0, certaines séquences du jeu de données de validation soient anormales.
# Nous allons de nouveau effectuer un kmeans sur les 2067 séquences qui ont eu pour groupe le groupe 0.

In [48]:
gpe0 = tb_kmeans[tb_kmeans['gpe_kmeans']==0]
gpe0

Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans
0,train,1_train,0
1,train,2_train,0
2,train,3_train,0
3,train,4_train,0
4,train,5_train,0
...,...,...,...
586,valid,587_valid,0
588,valid,589_valid,0
591,valid,592_valid,0
592,valid,593_valid,0


In [26]:
concat_train_valid

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,61432,61433,61434,61435,61436,61437,61438,61439,Type_JeuDeDonnees,Idft
0,0.041259,0.041259,0.032573,0.023887,0.029315,0.041259,0.045602,0.038001,0.030401,0.032573,...,-0.003257,0.065145,0.047773,-0.009772,0.031487,0.096632,0.077089,0.040173,train,1_train
1,-0.211722,-0.264924,-0.274696,-0.236694,-0.156349,-0.059716,0.005429,0.046687,0.153091,0.281210,...,-0.241037,-0.636252,-0.953292,-0.980436,-0.846888,-0.838202,-0.880546,-0.739398,train,2_train
2,0.214105,0.154930,0.136640,0.013987,-0.038733,-0.015063,-0.111894,-0.104363,0.047340,-0.054871,...,0.699337,0.965085,1.086662,1.132926,1.279249,1.296464,0.937112,0.451879,train,3_train
3,-0.154837,-0.127768,-0.217638,-0.284770,-0.299929,-0.270694,-0.077960,0.092036,0.076877,0.154837,...,-0.036814,-0.024904,0.031400,0.140761,-0.011911,-0.173244,-0.063884,0.081208,train,4_train
4,-1.022780,-0.916376,-0.676425,-0.461445,-0.330069,-0.122690,0.178064,0.489675,0.799115,0.931577,...,0.836030,0.479903,0.109661,-0.285553,-0.628651,-0.916376,-1.010837,-0.804544,train,5_train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
589,-18.291415,-18.288957,-18.285270,-18.282812,-18.280354,-18.276668,-18.271752,-18.268065,-18.268065,-18.270523,...,-19.515430,-19.514201,-19.511744,-19.506828,-19.501912,-19.496996,-19.493310,-19.490852,valid,590_valid
590,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,...,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,-13.352138,valid,591_valid
591,0.031800,-0.143735,-0.059784,0.248039,0.283655,-0.010176,-0.153911,0.069960,0.283655,0.183167,...,0.166631,0.171719,0.153911,0.160271,0.141191,0.050880,-0.020352,0.025440,valid,592_valid
592,-0.026635,-0.026635,-0.006721,0.009210,-0.008464,-0.036094,-0.039082,-0.026137,-0.017674,-0.017674,...,0.104798,0.094841,0.073184,0.058747,0.062978,0.081399,0.103056,0.113760,valid,593_valid


In [27]:
np.unique(concat_train_valid['Idft'])

array(['1000_train', '1001_train', '1002_train', ..., '99_valid',
       '9_train', '9_valid'], dtype=object)

In [49]:
m = concat_train_valid.Idft.isin(gpe0.Idft)
new_tb = concat_train_valid[m]
new_tb

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,61432,61433,61434,61435,61436,61437,61438,61439,Type_JeuDeDonnees,Idft
0,0.041259,0.041259,0.032573,0.023887,0.029315,0.041259,0.045602,0.038001,0.030401,0.032573,...,-0.003257,0.065145,0.047773,-0.009772,0.031487,0.096632,0.077089,0.040173,train,1_train
1,-0.211722,-0.264924,-0.274696,-0.236694,-0.156349,-0.059716,0.005429,0.046687,0.153091,0.281210,...,-0.241037,-0.636252,-0.953292,-0.980436,-0.846888,-0.838202,-0.880546,-0.739398,train,2_train
2,0.214105,0.154930,0.136640,0.013987,-0.038733,-0.015063,-0.111894,-0.104363,0.047340,-0.054871,...,0.699337,0.965085,1.086662,1.132926,1.279249,1.296464,0.937112,0.451879,train,3_train
3,-0.154837,-0.127768,-0.217638,-0.284770,-0.299929,-0.270694,-0.077960,0.092036,0.076877,0.154837,...,-0.036814,-0.024904,0.031400,0.140761,-0.011911,-0.173244,-0.063884,0.081208,train,4_train
4,-1.022780,-0.916376,-0.676425,-0.461445,-0.330069,-0.122690,0.178064,0.489675,0.799115,0.931577,...,0.836030,0.479903,0.109661,-0.285553,-0.628651,-0.916376,-1.010837,-0.804544,train,5_train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,0.100795,0.148156,0.183373,0.199161,0.194303,0.163943,0.117796,0.060720,-0.018216,-0.109295,...,0.024288,0.102009,0.144513,0.177301,0.223448,0.268381,0.306027,0.358246,valid,587_valid
588,0.072936,0.055511,0.055760,0.095588,0.116996,0.085631,0.026386,-0.010704,-0.014438,-0.011451,...,-0.010704,0.039082,0.034103,-0.003983,-0.013442,0.016927,0.018918,-0.022403,valid,589_valid
591,0.031800,-0.143735,-0.059784,0.248039,0.283655,-0.010176,-0.153911,0.069960,0.283655,0.183167,...,0.166631,0.171719,0.153911,0.160271,0.141191,0.050880,-0.020352,0.025440,valid,592_valid
592,-0.026635,-0.026635,-0.006721,0.009210,-0.008464,-0.036094,-0.039082,-0.026137,-0.017674,-0.017674,...,0.104798,0.094841,0.073184,0.058747,0.062978,0.081399,0.103056,0.113760,valid,593_valid


In [None]:
# Refaisons un Kmeans sur le groupe 0, afin de voir si d'autres séquences anormales resortent.

In [50]:
new_tb2 = new_tb.iloc[:,:-2]
modele_2_kmeans = KMeans(n_clusters=10, random_state = 42).fit(new_tb2)

In [51]:
tb_kmeans_2 = pd.DataFrame({'Type_JeuDeDonnees': new_tb['Type_JeuDeDonnees'], 'Idft': new_tb['Idft'], "gpe_kmeans" : modele_2_kmeans.labels_})

In [52]:
compte = [len(tb_kmeans_2[tb_kmeans_2['gpe_kmeans']==k]) for k in np.unique(tb_kmeans_2['gpe_kmeans'])]
indice_k = [k for k in np.unique(tb_kmeans_2['gpe_kmeans'])]
compte_train = [len(tb_kmeans_2[(tb_kmeans_2['gpe_kmeans']==k) & (tb_kmeans_2['Type_JeuDeDonnees']=='train')]) for k in np.unique(tb_kmeans_2['gpe_kmeans'])]
compte_valid = [len(tb_kmeans_2[(tb_kmeans_2['gpe_kmeans']==k) & (tb_kmeans_2['Type_JeuDeDonnees']=='valid')]) for k in np.unique(tb_kmeans_2['gpe_kmeans'])]
pourc_seq_train = [compte_train[k]/compte[k] for k in range(0,len(np.unique(tb_kmeans_2['gpe_kmeans'])))]
pourc_seq_valid = [compte_valid[k]/compte[k] for k in range(0,len(np.unique(tb_kmeans_2['gpe_kmeans'])))]

In [53]:
summary_kmeans2 = pd.DataFrame({"compte_seq": compte, "Pourc_train": pourc_seq_train, "Pourc_valid": pourc_seq_valid, "gpe_kmeans": indice_k})
summary_kmeans2

Unnamed: 0,compte_seq,Pourc_train,Pourc_valid,gpe_kmeans
0,1979,0.830723,0.169277,0
1,1,1.0,0.0,1
2,1,1.0,0.0,2
3,1,1.0,0.0,3
4,78,0.320513,0.679487,4
5,2,1.0,0.0,5
6,1,1.0,0.0,6
7,2,0.0,1.0,7
8,1,1.0,0.0,8
9,1,1.0,0.0,9


In [None]:
# On remarque que toutes les plus faibles proportions des données d'apprentissage sont dans le groupe 7, puis 4, et donc les plus fortes proportions des données
# de validation sont dans ces groupes. 
# On va donc supposer que ces séquences sont des séquences anormales. 

In [54]:
tb_kmeans_valid2 = tb_kmeans_2[ tb_kmeans_2['Type_JeuDeDonnees'] == 'valid' ]
index_max_gps_kmeans_valid2 = summary_kmeans2[ summary_kmeans2['Pourc_valid']>= 0.6  ].index

In [55]:
liste_indice2 = [gpe for gpe in np.unique(tb_kmeans_2['gpe_kmeans']) if gpe not in index_max_gps_kmeans_valid2]
liste_indice2

[0, 1, 2, 3, 5, 6, 8, 9]

In [59]:
gpe_predit2 = tb_kmeans_valid2['gpe_kmeans']
tb_kmeans_valid2['gpe_predit'] = gpe_predit2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [60]:
tb_kmeans_valid2['gpe_predit'] = tb_kmeans_valid2['gpe_predit'].replace(index_max_gps_kmeans_valid2,"anormales?")
for gpe_within_4_7 in liste_indice2:
    tb_kmeans_valid2['gpe_predit'] = tb_kmeans_valid2['gpe_predit'].replace(gpe_within_4_7,"normales")
tb_kmeans_valid2['gpe_predit'] = tb_kmeans_valid2['gpe_predit'].replace("anormales?",1)
tb_kmeans_valid2['gpe_predit'] = tb_kmeans_valid2['gpe_predit'].replace("normales",0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a

In [61]:
print(len(tb_kmeans_valid2[tb_kmeans_valid2['gpe_predit']==1]))
tb_kmeans_valid2.head()

55


Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans,gpe_predit
0,valid,1_valid,0,0
2,valid,3_valid,0,0
3,valid,4_valid,0,0
6,valid,7_valid,0,0
7,valid,8_valid,0,0


In [None]:
## Concaténer les 2 tbx suivants : tb_kmeans_valid2 et tb1_anomalies

In [62]:
tb1_anomalies = tb_kmeans_valid.iloc[:,:-1]
tb1_anomalies = tb1_anomalies[ tb1_anomalies['gpe_predit']==1 ]

In [63]:
tb1_anomalies

Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans,gpe_predit
1,valid,2_valid,8,1
4,valid,5_valid,6,1
5,valid,6_valid,8,1
8,valid,9_valid,8,1
14,valid,15_valid,1,1
...,...,...,...,...
582,valid,583_valid,8,1
585,valid,586_valid,8,1
587,valid,588_valid,8,1
589,valid,590_valid,4,1


In [64]:
tb_predictions_2 = pd.concat([tb_kmeans_valid2, tb1_anomalies]).sort_index(axis = 0, ascending = True)
list(tb_predictions_2['gpe_predit']).count(1)

259

In [65]:
tb1_anomalies = tb_kmeans_valid.iloc[:,:-1]
tb1_anomalies = tb1_anomalies[ tb1_anomalies['gpe_predit']==1 ]

In [66]:
tb1_anomalies

Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans,gpe_predit
1,valid,2_valid,8,1
4,valid,5_valid,6,1
5,valid,6_valid,8,1
8,valid,9_valid,8,1
14,valid,15_valid,1,1
...,...,...,...,...
582,valid,583_valid,8,1
585,valid,586_valid,8,1
587,valid,588_valid,8,1
589,valid,590_valid,4,1


In [67]:
tb_predictions_2

Unnamed: 0,Type_JeuDeDonnees,Idft,gpe_kmeans,gpe_predit
0,valid,1_valid,0,0
1,valid,2_valid,8,1
2,valid,3_valid,0,0
3,valid,4_valid,0,0
4,valid,5_valid,6,1
...,...,...,...,...
589,valid,590_valid,4,1
590,valid,591_valid,2,1
591,valid,592_valid,0,0
592,valid,593_valid,0,0


In [68]:
tb_predictions_2['anomaly'] = tb_predictions_2['gpe_predit']
pred_anomaly2 = pd.DataFrame(tb_predictions_2['anomaly'])
pred_anomaly2['seqID'] = np.arange(0,len(tb_predictions_2))
pred_anomaly2 = pred_anomaly2[['seqID', 'anomaly']]

pred_anomaly2

Unnamed: 0,seqID,anomaly
0,0,0
1,1,1
2,2,0
3,3,0
4,4,1
...,...,...
589,589,1
590,590,1
591,591,0
592,592,0


In [47]:
len(pred_anomaly2[pred_anomaly2['anomaly']==1])

259

In [48]:
#pred_anomaly2.to_csv('pred_anomaly_kmeans_2_JeuDonneesTot_nbCluster_10_2emeEtape.csv',  index = False, sep=";")

In [None]:
# Avec nb_clusters=10 et random_state= 40 : on a un score de : F1-Score: 0.93165 	Precision: 1.00000  	Recall: 0.87205 # soumission 65

Il y a donc des anomalies que nous n'avons pas détécté.