In [1]:
import sqlite3
import pandas as pd
import json

# Open the SQLite database that holds the match data.
conn = sqlite3.connect('database.db')
try:
    # Fetch only the four columns used downstream rather than SELECT * followed
    # by a column subset, so unused columns are never loaded into memory.
    df = pd.read_sql_query(
        "SELECT matchId, teamId, win, championName FROM participant", conn
    )
finally:
    # Release the connection even when the query raises.
    conn.close()
df



Unnamed: 0,matchId,teamId,win,championName
0,EUW1_6648462202,100,0,Malphite
1,EUW1_6648462202,100,0,Kindred
2,EUW1_6648462202,100,0,Tristana
3,EUW1_6648462202,100,0,Seraphine
4,EUW1_6648462202,100,0,Rell
...,...,...,...,...
13505,EUW1_6645511796,200,1,Teemo
13506,EUW1_6645511796,200,1,Lillia
13507,EUW1_6645511796,200,1,Renekton
13508,EUW1_6645511796,200,1,Kaisa


In [2]:

# Champion vocabulary: the index of champion.json, lower-cased so that lookups
# against the match data are case-insensitive.
champion_list = [name.lower() for name in pd.read_json('champion.json').index]

# Slot of each champion in the one-hot vector. setdefault keeps the first
# occurrence of a name, which is what list.index() would have returned.
champion_position = {}
for position, name in enumerate(champion_list):
    champion_position.setdefault(name, position)


def _encode_team(champion_names):
    """Return a 0/1 list over champion_list marking the given champions.

    Args:
        champion_names: iterable of champion name strings (any casing).

    Returns:
        A list of len(champion_list) integers, 1 at the slot of every given
        champion and 0 elsewhere.

    Raises:
        ValueError: if a name is not present in champion_list.
    """
    encoded = [0] * len(champion_list)
    for name in champion_names:
        key = name.lower()
        if key not in champion_position:
            raise ValueError(f"{name!r} is not in champion_list")
        encoded[champion_position[key]] = 1
    return encoded


# Rows are sorted by teamId before grouping, so inside each match the rows of
# the lower teamId come first.
grouped = df.sort_values('teamId').groupby('matchId')

# Collect one record per match and build the frame once at the end; calling
# pd.concat inside the loop copies the whole frame on every iteration.
records = []
for match_id, group in grouped:
    names = group['championName']
    records.append({
        'matchId': match_id,
        # First five rows are encoded as one team, the next five as the other.
        # NOTE(review): assumes exactly 5 + 5 participants per match; this is
        # not validated here.
        'champions': _encode_team(names.iloc[:5]) + _encode_team(names.iloc[5:10]),
        # Label is the `win` value of the first row, i.e. of the first team.
        'win': group['win'].iloc[0],
    })

dataset = pd.DataFrame(records, columns=['matchId', 'champions', 'win'])
dataset

Unnamed: 0,matchId,champions,win
0,EUW1_6615291154,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0
1,EUW1_6615441057,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0
2,EUW1_6619224580,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0
3,EUW1_6619293216,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
4,EUW1_6619310994,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0
...,...,...,...
1346,EUW1_6649816402,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0
1347,EUW1_6649825403,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1348,EUW1_6649843032,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0
1349,EUW1_6649843858,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0


In [57]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV, train_test_split

# Stratified 80/20 split on the `win` label, with a fixed seed for repeatability.
df_train, df_test = train_test_split(
    dataset,
    test_size=0.2,
    random_state=0,
    stratify=dataset['win'],
)

# Features are the per-match `champions` lists; targets are the `win` values.
X_train, y_train = list(df_train["champions"]), list(df_train["win"])
X_test, y_test = list(df_test["champions"]), list(df_test["win"])

# Single-candidate grid: one RBF-kernel SVC configuration, cross-validated.
param_grid = dict(C=[1], gamma=[0.1], kernel=['rbf'])

# refit=True retrains the best configuration on the full training split.
model = GridSearchCV(svm.SVC(), param_grid, refit=True, verbose=3)
model.fit(X_train, y_train)

print(model.best_params_)
print(model.best_estimator_)




Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.514 total time=   0.0s
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.514 total time=   0.0s
[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.523 total time=   0.0s
[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.597 total time=   0.0s
[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.491 total time=   0.1s
{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
SVC(C=1, gamma=0.1)


In [58]:
# Predict the `win` label for each test-set match using the fitted grid-search model.
y_pred = model.predict(X_test)
y_pred

array([1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1])

In [59]:
from sklearn.metrics import confusion_matrix
import numpy as np

# Confusion table: rows are the true labels, columns the predicted labels.
# Naming both axes replaces the default `row_0` / `col_0` headers that pandas
# assigns when the inputs are an unnamed list and array.
df_confusion = pd.crosstab(
    y_test, y_pred, rownames=['actual'], colnames=['predicted']
)
df_confusion




col_0,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,90,54
1,63,64


In [61]:
from sklearn.metrics import classification_report

# Text summary of the test-set predictions; printed because the report is a
# preformatted multi-line string.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.59      0.62      0.61       144
           1       0.54      0.50      0.52       127

    accuracy                           0.57       271
   macro avg       0.57      0.56      0.56       271
weighted avg       0.57      0.57      0.57       271

