In [1]:
# Data Processing
import pandas as pd
import numpy as np

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint

from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, GridSearchCV

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz

In [2]:
from pickle import dump

In [3]:
from sklearn.pipeline import Pipeline

In [4]:
df = pd.read_csv("../data/final_cutted.csv")

In [5]:
df

Unnamed: 0.1,Unnamed: 0,index,GOLDEARNED,TOTALMINIONSKILLED,KILLS,ASSISTS,DEATHS,VISIONSCORE,TOTALDAMAGEDEALTTOCHAMPIONS,P_MATCH_ID,WIN,CHAMPION,PUUID,SUMMONERNAME,GAMEVERSION
0,0,0.0,5218.0,16.0,1.0,6.0,6.0,28.0,4500.0,BR1_2304032235_utility,False,Bard,UNNl1KcPO98UoXiuRpQBefEKJbtCF_80b0_2s0Cwa5FiYi...,batata 12121212,11.13.382.1241
1,1,1.0,7515.0,29.0,1.0,19.0,1.0,40.0,7716.0,BR1_2304032235_utility,True,Blitzcrank,w2DLeo91qdfD72dpGgapMOKh_4IZ9IMF29neabiS0QTe8W...,love yourseIf,11.13.382.1241
2,2,2.0,9197.0,47.0,5.0,5.0,5.0,17.0,9696.0,BR1_2304032235_jungle,False,Nocturne,wDtmVguiopT93yrxtv2L88LxAVWC8E2fj_F3FDW81nCuSU...,NTM HACKER,11.13.382.1241
3,3,3.0,10564.0,37.0,6.0,8.0,4.0,12.0,15291.0,BR1_2304032235_jungle,True,Kayn,zVKtTZrdKVIpXwIMlsuSQjwOgqxx0DMhnWDFL7MrAKxXZq...,unsuri,11.13.382.1241
4,4,4.0,10598.0,158.0,6.0,8.0,7.0,17.0,20568.0,BR1_2303451507_top,False,Pantheon,sTevUOXxKjNW7dpbtyu9wjn8KZxzN63_f2MfGc1EALDjtq...,Nome e Numeros,11.13.382.1241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,9995.0,10365.0,152.0,5.0,11.0,4.0,31.0,17876.0,NA1_3922489413_middle,True,Neeko,CipVayli1m6gWF0KFuIetWO9hDvtE3FBDHROLvA_xf-j9O...,Hozackeno,11.11.377.6311
9996,9996,9996.0,8799.0,196.0,2.0,3.0,2.0,14.0,11076.0,NA1_3922489413_bottom,False,Jhin,Sqmu7vaSYGa1UculrBU29oWuZ-etDFjteUZP0a9F25ekQZ...,Deadly Flourish,11.11.377.6311
9997,9997,9997.0,12616.0,188.0,11.0,5.0,1.0,17.0,13453.0,NA1_3922489413_bottom,True,Jinx,KxUibZSTbil5noU6II2WBzZ66tjPhmHP14stRTAfCJ7Yfe...,StrangerToLife,11.11.377.6311
9998,9998,9998.0,5806.0,12.0,1.0,9.0,4.0,40.0,4145.0,NA1_3922489413_utility,False,Soraka,S5fToe0p-RmiM4f_JIbG1UkUwzU0mvy_K6hZMbK9Iy6Uqi...,beIIe,11.11.377.6311


In [6]:
df['WIN'] = df['WIN'].map({True:1, False:0})

In [7]:
numeric_features = ['GOLDEARNED', 'TOTALMINIONSKILLED', 'KILLS', 
                    'ASSISTS', 'DEATHS', 'VISIONSCORE', 'TOTALDAMAGEDEALTTOCHAMPIONS']
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")), 
        ("scaler", StandardScaler()),
    ]
)

In [8]:
categorical_features = ["CHAMPION"]
categorical_transformer = Pipeline(
    steps=[
        ("onehot-encoder", OneHotEncoder(drop="first", 
                                         handle_unknown="ignore")),
    ]
)

In [9]:
preprocessor = ColumnTransformer(
    transformers=[
        ("numeric", numeric_transformer, numeric_features),
        ("categorical", categorical_transformer, categorical_features),
    ]
)

In [10]:
rf = RandomForestClassifier()

pipe = Pipeline(
    steps=[
        ("preprocessor", preprocessor), 
        ("rf", rf)
    ]
)

In [11]:
train, test = train_test_split(df, test_size=0.3)

In [12]:
train.head()

Unnamed: 0.1,Unnamed: 0,index,GOLDEARNED,TOTALMINIONSKILLED,KILLS,ASSISTS,DEATHS,VISIONSCORE,TOTALDAMAGEDEALTTOCHAMPIONS,P_MATCH_ID,WIN,CHAMPION,PUUID,SUMMONERNAME,GAMEVERSION
5565,5565,5565.0,12518.0,180.0,3.0,9.0,6.0,19.0,15736.0,EUW1_5352193529_top,0,Volibear,izOQ3P612UY0yHqO-XLsqV5JvncVBTSZr25hWjeT9DlEdc...,Paardenpikkie,11.13.382.1241
7080,7080,7080.0,17418.0,222.0,14.5,1.0,5.0,21.0,33636.0,BR1_2335955918_top,1,Irelia,NWHV8tHAGiRiugTIdKWOu9VcgC8T37a5Ln_spzTrTZNK-P...,MAYNE IS MY VAlN,11.16.390.1945
3835,3835,3835.0,11384.0,73.0,4.0,19.0,3.0,56.5,21274.0,LA1_1123984618_utility,0,Morgana,Hc9IiJ64VD8t6HfrNSRPKyh9ziydmR9M44IVQyMFyz92dm...,Khoralie,11.16.390.1945
2166,2166,2166.0,13510.0,181.0,12.0,10.0,5.0,22.0,31980.0,EUN1_2880393141_top,0,Malphite,_tMLBKmVzuYp7L4uqMmaKe_ebnOfD_fFVZaq1y-vuK00uV...,Mix234,11.14.385.9967
9503,9503,9503.0,5928.0,126.0,1.0,2.0,6.0,5.0,13310.0,KR_5240179665_top,0,LeeSin,C9CPtcdTWp39tPxNFIrdgyfiePfCorpYgLX391-jW8KOtB...,서폿빼고라인다감,11.11.377.6311


In [13]:
X_train = train[['GOLDEARNED', 'TOTALMINIONSKILLED', 'KILLS', 
                    'ASSISTS', 'DEATHS', 'VISIONSCORE', 'TOTALDAMAGEDEALTTOCHAMPIONS']]
Y_train = train[["WIN"]]

In [14]:
X_test = test[['GOLDEARNED', 'TOTALMINIONSKILLED', 'KILLS', 
                    'ASSISTS', 'DEATHS', 'VISIONSCORE', 'TOTALDAMAGEDEALTTOCHAMPIONS']]
Y_test = test[["WIN"]]

In [22]:
param_grid = [
        {'n_estimators': [80,90, 100],
        },
    ]

In [23]:
search_cv = GridSearchCV(pipe, param_grid)

In [24]:
search_cv.fit(X_train, Y_train)

ValueError: Invalid parameter 'n_estimators' for estimator Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  ['GOLDEARNED',
                                                   'TOTALMINIONSKILLED',
                                                   'KILLS', 'ASSISTS', 'DEATHS',
                                                   'VISIONSCORE',
                                                   'TOTALDAMAGEDEALTTOCHAMPIONS']),
                                                 ('categorical',
                                                  Pipeline(steps=[('onehot-encoder',
                                                                   OneHotEncoder(drop='first',
                                                                                 handle_unknown='ignore'))]),
                                                  ['CHAMPION'])])),
                ('rf', RandomForestClassifier())]). Valid parameters are: ['memory', 'steps', 'verbose'].