# implement a simple KNN model

In [45]:
import pandas as pd


In [46]:
df = pd.read_csv("sampled_matches.csv")
df.drop(columns='Unnamed: 0', inplace=True)

In [47]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [48]:
X, y = df.drop(columns='result'), df[['result']]

In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [50]:
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)



In [51]:
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)
y_pred = neigh.predict(X_test)
y_pred

  return self._fit(X, y)


array([-1.,  0.,  1., -1.,  1.,  1.,  1., -1., -1.,  0.,  1.,  0.,  0.,
        1., -1.,  1.,  1.,  0.,  0.,  1., -1.,  1.,  1.,  0., -1.,  1.,
        1., -1., -1.,  1.,  0., -1.,  1., -1., -1., -1.,  1.,  1.,  0.,
        0.,  0.,  1., -1.,  1.,  1., -1.,  0.,  1.,  1.,  0.,  1.,  1.,
        1.,  0.,  0.,  1.,  1., -1.,  0.,  0.,  1.,  1.,  0.,  1.,  1.,
        1., -1.,  1., -1.,  0.,  1.,  1.,  0.,  0.,  1., -1.,  1.,  1.,
       -1.,  1.,  0.,  0.,  0.,  1.,  0., -1.,  0., -1., -1.,  1.,  0.,
        0.,  0., -1.,  1., -1.,  1., -1.,  1., -1., -1.,  1.,  1., -1.,
       -1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  0.,  0.,
       -1.,  1.,  1., -1.,  1.,  1.,  0.,  0.,  0., -1.,  1., -1.,  1.,
        0.,  0., -1.,  1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  0., -1.,
        1.,  1.,  1., -1., -1., -1., -1.,  1.,  1., -1.,  1.,  0.,  1.,
       -1.,  1., -1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  0.,  1., -1.,
        1., -1., -1., -1.,  1.,  1.,  1., -1., -1., -1., -1., -1

In [52]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))
print(accuracy_score(y_train, neigh.predict(X_train)))

0.3939393939393939
0.6208955223880597


Save the model

In [53]:
from joblib import dump, load
dump(scaler, 'Simple_KNN model.joblib') 

['Simple_KNN model.joblib']

# train and tune other models
## KNN

In [54]:
from sklearn.model_selection import GridSearchCV
parameters = {'n_neighbors':range(1, 10)}
neigh = KNeighborsClassifier()
neigh_gs = GridSearchCV(neigh, parameters, scoring = "accuracy")
neigh_gs.fit(X_train, y_train.values.ravel())



In [55]:
y_pred = neigh_gs.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(neigh_gs.best_params_)


0.403030303030303
{'n_neighbors': 6}


In [56]:
from joblib import dump, load
dump(neigh_gs, 'KNN.joblib') 

['KNN.joblib']

# Decision tree

In [57]:
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier()
parameters ={"criterion":['gini', 'entropy', 'log_loss'], 'splitter':['best', 'random'], 'min_samples_leaf':[10,20,30,40,50,60,70]}
DT_gs = GridSearchCV(DT, parameters, scoring ='accuracy')
DT_gs.fit(X_train, y_train.values.ravel())
y_pred = DT_gs.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(DT_gs.best_params_)

0.49696969696969695
{'criterion': 'gini', 'min_samples_leaf': 60, 'splitter': 'best'}


In [58]:
dump(DT_gs, 'DT.joblib') 

['DT.joblib']

# Random Forest

In [60]:
from sklearn.ensemble import RandomForestClassifier
RF = RandomForestClassifier()
parameters = {
 'max_depth': [40,50,60,70],
 'min_samples_leaf': range(10,20),
 'min_samples_split': range(1,10)}
RF_gs = GridSearchCV(RF, parameters, scoring ='accuracy')
RF_gs.fit(X_train, y_train.values.ravel())
y_pred = RF_gs.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(RF_gs.best_params_)
dump(RF_gs, 'RF.joblib') 

0.49696969696969695
{'max_depth': 60, 'min_samples_leaf': 19, 'min_samples_split': 5}


200 fits failed out of a total of 1800.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/test_env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/test_env/lib/python3.8/site-packages/sklearn/ensemble/_forest.py", line 476, in fit
    trees = Parallel(
  File "/opt/homebrew/Caskroom/miniforge/base/envs/test_env/lib/python3.8/site-packages/joblib/parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "/opt/homebrew/Caskroom/miniforge/bas