# implement a simple KNN model

In [103]:
import pandas as pd
import numpy as np
from joblib import dump, load

In [104]:
df = pd.read_csv("sampled_matches.csv")
df.drop(columns='Unnamed: 0', inplace=True)

In [105]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [106]:
X, y = df.drop(columns=['result','level_0']), df[['result']]

In [107]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [108]:

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
dump(scaler,'scaler.joblib')

['scaler.joblib']

In [93]:
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)
y_pred = neigh.predict(X_test)
y_pred

  return self._fit(X, y)


array([ 1.,  0.,  1., -1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,
        1., -1.,  1.,  1., -1.,  0., -1., -1.,  1.,  0., -1., -1.,  1.,
        0.,  1.,  0.,  1.,  1.,  0., -1.,  1.,  1.,  1.,  1.,  1.,  0.,
       -1.,  0.,  1., -1.,  1.,  1.,  1.,  0.,  1.,  1., -1.,  1., -1.,
        1., -1.,  1., -1.,  1., -1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,
        1., -1., -1.,  0., -1.,  1.,  1.,  0.,  1.,  1., -1.,  1.,  1.,
        0.,  1.,  1.,  0., -1.,  0.,  0.,  0.,  1., -1., -1.,  0.,  1.,
        0.,  0., -1.,  1., -1.,  1., -1.,  1., -1., -1.,  1.,  1.,  1.,
        1.,  0., -1.,  0.,  1.,  0., -1.,  1.,  0.,  1.,  1., -1.,  1.,
       -1.,  1.,  1., -1., -1.,  1.,  0., -1.,  0., -1.,  1., -1.,  1.,
       -1.,  0., -1.,  0., -1.,  1.,  1., -1.,  1.,  0.,  1.,  1., -1.,
        1., -1.,  0., -1., -1., -1., -1.,  1.,  1.,  0.,  1.,  0.,  1.,
       -1.,  1.,  1.,  1.,  1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,
        1., -1.,  1.,  0.,  1.,  1., -1., -1., -1., -1.,  1., -1

In [94]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))


0.41818181818181815


Save the model

In [95]:
from joblib import dump, load
dump(scaler, 'Simple_KNN model.joblib') 

['Simple_KNN model.joblib']

# train and tune other models with gridsearch
## KNN

In [96]:
from sklearn.model_selection import GridSearchCV
parameters = {'n_neighbors':range(1, 20)}
neigh = KNeighborsClassifier()
neigh_gs = GridSearchCV(neigh, parameters, scoring = "accuracy")
neigh_gs.fit(X_train, y_train.values.ravel())



In [97]:
y_pred = neigh_gs.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(neigh_gs.best_params_)


0.47575757575757577
{'n_neighbors': 19}


In [98]:
from joblib import dump, load
dump(neigh_gs, 'KNN.joblib') 

['KNN.joblib']

# Decision tree

In [99]:
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier()
parameters ={"criterion":['gini', 'entropy', 'log_loss'], 'splitter':['best', 'random'], 'min_samples_leaf':[10,20,30,40,50,60,70]}
DT_gs = GridSearchCV(DT, parameters, scoring ='accuracy')
DT_gs.fit(X_train, y_train.values.ravel())
y_pred = DT_gs.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(DT_gs.best_params_)

0.4575757575757576
{'criterion': 'gini', 'min_samples_leaf': 50, 'splitter': 'random'}


In [100]:
dump(DT_gs, 'DT.joblib') 

['DT.joblib']

# Random Forest

In [101]:
from sklearn.ensemble import RandomForestClassifier
RF = RandomForestClassifier()
parameters = {
 'max_depth': range(5,50,5),
 'min_samples_leaf': range(10,20),
 'min_samples_split': range(2,10)}
RF_gs = GridSearchCV(RF, parameters, scoring ='accuracy')
RF_gs.fit(X_train,np.ravel(y_train))

In [102]:
print(accuracy_score(y_test, y_pred))
print(RF_gs.best_params_)
print(y_pred)
dump(RF_gs,'RF.joblib')

0.4575757575757576
{'max_depth': 45, 'min_samples_leaf': 19, 'min_samples_split': 7}
[-1. -1. -1. -1.  1.  1. -1.  1.  1. -1.  1.  1. -1.  1. -1.  1.  1.  1.
  1. -1. -1. -1.  1.  1.  1. -1. -1. -1. -1.  1.  1.  1.  1.  1.  1. -1.
  1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1. -1.  1.  1.
  1.  1.  1. -1.  1.  1.  1.  1.  1. -1. -1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1. -1.  1. -1.  1.  1.  1. -1. -1.  1. -1.  1.  1.  1. -1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1.  1. -1.  1.
 -1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1. -1.  1.  1. -1. -1. -1. -1.
 -1. -1. -1.  1. -1. -1.  1.  1.  1.  1. -1.  1. -1.  1. -1.  1.  1.  1.
 -1.  1.  1. -1.  1.  1. -1.  1. -1.  1.  1.  1.  1. -1.  1.  1.  1. -1.
  1.  1.  1.  1.  1.  1. -1.  1. -1.  1.  1.  1. -1.  1.  1. -1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1. -1.  1.  1.
 -1. -1. -1.  1.  1.  1.  1. -1.  1.  1

['RF.joblib']