In [123]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from joblib import load

In [124]:
# Load the prepared dataset; the stored object is unpacked into a feature table X and a target y.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load artifacts that come from a trusted source.
X, y = load('../../data.joblib')
# Show the distinct target labels present in y.
y.unique()

array([0, 1], dtype=int64)

In [125]:
# Preview the first rows of the feature table.
X.head()

Unnamed: 0,time_left,ct_score,t_score,map,bomb_planted,ct_health,t_health,ct_armor,t_armor,ct_money,...,ct_grenade_flashbang,t_grenade_flashbang,ct_grenade_smokegrenade,t_grenade_smokegrenade,ct_grenade_incendiarygrenade,t_grenade_incendiarygrenade,ct_grenade_molotovgrenade,t_grenade_molotovgrenade,ct_grenade_decoygrenade,t_grenade_decoygrenade
0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.891594,0.0,0.0,0.0,0.0,1.0,1.0,0.8,0.6,0.0075,...,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0
2,0.548717,0.0,0.0,0.0,0.0,0.782,0.8,0.588,0.4,0.009375,...,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0
3,0.434425,0.0,0.0,0.0,0.0,0.782,0.8,0.588,0.4,0.009375,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.999829,0.03125,0.0,0.0,0.0,1.0,1.0,0.384,0.0,0.229375,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [126]:
# Missing-value check: per-cell flags are reduced per column, then to one boolean for all of X.
X.isna().any().any()

False

In [127]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [128]:
def model_info(y_real, y_pred):
    """Print a classification report for one set of predictions.

    Prints the confusion matrix followed by the accuracy, precision,
    recall and F1 scores, each on its own labelled line.

    Parameters
    ----------
    y_real : true labels.
    y_pred : labels predicted by the model, in the same order as ``y_real``.

    Returns
    -------
    None; the report is written to standard output.
    """
    # The matrix gets its own line break so its rows start below the label.
    print('Confusion matrix: \n', confusion_matrix(y_real, y_pred))

    # Label/scorer pairs, listed in the order they are printed.
    scorers = (
        ('Accuracy score: ', accuracy_score),
        ('Precision score: ', precision_score),
        ('Recall score: ', recall_score),
        ('F1 score: ', f1_score),
    )
    for label, scorer in scorers:
        print(label, scorer(y_real, y_pred))

In [129]:
# Baseline tree: no hyperparameters are set apart from the seed, which is fixed for a repeatable fit.
dtc_default = DecisionTreeClassifier(random_state=55)

In [130]:
# Hold out 30% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=55)

In [131]:
# Fit the baseline tree on the training split only.
dtc_default.fit(X_train, y_train)

In [132]:
# Evaluate the baseline tree on the same rows it was trained on.
# These scores measure fit to the training data, not generalisation.
y_train_pred = dtc_default.predict(X_train)
model_info(y_train, y_train_pred)

Confusion matrix: 
 [[41895    97]
 [  149 43530]]
Accuracy score:  0.9971285499177085
Precision score:  0.9977766062300869
Recall score:  0.9965887497424392
F1 score:  0.997182324238884


In [133]:
# Evaluate the baseline tree on the held-out test split.
y_pred = dtc_default.predict(X_test)
model_info(y_test, y_pred)

Confusion matrix: 
 [[14665  3339]
 [ 3466 15247]]
Accuracy score:  0.8146635073671596
Precision score:  0.820348649521145
Recall score:  0.8147811681718591
F1 score:  0.8175554304404943


In [134]:
from sklearn.model_selection import GridSearchCV

# Search space for tuning the tree: both split criteria,
# and depth limits from 10 to 45 in steps of 5.
params = dict(
    criterion=['gini', 'entropy'],
    max_depth=list(range(10, 50, 5)),
)

In [135]:
# Grid search over every combination in `params`, scored with 3-fold cross-validation.
# No `scoring` argument is passed, so the tree's default scorer is used.
estimator = GridSearchCV(DecisionTreeClassifier(random_state=55), param_grid=params, cv=3)

In [136]:
# Run the search on the training split only, so the test split stays unseen during tuning.
estimator.fit(X_train, y_train)

In [137]:
# Hyperparameter combination selected by the search.
# NOTE(review): if the selected max_depth is the largest value in the grid, the optimum may lie
# outside the searched range — consider widening the grid or adding regularising parameters.
estimator.best_params_

{'criterion': 'entropy', 'max_depth': 45}

In [138]:
# Cross-validated score achieved by the selected combination.
estimator.best_score_

0.7871391719485006

In [139]:
# Rebuild the tree from every tuned hyperparameter instead of copying them key by key, so the
# model cannot silently drop a parameter if the search grid gains new entries. The seed matches
# the one used inside the search.
dtc = DecisionTreeClassifier(random_state=55, **estimator.best_params_)

In [140]:
# Fit the tuned tree on the full training split.
dtc.fit(X_train, y_train)

In [141]:
# Evaluate the tuned tree on the rows it was trained on.
# This rebinds y_train_pred, replacing the baseline model's training predictions.
y_train_pred = dtc.predict(X_train)
model_info(y_train, y_train_pred)

Confusion matrix: 
 [[41745   247]
 [  309 43370]]
Accuracy score:  0.9935100559115687
Precision score:  0.9943370704083271
Recall score:  0.9929256622175416
F1 score:  0.9936308651026394


In [142]:
# Evaluate the tuned tree on the held-out test split.
# This rebinds y_pred, replacing the baseline model's test predictions.
y_pred = dtc.predict(X_test)
model_info(y_test, y_pred)

Confusion matrix: 
 [[14494  3510]
 [ 3380 15333]]
Accuracy score:  0.8123485034180352
Precision score:  0.8137239293106193
Recall score:  0.8193769037567467
F1 score:  0.8165406326552349
