In [1]:
import os
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
dataCSV = os.path.join(os.getcwd(), "..", "CSGODemo", "Dataset", "AveragedDataset.csv")
modelSaveFolder = os.path.join(os.getcwd(), "modelSave")

df = pd.read_csv(dataCSV)

print(df.columns.tolist())

invalidRows = (df == -1).any(axis=1)

print(f"Invalid rows --> {invalidRows}")

print(df)

['Team1_Avg_Kills', 'Team1_Avg_Assists', 'Team1_Avg_Deaths', 'Team1_Avg_HeadshotsPerc', 'Team1_Avg_KR', 'Team1_Avg_KD', 'Team1_Avg_KAST', 'Team1_Avg_EntryKills', 'Team1_Avg_RWS', 'Team1_Avg_Rating', 'Team1_Avg_Rating2', 'Team1_Avg_ATD', 'Team1_Avg_ADR', 'Team1_Avg_ELO', 'Team2_Avg_Kills', 'Team2_Avg_Assists', 'Team2_Avg_Deaths', 'Team2_Avg_HeadshotsPerc', 'Team2_Avg_KR', 'Team2_Avg_KD', 'Team2_Avg_KAST', 'Team2_Avg_EntryKills', 'Team2_Avg_RWS', 'Team2_Avg_Rating', 'Team2_Avg_Rating2', 'Team2_Avg_ATD', 'Team2_Avg_ADR', 'Team2_Avg_ELO', 'Team_1_Win?', 'ScoreDifference', 'closeMatch?']
Invalid rows --> 0       False
1       False
2        True
3        True
4        True
        ...  
9522    False
9523    False
9524    False
9525    False
9526    False
Length: 9527, dtype: bool
      Team1_Avg_Kills  Team1_Avg_Assists  Team1_Avg_Deaths  \
0            20.72634            4.47970          18.94140   
1            18.99140            4.45454          20.21516   
2             3.10000      

In [3]:
invalidRows = invalidRows.reset_index(drop=True)

cleaneddf = df[df['Team_1_Win?'] != 2]
cleaneddf = cleaneddf.dropna()

print(df.shape)
print(cleaneddf.shape)

print(cleaneddf['Team_1_Win?'].unique())

(9527, 31)
(9523, 31)
[0 1]


In [4]:
abs_correlations = cleaneddf.corr()[['closeMatch?']].abs()

# Sort absolute correlations in descending order
sorted_abs_correlations = abs_correlations.sort_values(by='closeMatch?', ascending=False)

print(sorted_abs_correlations.head(30))

                         closeMatch?
closeMatch?                 1.000000
ScoreDifference             0.710119
Team_1_Win?                 0.046962
Team2_Avg_ELO               0.029695
Team1_Avg_HeadshotsPerc     0.016808
Team2_Avg_Kills             0.014136
Team2_Avg_Assists           0.012248
Team1_Avg_Assists           0.011081
Team1_Avg_ATD               0.010555
Team1_Avg_KR                0.010323
Team2_Avg_Deaths            0.009823
Team2_Avg_RWS               0.009569
Team1_Avg_ADR               0.009436
Team1_Avg_Rating            0.009329
Team1_Avg_Rating2           0.008247
Team1_Avg_EntryKills        0.008058
Team1_Avg_KD                0.008045
Team1_Avg_RWS               0.007718
Team1_Avg_KAST              0.007007
Team2_Avg_EntryKills        0.005922
Team2_Avg_ADR               0.005236
Team1_Avg_Deaths            0.004520
Team2_Avg_Rating2           0.003258
Team2_Avg_HeadshotsPerc     0.002265
Team2_Avg_KD                0.002259
Team2_Avg_Rating            0.002192
T

In [9]:
feature_columns = [
    "Team1_Avg_ELO",
    "Team1_Avg_RWS",
    "Team1_Avg_Rating",
    "Team1_Avg_Rating2",
    "Team1_Avg_KR",
    "Team1_Avg_KD",
    "Team1_Avg_KAST",
    "Team1_Avg_ADR",
    "Team2_Avg_ELO",
    "Team2_Avg_RWS",
    "Team2_Avg_Rating",
    "Team2_Avg_Rating2",
    "Team2_Avg_KR",
    "Team2_Avg_KD",
    "Team2_Avg_KAST",
    "Team2_Avg_ADR"
]

features = df[feature_columns]

labels = df['ScoreDifference']

print(features.shape)
print(labels.shape)

(9527, 16)
(9527,)


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaled_features = scaler.fit_transform(features)

X_train, X_test, y_train, y_test = train_test_split(scaled_features, labels, test_size=0.2, random_state=42)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


(7621, 16)
(1906, 16)
(7621,)
(1906,)


In [7]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, roc_auc_score
from skopt import BayesSearchCV

clf = RandomForestClassifier(n_estimators=100, random_state=42, verbose=2)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100


[Parallel(n_jobs=1)]: Done  40 tasks      | elapsed:    3.0s


building tree 43 of 100
building tree 44 of 100
building tree 45 of 100
building tree 46 of 100
building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51 of 100
building tree 52 of 100
building tree 53 of 100
building tree 54 of 100
building tree 55 of 100
building tree 56 of 100
building tree 57 of 100
building tree 58 of 100
building tree 59 of 100
building tree 60 of 100
building tree 61 of 100
building tree 62 of 100
building tree 63 of 100
building tree 64 of 100
building tree 65 of 100
building tree 66 of 100
building tree 67 of 100
building tree 68 of 100
building tree 69 of 100
building tree 70 of 100
building tree 71 of 100
building tree 72 of 100
building tree 73 of 100
building tree 74 of 100
building tree 75 of 100
building tree 76 of 100
building tree 77 of 100
building tree 78 of 100
building tree 79 of 100
building tree 80 of 100
building tree 81 of 100
building tree 82 of 100
building tree 83 of 100
building tree 84

[Parallel(n_jobs=1)]: Done  40 tasks      | elapsed:    0.0s


In [8]:
accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy --> {accuracy}")

precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"precision --> {precision}")
print(f"recall --> {recall}")
print(f"f1 --> {f1}")
print(conf_matrix)

accuracy --> 0.6610703043022036
precision --> 0.36363636363636365
recall --> 0.07177814029363784
f1 --> 0.11989100817438691
[[1216   77]
 [ 569   44]]


In [24]:
y_pred_prob = clf.predict_proba(X_test)[:, 1]  # Get predicted probabilities for class 1
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = roc_auc_score(y_test, y_pred_prob)

print(f"probabilites --> {y_pred_prob}")
print(f"AUC --> {roc_auc}")

probabilites --> [0.43 0.15 0.53 ... 0.35 0.38 0.42]
AUC --> 0.509963929251371


[Parallel(n_jobs=1)]: Done  40 tasks      | elapsed:    0.0s


In [12]:
reg = RandomForestRegressor(n_estimators=100, random_state=42, verbose=2)

reg.fit(X_train, y_train)

y_pred = clf.predict(X_test)

building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100


[Parallel(n_jobs=1)]: Done  40 tasks      | elapsed:   13.1s


building tree 41 of 100
building tree 42 of 100
building tree 43 of 100
building tree 44 of 100
building tree 45 of 100
building tree 46 of 100
building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51 of 100
building tree 52 of 100
building tree 53 of 100
building tree 54 of 100
building tree 55 of 100
building tree 56 of 100
building tree 57 of 100
building tree 58 of 100
building tree 59 of 100
building tree 60 of 100
building tree 61 of 100
building tree 62 of 100
building tree 63 of 100
building tree 64 of 100
building tree 65 of 100
building tree 66 of 100
building tree 67 of 100
building tree 68 of 100
building tree 69 of 100
building tree 70 of 100
building tree 71 of 100
building tree 72 of 100
building tree 73 of 100
building tree 74 of 100
building tree 75 of 100
building tree 76 of 100
building tree 77 of 100
building tree 78 of 100
building tree 79 of 100
building tree 80 of 100
building tree 81 of 100
building tree 82

[Parallel(n_jobs=1)]: Done  40 tasks      | elapsed:    0.0s


In [13]:
mse = mean_squared_error(y_test, y_pred)
print(f"Mean squared error --> {mse}")

Mean squared error --> 42.494753410283316
