# 🚀 CS:GO Initial ML (PyCaret)
This notebook uses PyCaret to model a CS:GO players dataset.
First, we use the ``compare_models()`` method to evaluate a set of available models.
Afterwards, we take the best-performing model, create it, and tune it.
Finally, we report the results in the form of a confusion matrix and basic metrics.

In [1]:
# Required Modules
import os
import json
import pandas as pd
import pycaret

from collections import defaultdict

import pycaret
from pycaret.classification import *

# from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score


import sys
import platform
from datetime import date
# Report the platform, interpreter version and run date so the stored outputs
# of this notebook can be traced back to the environment that produced them.
print(
    f"  System: {sys.platform}",
    f"  Python version: {platform.python_version()}",
    f"  Last update: {date.today().strftime('%Y-%m-%d')}",
    sep="\n",
)

  System: darwin
  Python version: 3.11.8
  Last update: 2024-03-26


# 📊 Dataset
**Note:** The dataset used in this notebook is subject to change.

In [2]:
# Load dataset
#
# Declares the folders used by the notebook for inputs and outputs, reads the
# raw CSV tables, and previews the frame that the following cells work on.

# Paths to save (plain strings: no interpolation is needed, so no f-prefix)
folder_model_path = "models"
folder_eval_path = "evaluation/metrics/"
folder_report_path = "evaluation/reports"

folder_prediction_path = "predictions"

# Path to load datasets
folder_data_ml_path = "data/"
folder_processed_path = "data/processed"

# Read the raw tables from the declared data folder instead of repeating the
# folder name in each literal; the resolved paths are still data/players.csv
# and data/results.csv.
players_df = pd.read_csv(os.path.join(folder_data_ml_path, 'players.csv'))
results_df = pd.read_csv(os.path.join(folder_data_ml_path, 'results.csv'))

# NOTE(review): this file is read relative to the working directory rather than
# from one of the data folders declared above — confirm that is intended.
df = pd.read_csv('updated_players.csv')
df.head()

Unnamed: 0,date,player_name,team,opponent,country,player_id,match_id,event_id,event_name,best_of,...,m3_adr_ct,m3_kast_ct,m3_rating_ct,m3_kills_t,m3_deaths_t,m3_kddiff_t,m3_adr_t,m3_kast_t,m3_rating_t,team_won
0,2020-02-26,Brehze,Evil Geniuses,Liquid,United States,9136,2339385,4901,IEM Katowice 2020,3,...,72.5,80.0,0.93,7.0,9.0,-2.0,70.4,63.6,0.89,0.0
1,2020-02-26,CeRq,Evil Geniuses,Liquid,Bulgaria,11219,2339385,4901,IEM Katowice 2020,3,...,79.5,53.3,1.12,4.0,8.0,-4.0,40.7,54.5,0.53,0.0
2,2020-02-26,EliGE,Liquid,Evil Geniuses,United States,8738,2339385,4901,IEM Katowice 2020,3,...,81.5,63.6,1.03,9.0,9.0,0.0,87.9,73.3,1.05,1.0
3,2020-02-26,Ethan,Evil Geniuses,Liquid,United States,10671,2339385,4901,IEM Katowice 2020,3,...,67.2,66.7,0.97,1.0,9.0,-8.0,14.8,45.5,0.31,0.0
4,2020-02-26,NAF,Liquid,Evil Geniuses,Canada,8520,2339385,4901,IEM Katowice 2020,3,...,72.9,81.8,0.96,8.0,7.0,1.0,56.3,80.0,0.99,1.0


In [3]:
# Display the column labels of the loaded frame.
df.columns

Index(['date', 'player_name', 'team', 'opponent', 'country', 'player_id',
       'match_id', 'event_id', 'event_name', 'best_of',
       ...
       'm3_adr_ct', 'm3_kast_ct', 'm3_rating_ct', 'm3_kills_t', 'm3_deaths_t',
       'm3_kddiff_t', 'm3_adr_t', 'm3_kast_t', 'm3_rating_t', 'team_won'],
      dtype='object', length=102)

# 🤓 Preprocess Data
In this section, we preprocess the dataset and prepare it for machine learning.

In [4]:
# Disabled preprocessing, kept for reference only: every line in this cell is
# commented out, so running the cell changes nothing. The disabled code drops
# columns by position and divides per-kill-type counters by "rounds_played".
# #remove unneeded col's (stats)
# cols_Remove = [0, 1, 2, 5, 6, 7, 8, 21, 22, 23, 25]
# df = df.drop(df.columns[cols_Remove], axis=1)

# #divide specific stats by rounds for normalization
# cols_label_Divide = ["0_kill_rounds", "1_kill_rounds", "2_kill_rounds", "3_kill_rounds", "4_kill_rounds", "5_kill_rounds", "rifle_kills", "sniper_kills", "smg_kills", "pistol_kills", "grenade_kills", "other_kills"]

# #Iterate over the columns and perform division
# for col in cols_label_Divide:
#     df[col] = df[col] / df["rounds_played"]
   
# print(df.iloc[0])

In [5]:
# (rows, columns) of the frame as loaded, before the filtering cells below.
df.shape

(383317, 102)

In [6]:
# Discard rows whose `team_won` label is missing, then keep only the rows whose
# `best_of` value equals 3. The resulting (rows, columns) is the cell output.
df = (
    df
    .dropna(subset=['team_won'])
    .loc[lambda frame: frame['best_of'] == 3]
)
df.shape

(119630, 102)

In [7]:
# Columns retained for modelling, listed in the order they appear in the
# reduced frame: descriptive fields, identifiers, the three map columns, the
# `team_won` label, and finally the `m1_*` statistics.
columns_to_keep = (
    ['date', 'player_name', 'team', 'opponent', 'country']
    + ['player_id', 'match_id', 'event_id', 'event_name', 'best_of']
    + ['map_1', 'map_2', 'map_3']
    + ['team_won']
    + [
        'm1_kills', 'm1_assists', 'm1_deaths', 'm1_hs', 'm1_flash_assists',
        'm1_kast', 'm1_kddiff', 'm1_adr', 'm1_fkdiff', 'm1_rating',
    ]
)

# Restrict the frame to exactly those columns and show its new shape.
df = df.loc[:, columns_to_keep]
df.shape

(119630, 24)

# 🌎 General ML models
In this section, we create an experiment to evaluate different models. To this end, we require to perform the following steps:
1. Create an experiment object
2. Create a setup dictionary and set up the environment
3. Perform the model comparison
4. Evaluate the model to observe the initial performance

In [8]:
# Define the PyCaret classification experiment
obj_exp = ClassificationExperiment()

In [9]:
# Build the keyword arguments for the experiment's setup() call and apply them.
# `team_won` is the target, 10 folds are requested, and z-score normalisation
# is switched on.
steup_dict = {
    "data": df,
    "fold": 10,
    "target": "team_won",
    "index": True,
    # Columns excluded from the feature set. 'team' and 'opponent' were
    # commented out of this list, so they are not ignored.
    "ignore_features": [
        'date', 'player_name', 'country',
        'player_id', 'match_id', 'event_id', 'event_name',
        'best_of',
    ],
    "session_id": 420,
    "experiment_name": "experiment_csgo_initial",
    "normalize": True,
    "normalize_method": "zscore",
    "verbose": True,
}

# NOTE(review): the preview of the data shows several rows sharing one
# match_id; confirm whether rows of the same match may end up in both the
# train and test partitions and whether that is acceptable.
obj_exp.setup(**steup_dict)

Unnamed: 0,Description,Value
0,Session id,420
1,Target,team_won
2,Target type,Binary
3,Original data shape,"(119630, 24)"
4,Transformed data shape,"(119630, 41)"
5,Transformed train set shape,"(83741, 41)"
6,Transformed test set shape,"(35889, 41)"
7,Ignore features,8
8,Numeric features,10
9,Categorical features,5


<pycaret.classification.oop.ClassificationExperiment at 0x1390574d0>

In [10]:
# Perform the model comparison and keep the returned result in `best`; it is
# passed to evaluate_model() in the next cell.
# NOTE(review): the stored output of this cell lists AUC as 0.0 for every
# model — confirm the metric is being computed correctly before relying on it.
best = obj_exp.compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7909,0.0,0.7777,0.7988,0.7881,0.5818,0.582,1.999
rf,Random Forest Classifier,0.7652,0.0,0.7504,0.7733,0.7617,0.5304,0.5306,2.042
lightgbm,Light Gradient Boosting Machine,0.7606,0.0,0.7553,0.7635,0.7594,0.5212,0.5213,0.679
gbc,Gradient Boosting Classifier,0.7524,0.0,0.7427,0.7575,0.75,0.5049,0.505,2.639
ridge,Ridge Classifier,0.7341,0.0,0.7243,0.7389,0.7315,0.4682,0.4683,0.127
lda,Linear Discriminant Analysis,0.7341,0.0,0.7243,0.7389,0.7315,0.4682,0.4683,0.192
lr,Logistic Regression,0.734,0.0,0.7271,0.7373,0.7322,0.468,0.4681,0.312
ada,Ada Boost Classifier,0.7327,0.0,0.7223,0.7377,0.7299,0.4653,0.4655,0.725
dt,Decision Tree Classifier,0.7292,0.0,0.7319,0.728,0.73,0.4584,0.4584,0.272
svm,SVM - Linear Kernel,0.7281,0.0,0.7165,0.7338,0.7249,0.4563,0.4566,0.274


In [11]:
# Open the evaluation view for the model selected by the comparison above; the
# stored output is an interactive widget with a plot-type selector.
obj_exp.evaluate_model(best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

# ✅ Best ML Model
In this section we use the ML model obtained from the previous step to:
1. Create the model
2. Tune the model
3. Finalize the model
4. Evaluate the model performance (metrics)