# 🚀 CS:GO Initial ML (PyCaret)
This notebook aims at using PyCaret to model CS:GO players dataset.
First we use ``compare_model()`` method to evaluate a set of available methods.
Afterwards, we will use the best model to create a ML model and tune it.
Finally we report the results in the form of confusion matrix and basic metrics.

In [43]:
# Required Modluels
import os
import json
import pandas as pd
import pycaret

from collections import defaultdict

import pycaret
from pycaret.regression import *

# from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score


import sys
import platform
from datetime import date
print(f"  System: {sys.platform}")
print(f"  Python version: {platform.python_version()}")
print(f"  Last update: {date.today().strftime('%Y-%m-%d')}")

  System: darwin
  Python version: 3.11.8
  Last update: 2024-03-26


# 📊 Dataset
**Note:** The dataset used in this notebook is subject to change.

In [39]:
# Load dataset

# Paths to save
folder_model_path = f"models"
folder_eval_path = f"evaluation/metrics/"
folder_report_path = f"evaluation/reports"

folder_prediction_path = f"predictions"

# Path to load datasets
folder_data_ml_path = f"data/"
folder_processed_path = f"data/processed"


df = pd.read_csv(f"{folder_data_ml_path}/csgo_players.csv", low_memory=False)

df.head()

Unnamed: 0,player_id,nickname,real_name,age,country,current_team,teams,total_kills,total_deaths,headshot_percentage,...,3_kill_rounds,4_kill_rounds,5_kill_rounds,rifle_kills,sniper_kills,smg_kills,pistol_kills,grenade_kills,other_kills,rating
0,11893,ZywOo,Mathieu Herbaut,21,France,Vitality,"Vitality, aAa",22622,16458,41.2%,...,1474,369,71,8261,8552,1290,4262,260,49,1.32
1,7998,s1mple,Aleksandr Kostyliev,24,Ukraine,Natus Vincere,Natus Vincere,35512,26413,41.3%,...,2262,504,64,14717,13451,901,6057,199,254,1.25
2,16920,sh1ro,Dmitry Sokolov,21,Russia,Cloud9,"Gambit Youngsters, Gambit",18091,12492,27.8%,...,1075,221,30,3791,10580,550,3008,175,70,1.26
3,20113,deko,Denis Zhukov,21,Russia,1WIN,1WIN,8738,6397,36.7%,...,518,112,8,2664,4348,181,1459,86,40,1.23
4,18835,saffee,Rafael Costa,27,Brazil,FURIA,"FURIA, paiN",8482,6517,30.9%,...,524,114,14,2545,4412,264,1209,52,28,1.22


In [40]:
df.columns

Index(['player_id', 'nickname', 'real_name', 'age', 'country', 'current_team',
       'teams', 'total_kills', 'total_deaths', 'headshot_percentage',
       'damage_per_round', 'grenade_dmg_per_round', 'maps_played',
       'rounds_played', 'kills_per_death', 'kills_per_round',
       'assists_per_round', 'deaths_per_round', 'saved_by_teammate_per_round',
       'saved_teammates_per_round', 'rounds_with_kills', 'kill_to_death_diff',
       'total_opening_kills', 'total_opening_deaths', 'opening_kill_ratio',
       'opening_kill_rating', 'team_win_percent_after_first_kill',
       'first_kill_in_won_rounds', '0_kill_rounds', '1_kill_rounds',
       '2_kill_rounds', '3_kill_rounds', '4_kill_rounds', '5_kill_rounds',
       'rifle_kills', 'sniper_kills', 'smg_kills', 'pistol_kills',
       'grenade_kills', 'other_kills', 'rating'],
      dtype='object')

# 🤓 Preprocess Data
In this section, we will preprocess the dataset and get it ready for Machine Learning

In [34]:
#remove unneeded col's (stats)
cols_Remove = [0, 1, 2, 5, 6, 7, 8, 21, 22, 23, 25]
df = df.drop(df.columns[cols_Remove], axis=1)

#divide specific stats by rounds for normalization
cols_label_Divide = ["0_kill_rounds", "1_kill_rounds", "2_kill_rounds", "3_kill_rounds", "4_kill_rounds", "5_kill_rounds", "rifle_kills", "sniper_kills", "smg_kills", "pistol_kills", "grenade_kills", "other_kills"]

#Iterate over the columns and perform division
for col in cols_label_Divide:
    df[col] = df[col] / df["rounds_played"]
   
print(df.iloc[0])

age                                        21
country                                France
headshot_percentage                     41.2%
damage_per_round                         88.0
grenade_dmg_per_round                     3.6
maps_played                              1020
rounds_played                           26803
kills_per_death                          1.37
kills_per_round                          0.84
assists_per_round                        0.12
deaths_per_round                         0.61
saved_by_teammate_per_round               0.1
saved_teammates_per_round                0.12
rounds_with_kills                       14196
opening_kill_ratio                        1.6
team_win_percent_after_first_kill       73.1%
first_kill_in_won_rounds                21.4%
0_kill_rounds                        0.470358
1_kill_rounds                        0.305749
2_kill_rounds                        0.152483
3_kill_rounds                        0.054994
4_kill_rounds                     

# 🌎 General ML models
In this section, we create an experiment to evaluate different models. To this end, we require to perform the following steps:
1. Create an experiment object
2. Create a setup dictionary and setup the environment
3. perfomt the model comparison
4. evaluate model to observe the initial performance

In [35]:
# Define PyCaret classificaiton experimetn
obj_exp = RegressionExperiment()

In [36]:
# Configure and build the setup environment
steup_dict= {
                "data": df,
                "fold": 10,
                "target": "rating",
                "index": True,
                "session_id": 420,
                "experiment_name": "experiment_csgo_initial",
                "normalize": True,
                "normalize_method": "zscore",
                "verbose": True
            }

obj_exp.setup(**steup_dict)

Unnamed: 0,Description,Value
0,Session id,420
1,Target,rating
2,Target type,Regression
3,Original data shape,"(811, 30)"
4,Transformed data shape,"(811, 30)"
5,Transformed train set shape,"(567, 30)"
6,Transformed test set shape,"(244, 30)"
7,Numeric features,25
8,Categorical features,4
9,Preprocess,True


<pycaret.regression.oop.RegressionExperiment at 0x2814d5c90>

In [37]:
# Perform the model comparison
best = obj_exp.compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,0.0192,0.0006,0.0234,0.8974,0.0116,0.0187,0.26
ridge,Ridge Regression,0.0192,0.0006,0.0235,0.8966,0.0116,0.0187,0.009
br,Bayesian Ridge,0.0193,0.0006,0.0236,0.8958,0.0117,0.0188,0.011
huber,Huber Regressor,0.0195,0.0006,0.0239,0.8929,0.0118,0.019,0.016
gbr,Gradient Boosting Regressor,0.0198,0.0006,0.0253,0.8803,0.0125,0.0193,0.071
et,Extra Trees Regressor,0.0213,0.0007,0.0258,0.8762,0.0127,0.0207,0.043
lightgbm,Light Gradient Boosting Machine,0.0201,0.0007,0.026,0.8724,0.0128,0.0196,0.238
rf,Random Forest Regressor,0.0215,0.0007,0.0264,0.8693,0.013,0.0209,0.087
omp,Orthogonal Matching Pursuit,0.0214,0.0007,0.0269,0.8646,0.0132,0.0207,0.01
ada,AdaBoost Regressor,0.0273,0.0009,0.0304,0.8278,0.015,0.0267,0.041


In [38]:
obj_exp.evaluate_model(best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

# ✅ Best ML Model
In this section we use the ML model obtained from the previous step to:
1. Create the model
2. Tune the model
3. Finalize the model
4. Evaluate the model performance (metrics)