In [1]:
# Imports
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Craindo o dataframe do arquivo .csv dentro da pasta zip
df = pd.read_csv('csgo_round_snapshots.zip')
# Fazendo a leitura das 5 primeiras linhas
df.head()

Unnamed: 0,time_left,ct_score,t_score,map,bomb_planted,ct_health,t_health,ct_armor,t_armor,ct_money,...,t_grenade_flashbang,ct_grenade_smokegrenade,t_grenade_smokegrenade,ct_grenade_incendiarygrenade,t_grenade_incendiarygrenade,ct_grenade_molotovgrenade,t_grenade_molotovgrenade,ct_grenade_decoygrenade,t_grenade_decoygrenade,round_winner
0,175.0,0.0,0.0,de_dust2,False,500.0,500.0,0.0,0.0,4000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
1,156.03,0.0,0.0,de_dust2,False,500.0,500.0,400.0,300.0,600.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
2,96.03,0.0,0.0,de_dust2,False,391.0,400.0,294.0,200.0,750.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
3,76.03,0.0,0.0,de_dust2,False,391.0,400.0,294.0,200.0,750.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
4,174.97,1.0,0.0,de_dust2,False,500.0,500.0,192.0,0.0,18350.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CT


In [3]:
# Alterando a coluna de bomba plantada para 0 e 1
df['bomb_planted'] = df.bomb_planted.map(lambda x: x *1)
# Selecionando apenas dados de bomba plantada
df = df.query('bomb_planted == 1')
# Dummizando os mapas
df = pd.get_dummies(df)
# Fazendo a leitura das 5 primeiras linhas
df.head()

Unnamed: 0,time_left,ct_score,t_score,bomb_planted,ct_health,t_health,ct_armor,t_armor,ct_money,t_money,...,map_de_cache,map_de_dust2,map_de_inferno,map_de_mirage,map_de_nuke,map_de_overpass,map_de_train,map_de_vertigo,round_winner_CT,round_winner_T
20,29.27,2.0,1.0,1,379.0,99.0,487.0,93.0,2050.0,100.0,...,0,1,0,0,0,0,0,0,1,0
26,22.7,3.0,1.0,1,100.0,149.0,100.0,175.0,1050.0,1250.0,...,0,1,0,0,0,0,0,0,0,1
33,37.38,3.0,2.0,1,300.0,414.0,300.0,441.0,250.0,6300.0,...,0,1,0,0,0,0,0,0,0,1
34,17.38,3.0,2.0,1,300.0,414.0,300.0,441.0,250.0,6300.0,...,0,1,0,0,0,0,0,0,0,1
39,24.84,3.0,3.0,1,67.0,0.0,96.0,0.0,2150.0,0.0,...,0,1,0,0,0,0,0,0,1,0


In [4]:
# Visualizando as informações do data frame
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13684 entries, 20 to 122405
Columns: 105 entries, time_left to round_winner_T
dtypes: float64(94), int64(1), uint8(10)
memory usage: 10.2 MB


In [5]:
# Separando as variaveis explicativas
X = df.drop(columns=['round_winner_CT'])
# Separando as variaveis dependentes
y = df.round_winner_CT

In [6]:
# Separando o dataframe em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

In [7]:
# Criando o modelo
model = LogisticRegression(random_state=1)
print("------------------------------ Treinando Modelo ------------------------------")
# Treinando o modelo
model.fit(X_train, y_train)
print("------------------------------- Modelo treinado ------------------------------")
# Prevendo as probabilidades do modelo
y_pred = model.predict_proba(X_test)
print("--------------------------- Probablidades definidas --------------------------")

------------------------------ Treinando Modelo ------------------------------
------------------------------- Modelo treinado ------------------------------
--------------------------- Probablidades definidas --------------------------


In [8]:
# Extraíndo os valores de probabilidade da lista, colocando no formato de porcentagem com duas casas depois da vírgula
y_predict = [round(pred[1]*100, 2) for pred in y_pred]

In [9]:
# Criando um data frame comparando os round em que o retake foi bem-sucedido (0) ou não (1), assim como sua respectiva probabilidade de sucesso.
data = {'round_winner_CT': y_test, 'predict_in_pct': y_predict}
result = pd.DataFrame(data)
result.sample(10)

Unnamed: 0,round_winner_CT,predict_in_pct
60727,0,25.88
58197,0,2.23
46895,0,24.61
1085,0,8.32
19485,0,55.91
115204,0,12.5
67694,0,6.08
7055,0,0.44
93913,0,0.11
53820,0,0.13


In [10]:
# A acurácia do modelo
print(f'A acurácia do modelo foi de {round(model.score(X_test, y_test)*100, 2)}%')

A acurácia do modelo foi de 90.62%
