# Model Training

In [1]:
# Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Dataset
total_data = pd.read_csv('../data/interim/clean_match_data.csv')
total_data['members_team_1'] = total_data['members_team_1'].apply(eval)
total_data['members_team_2'] = total_data['members_team_2'].apply(eval)
total_data.head()

Unnamed: 0,members_team_1,members_team_2,map,avg_adr_team_1,sum_assists_team_1,sum_deaths_team_1,sum_fkdiffs_team_1,sum_hs_team_1,mean_kdratio_team_1,sum_kills_team_1,avg_adr_team_2,sum_assists_team_2,sum_deaths_team_2,sum_fkdiffs_team_2,sum_hs_team_2,mean_kdratio_team_2,sum_kills_team_2,winning_team
0,"(168, 5737, 2492, 10814, 11247)","(5698, 10563, 2532, 7382, 5736)",Train,58.72,4.0,89.0,-6.0,24.0,0.47,48.0,92.0,15.0,48.0,6.0,36.0,0.82,89.0,2
1,"(483, 484, 2757, 3347, 7594)","(2469, 7398, 7592, 4954, 429)",Dust2,72.598373,21.0,54.0,12.0,32.0,0.69258,89.0,75.954746,15.0,90.0,-12.0,25.0,0.709509,54.0,1
2,"(10565, 11302, 10795, 10797, 10798)","(5698, 10563, 2532, 7382, 5736)",Cache,62.88,11.0,96.0,-5.0,28.0,0.536,66.0,81.04,17.0,66.0,5.0,33.0,0.792,96.0,2
3,"(12272, 10897, 12102, 8493, 11230)","(9571, 10372, 8708, 9705, 9069)",Mirage,69.66,13.0,102.0,3.0,45.0,0.6374,86.0,79.9,26.0,86.0,-3.0,51.0,0.6892,102.0,2
4,"(483, 484, 2757, 3347, 7594)","(338, 7796, 472, 1866, 7403)",Train,87.8,19.0,65.0,3.0,39.0,0.8176,96.0,66.64,18.0,96.0,-3.0,31.0,0.5826,65.0,1


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Concatenate, Embedding, Flatten, concatenate, Dropout

X = total_data[[
    'members_team_1',
    'members_team_2',
    'map',  
    'avg_adr_team_1',
    'sum_assists_team_1', 
    'sum_deaths_team_1', 
    'sum_fkdiffs_team_1',
    'sum_hs_team_1', 
    'mean_kdratio_team_1', 
    'sum_kills_team_1',     
    'avg_adr_team_2', 
    'sum_assists_team_2',
    'sum_deaths_team_2', 
    'sum_fkdiffs_team_2', 
    'sum_hs_team_2',
    'mean_kdratio_team_2', 
    'sum_kills_team_2'
]]

y = total_data['winning_team']

numerical_features = [
    'avg_adr_team_1',
    'sum_assists_team_1', 
    'sum_deaths_team_1', 
    'sum_fkdiffs_team_1',
    'sum_hs_team_1', 
    'mean_kdratio_team_1', 
    'sum_kills_team_1',     
    'avg_adr_team_2', 
    'sum_assists_team_2',
    'sum_deaths_team_2', 
    'sum_fkdiffs_team_2', 
    'sum_hs_team_2',
    'mean_kdratio_team_2', 
    'sum_kills_team_2']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=69, stratify=y)








In [3]:

from pickle import dump

X_train_frozenset_1 = X_train[['members_team_1']]
X_test_frozenset_1 = X_test[['members_team_1']]

X_train_frozenset_2 = X_train[['members_team_2']]
X_test_frozenset_2 = X_test[['members_team_2']]

# Standardize numerical features
scaler = StandardScaler()
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

# One-hot encode categorical features
encoder_categorical = OneHotEncoder(sparse_output=False, drop='first')
X_train['map'] = encoder_categorical.fit_transform(X_train[['map']])
X_test['map'] = encoder_categorical.transform(X_test[['map']])

# Determine the number of categories dynamically
num_categories = len(total_data['members_team_1'].unique()) + len(total_data['members_team_2'].unique())

# Use a hash function to map frozensets to integers
hash_function = lambda x: hash(x)
X_train['members_team_1'] = X_train_frozenset_1['members_team_1'].apply(hash_function)
X_test['members_team_1'] = X_test_frozenset_1['members_team_1'].apply(hash_function)

X_train['members_team_2'] = X_train_frozenset_2['members_team_2'].apply(hash_function)
X_test['members_team_2'] = X_test_frozenset_2['members_team_2'].apply(hash_function)

# Standardize numerical features
X_train[['members_team_1', 'members_team_2']] = scaler.fit_transform(X_train[['members_team_1', 'members_team_2']])
X_test[['members_team_1', 'members_team_2']] = scaler.transform(X_test[['members_team_1', 'members_team_2']])

# Save scalers
with open("../models/scalers/onehot_scaler.pkl", "wb") as f:
    dump(encoder_categorical, f)

with open("../models/scalers/standard_scaler.pkl", "wb") as f:
    dump(scaler, f)


In [4]:
# Input layers for each branch
input1 = Input(shape=(2,))
input2 = Input(shape=(len(X_train.columns)-2,))

dense_layer1 = Dense(64, activation='relu')(input1)
flatten1 = Flatten()(dense_layer1)

# Dense layer for the second input
dense_layer2 = Dense(32, activation='relu')(input2)
flatten2 = Flatten()(dense_layer2)

# Add more hidden layers
dense_layer3 = Dense(64, activation='relu')(flatten1)
dense_layer4 = Dense(64, activation='relu')(flatten2)

dropout1 = Dropout(0.5)(dense_layer3)
dropout2 = Dropout(0.5)(dense_layer4)

concatenated = concatenate([dropout1, dropout2])

# Additional layers for further processing
dense1 = Dense(128, activation='relu')(concatenated)
output_layer = Dense(1, activation='sigmoid')(dense1)

# Create the model
model = Model(inputs=[input1, input2], outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 2)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 15)]                 0         []                            
                                                                                                  
 dense (Dense)               (None, 64)                   192       ['input_1[0][0]']             
                                                                                                  
 dense_1 (Dense)             (None, 32)                   512       ['input_2[0][0]']             
                                                                                            

In [5]:


# Train the model
model.fit([X_train[['members_team_1','members_team_2']], pd.concat([X_train['map'], X_train[numerical_features]], axis=1)], y_train, epochs=20, batch_size=32, validation_split=0.1)



Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x1c610092150>

In [6]:
# Evaluate the model on the test set
loss = model.evaluate([X_test[['members_team_1','members_team_2']], pd.concat([X_test['map'], X_test[numerical_features]], axis=1)], y_test)
print(f'Mean Squared Error on Test Set: {loss}')

Mean Squared Error on Test Set: [-3620518756352.0, 0.504741907119751]


In [7]:
from pickle import dump

with open("../models/csgo_match_results_predictor-beta1.pkl", "wb") as f:
    dump(model, f)

model.save("../models/csgo_match_results_predictor-beta1.h5")

  saving_api.save_model(


In [8]:
temp = X_test.iloc[0]

len(temp)

17

In [9]:
temp = X_test.iloc[0]

temp[2:len(temp)] = 0.0

input1_data = np.array([[-0.025245, 0.959970]])  
input2_data = np.array([temp.iloc[2:].values])  

predictions = model.predict([input1_data, input2_data])

# predictions 

predictions

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp[2:len(temp)] = 0.0




array([[1.]], dtype=float32)

In [10]:
total_data.head()

Unnamed: 0,members_team_1,members_team_2,map,avg_adr_team_1,sum_assists_team_1,sum_deaths_team_1,sum_fkdiffs_team_1,sum_hs_team_1,mean_kdratio_team_1,sum_kills_team_1,avg_adr_team_2,sum_assists_team_2,sum_deaths_team_2,sum_fkdiffs_team_2,sum_hs_team_2,mean_kdratio_team_2,sum_kills_team_2,winning_team
0,"(168, 5737, 2492, 10814, 11247)","(5698, 10563, 2532, 7382, 5736)",Train,58.72,4.0,89.0,-6.0,24.0,0.47,48.0,92.0,15.0,48.0,6.0,36.0,0.82,89.0,2
1,"(483, 484, 2757, 3347, 7594)","(2469, 7398, 7592, 4954, 429)",Dust2,72.598373,21.0,54.0,12.0,32.0,0.69258,89.0,75.954746,15.0,90.0,-12.0,25.0,0.709509,54.0,1
2,"(10565, 11302, 10795, 10797, 10798)","(5698, 10563, 2532, 7382, 5736)",Cache,62.88,11.0,96.0,-5.0,28.0,0.536,66.0,81.04,17.0,66.0,5.0,33.0,0.792,96.0,2
3,"(12272, 10897, 12102, 8493, 11230)","(9571, 10372, 8708, 9705, 9069)",Mirage,69.66,13.0,102.0,3.0,45.0,0.6374,86.0,79.9,26.0,86.0,-3.0,51.0,0.6892,102.0,2
4,"(483, 484, 2757, 3347, 7594)","(338, 7796, 472, 1866, 7403)",Train,87.8,19.0,65.0,3.0,39.0,0.8176,96.0,66.64,18.0,96.0,-3.0,31.0,0.5826,65.0,1
