In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import pickle

# Read match data and remove ID
match_data = pd.read_csv("match_data.csv")
match_data = match_data.drop(columns=['ID'])

champions = ['Top', 'Jungle', 'Mid', 'Bot', 'Support', 'Enemy Top', 'Enemy Jungle', 'Enemy Mid', 'Enemy Bot', 'Enemy Support']
champion_data = match_data[champions]
in_game_attributes = ['Team Gold', 'Team Kills', 'Team Deaths', 'Dragons', 'Dragon Soul', 'Baron', 'Rift Herald', 'First tower', 'First Kill', 'Team Total CC']
game_data = match_data[in_game_attributes].astype(int)
display(champion_data)
display(game_data)
scaler = StandardScaler()
champions_encoded = pd.get_dummies(champion_data, columns=champions)
display(champions_encoded)

Unnamed: 0,Top,Jungle,Mid,Bot,Support,Enemy Top,Enemy Jungle,Enemy Mid,Enemy Bot,Enemy Support
0,Singed,Taliyah,Gangplank,Zeri,Renata,Riven,Graves,Swain,Twitch,Lulu
1,Riven,Graves,Swain,Twitch,Lulu,Singed,Taliyah,Gangplank,Zeri,Renata
2,Akali,Maokai,Yasuo,Ezreal,Pyke,Gangplank,Kindred,Sylas,Aphelios,Lulu
3,Gangplank,Kindred,Sylas,Aphelios,Lulu,Akali,Maokai,Yasuo,Ezreal,Pyke
4,Jayce,Elise,Sylas,Lucian,Sona,Karma,Ekko,Anivia,Caitlyn,Nami
...,...,...,...,...,...,...,...,...,...,...
16641,Vi,Alistar,Teemo,Zilean,Nilah,Gangplank,Khazix,Malphite,Rengar,KogMaw
16642,Renekton,Karthus,Yasuo,Caitlyn,Lulu,Camille,Viego,Sylas,Jhin,Lux
16643,Camille,Viego,Sylas,Jhin,Lux,Renekton,Karthus,Yasuo,Caitlyn,Lulu
16644,Fiora,Elise,Varus,Syndra,Alistar,Sett,Nidalee,Yasuo,Sivir,Nautilus


Unnamed: 0,Team Gold,Team Kills,Team Deaths,Dragons,Dragon Soul,Baron,Rift Herald,First tower,First Kill,Team Total CC
0,50485,22,41,0,0,0,0,0,0,114
1,59860,41,22,4,1,1,2,1,1,122
2,49125,22,38,0,0,1,2,1,1,110
3,52775,38,22,3,0,0,0,0,0,108
4,34785,20,4,1,0,0,2,1,1,32
...,...,...,...,...,...,...,...,...,...,...
16641,61370,52,35,0,0,0,0,1,1,149
16642,2500,0,0,0,0,0,0,0,0,0
16643,2450,0,0,0,0,0,0,0,0,0
16644,21685,7,21,0,0,0,0,0,0,62


Unnamed: 0,Top_Aatrox,Top_Ahri,Top_Akali,Top_Akshan,Top_Alistar,Top_Amumu,Top_Anivia,Top_Annie,Top_Aphelios,Top_Ashe,...,Enemy Support_Yone,Enemy Support_Yorick,Enemy Support_Yuumi,Enemy Support_Zac,Enemy Support_Zed,Enemy Support_Zeri,Enemy Support_Ziggs,Enemy Support_Zilean,Enemy Support_Zoe,Enemy Support_Zyra
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16641,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16642,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16643,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16644,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Functions for FNN and dataset conversion in PyTorch

In [2]:
class create_dataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return self.X[index], self.y[index]

def create_fnn(input_size, output_size, hidden_sizes):
    layers = []
    
    #input layer
    layers.append(nn.Linear(input_size, hidden_sizes[0]))
    layers.append(nn.ReLU())
    
    for i in range(1, len(hidden_sizes)):
        layers.append(nn.Linear(hidden_sizes[i-1], hidden_sizes[i]))
        layers.append(nn.ReLU())
    #hidden layers
    layers.append(nn.Linear(hidden_sizes[-1], output_size))
    #output layer
    model = nn.Sequential(*layers)
    
    return model

### Model Construction for Champions to Game Data

In [16]:

x_train, x_test, y_train_u, y_test_u = train_test_split(champions_encoded, game_data, test_size=0.3, random_state=40)
y_train = scaler.fit_transform(y_train_u)
y_test = scaler.fit_transform(y_test_u)

x_train = torch.tensor(np.array(x_train), dtype=torch.float32)
y_train = torch.tensor(y_train.astype(np.float32), dtype=torch.float32)
x_test = torch.tensor(np.array(x_test), dtype=torch.float32)
y_test = torch.tensor(y_test.astype(np.float32), dtype=torch.float32)

# Cuda
device = torch.device("cuda")

model = create_fnn(input_size=x_train.shape[1], output_size=y_train.shape[1], hidden_sizes=[64, 32]).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

batch_size = 32
train_dataset = create_dataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

num_epochs = 2
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}")
    model.eval()
    with torch.no_grad():
        x_test = x_test.to(device)
        y_test = y_test.to(device)
        y_pred = model(x_test)
        mse = mean_squared_error(y_test.cpu().numpy(), y_pred.cpu().numpy())
        print("Mean Squared Error:", mse)



Epoch 1/2, Training Loss: 0.9252
Mean Squared Error: 0.8796854
Epoch 2/2, Training Loss: 0.8486
Mean Squared Error: 0.8745535


In [17]:
model.eval()
with torch.no_grad():
    x_test = x_test.to(device)
    y_test = y_test.to(device)
    y_pred = model(x_test)
    mse = mean_squared_error(y_test.cpu().numpy(), y_pred.cpu().numpy())
    print("Mean Squared Error:", mse)

Mean Squared Error: 0.8745535


In [18]:
torch.save(model, 'fnn_num_model.pth')

## Testing Correlation to original test data

In [19]:
y_pred_unnorm = np.round(scaler.inverse_transform(y_pred.cpu().numpy())).astype(int)
np.set_printoptions(linewidth=np.inf)
y_pred_table = pd.DataFrame(y_pred_unnorm, columns=in_game_attributes)
display(y_pred_table)
display(y_test_u)

Unnamed: 0,Team Gold,Team Kills,Team Deaths,Dragons,Dragon Soul,Baron,Rift Herald,First tower,First Kill,Team Total CC
0,56579,38,38,1,0,0,0,1,0,110
1,46795,24,22,2,0,1,1,1,1,87
2,66877,53,51,1,0,0,0,1,0,128
3,62500,47,46,1,0,0,0,1,0,117
4,50077,29,24,2,0,1,1,1,1,88
...,...,...,...,...,...,...,...,...,...,...
4989,51743,31,31,1,0,0,1,1,1,105
4990,45886,24,24,1,0,0,1,0,1,129
4991,46798,24,22,2,0,1,1,1,1,93
4992,50442,27,23,2,0,1,1,1,1,129


Unnamed: 0,Team Gold,Team Kills,Team Deaths,Dragons,Dragon Soul,Baron,Rift Herald,First tower,First Kill,Team Total CC
7068,79200,44,42,3,0,0,0,1,1,174
13032,63945,34,21,3,0,1,2,1,0,122
7495,88580,72,57,0,0,0,0,1,1,181
13697,73500,67,44,0,0,0,0,1,0,150
11220,51225,27,27,3,0,0,1,1,1,91
...,...,...,...,...,...,...,...,...,...,...
11114,54225,29,34,2,0,0,1,1,1,107
7082,55350,38,34,1,0,1,1,1,0,160
12801,49400,34,26,2,0,0,1,0,1,89
701,60975,32,25,4,1,2,1,0,1,178


In [20]:
correlation_matrix = y_pred_table.corrwith(y_test_u)
print(correlation_matrix)

Team Gold        0.015714
Team Kills       0.032603
Team Deaths      0.026257
Dragons          0.008112
Dragon Soul           NaN
Baron           -0.001092
Rift Herald      0.011365
First tower      0.003619
First Kill       0.026876
Team Total CC   -0.047042
dtype: float64


In [21]:
miss_count = (y_pred_table['Dragon Soul'] == 0).sum()
miss_count2 = (y_test_u['Dragon Soul'] == 0).sum()
print(miss_count)
print(miss_count2)

4994
4592


### 2-Step Model Testing

In [28]:
import pickle
encoding_scheme = champions_encoded.columns
with open('random_forest_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)
with open("scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)
with open("encoding_scheme.pkl", "wb") as file:
    pickle.dump(encoding_scheme, file)

In [23]:
encoding_scheme = champions_encoded.columns
new_input = pd.DataFrame(columns=encoding_scheme)
row_with_zeroes = pd.Series(0, index=encoding_scheme)

# Add the row to the new_input DataFrame
new_input = pd.concat([new_input, row_with_zeroes.to_frame().T], ignore_index=True)
input_dict = {
    'Enemy Top': 'Singed',
    'Enemy Jungle': 'Taliyah',
    'Enemy Mid': 'Gangplank',
    'Enemy Bot': 'Zeri',
    'Enemy Support': 'Renata',
    'Top': 'Riven',
    'Jungle': 'Graves',
    'Mid': 'Swain',
    'Bot': 'Twitch',
    'Support': 'Lulu'
}
df = pd.DataFrame(input_dict, index=[0])
encoded_input = pd.get_dummies(df)
intersect = new_input.columns.intersection(encoded_input.columns)
new_input[intersect] = 1
display(new_input)

Unnamed: 0,Top_Aatrox,Top_Ahri,Top_Akali,Top_Akshan,Top_Alistar,Top_Amumu,Top_Anivia,Top_Annie,Top_Aphelios,Top_Ashe,...,Enemy Support_Yone,Enemy Support_Yorick,Enemy Support_Yuumi,Enemy Support_Zac,Enemy Support_Zed,Enemy Support_Zeri,Enemy Support_Ziggs,Enemy Support_Zilean,Enemy Support_Zoe,Enemy Support_Zyra
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
model = torch.load('fnn_num_model.pth')
model.eval()
input_tensor = torch.tensor(np.array(new_input, dtype=np.float32), dtype=torch.float32)

if torch.cuda.is_available():
    input_tensor = input_tensor.cuda()
with torch.no_grad():
    output = model(input_tensor)
    
post_out = np.round(scaler.inverse_transform(output.cpu().numpy())).astype(int)
out_table = pd.DataFrame(post_out, columns=in_game_attributes)
display(out_table)

Unnamed: 0,Team Gold,Team Kills,Team Deaths,Dragons,Dragon Soul,Baron,Rift Herald,First tower,First Kill,Team Total CC
0,47617,25,24,2,0,1,1,1,1,119


In [25]:
output = loaded_model.predict(post_out)
print(output[0])
prob_out = loaded_model.predict_proba(post_out)[:, 1]
print("{:.2f}%".format(prob_out[0] * 100))

True
54.22%
