### Important Libraries

In [1041]:
# Python Libraries
import random
import math
import numbers
import platform
import copy
import os
import time
import re
import pickle

# Importing essential libraries for basic image manipulations.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL

import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import torchvision.transforms as transforms
import torchvision.transforms.functional as tF
import torchvision.models as models
from sklearn.model_selection import train_test_split

In [1042]:
%matplotlib inline

# Enable/Disable GPU 
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

### Functions for transforming CNN annotation_file data to inputable data for DNN

In [1043]:
def format_for_dnn(champion_label_df, champion_label_df_count):

    s_list = []
    o_list = []

    for i in range(len(champion_label_df_count)):
        youtuber = champion_label_df_count.iloc[i, :].youtuber
        video_name = champion_label_df_count.iloc[i, :].video_name
        frame_name = champion_label_df_count.iloc[i, :].frame_name

        subdf = champion_label_df[(champion_label_df.youtuber == youtuber)&(champion_label_df.video_name == video_name)&(champion_label_df.frame_name == frame_name)]
        subdf = subdf.loc[:, ['cropped_name', 'predicted']]


        input_s = np.zeros(shape=(28,))
        input_o = np.zeros(shape=(28,))

        for j in range(len(subdf)):
            cropped_name = subdf.iloc[j, :].cropped_name
            predicted = subdf.iloc[j, :].predicted


            # Find which player
            player_type = re.search(r'S|O', cropped_name)
            if player_type:
                player_type = player_type[0]
            else:
                print('something wrong')
            
            champ_onehot = master_champ_list.index(predicted)


            # Find which board index
            board_index = re.search(r'\d+', cropped_name)
            if board_index:
                board_index = int(board_index[0])
            else:
                print('something wrong')

            if player_type == 'S':
                input_s[board_index] = champ_onehot
            elif player_type == 'O':
                input_o[board_index] = champ_onehot

        input_s = input_s.reshape((4, 7)).astype(int)
        input_o = input_o.reshape((4, 7)).astype(int)
        s_list.append(input_s)
        o_list.append(input_o)

    s, o = np.array(s_list), np.array(o_list)

    return s, o

In [1044]:
def before_customimagedataset(s, o, y):
    n = len(s)
    # Proportion of test set size
    test_size = 0.5
    # Calculate where to split
    test_start_idx = int(np.ceil(test_size * n))
    # All indices of data
    indices = np.arange(0, n)
    # Shuffle indices array
    np.random.shuffle(indices)


    train_indices = indices[:test_start_idx]
    test_indices = indices[test_start_idx:]

    s_train = s[train_indices]
    s_test = s[test_indices]
    o_train = o[train_indices]
    o_test = o[test_indices]

    y_train = y[train_indices]
    y_test = y[test_indices]

    return s_train, s_test, o_train, o_test, y_train, y_test

In [1045]:
class CustomImageDataset(Dataset):
    def __init__(self, s, o, y):
        
        self.s = s
        self.o = o
        self.y = y
        
    def __len__(self):
        return len(self.s)

    def __getitem__(self, idx):

        board_s = self.s[idx]
        board_o = self.o[idx]
        label = self.y[idx]

        return board_s, board_o, label

In [1046]:
embedding = nn.Embedding(10, 3)
# a batch of 2 samples of 4 indices each
input = torch.LongTensor([[1,2,4,5],[4,3,2,9]])
embedding(input)

tensor([[[ 0.9947, -0.4180,  0.0167],
         [ 0.6942,  0.5163,  1.5398],
         [-0.7825, -1.4319, -0.0774],
         [-1.6815, -1.4351,  0.0784]],

        [[-0.7825, -1.4319, -0.0774],
         [-0.9028, -1.6391,  0.5435],
         [ 0.6942,  0.5163,  1.5398],
         [-0.3060,  1.1971,  2.3434]]], grad_fn=<EmbeddingBackward0>)

### WIN PREDICTOR NET

In [1047]:
ny, nx = 10, 10
x, y = np.arange(nx), np.arange(ny)
np.meshgrid(x, y)

[array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]),
 array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
        [7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
        [8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
        [9, 9, 9, 9, 9, 9, 9, 9, 9, 9]])]

In [1048]:
# Function takes in cmp which is 2 dimensional 
# [ [10, 8],
#   [84, 3] ]
# Where i, j is the one hot representation of the champions on ith row, jth col
# trained_vec is a dictionary that convert each champions into a vector formate of size 2

def pretrain_init(cmp, champ2vec):
    
    def vectoring_champ(x):
        return champ2vec.get(x)
    
    return np.vectorize(vectoring_champ)(cmp)

In [1049]:
class Win_Predictor_Net(nn.Module):
    def __init__(self, criterion, 
                 cmp_size = 85, embedding_size = 4, hidden_size = 10):
        super(Win_Predictor_Net, self).__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.cmp_size = cmp_size
        self.criterion = criterion
        # Embeddings to learn for champions 
        self.layer_cmp_emb = nn.Embedding(
            num_embeddings=self.cmp_size+1,
            embedding_dim=self.embedding_size,
            padding_idx=85)#the onehot representation for background (check later)
        
        self.layer_w_0_s = nn.Linear(
            in_features=self.embedding_size*28,
            out_features=self.hidden_size,
            bias=True)
        
        self.layer_w_0_o = nn.Linear(
            in_features=self.embedding_size*28,
            out_features=self.hidden_size,
            bias=True)

        self.layer_w_1 = nn.Linear(
            in_features=2*self.hidden_size,
            out_features=1,
            bias=True)
        
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, scmp, ocmp, pretrain = False):

        # champion level embedings
        if not pretrain:
            E_self = self.layer_cmp_emb(scmp)
            E_opp = self.layer_cmp_emb(ocmp)
        else:
            E_self = pretrain_init(scmp)
            E_opp = pretrain_init(ocmp)
            
        # SELF SIDE
        s = E_self.view(-1, self.embedding_size)# (28 x self.embedding_size)
        s = torch.flatten(s) #(28 x self.embedding_size, 1)
        s = torch.tanh(self.layer_w_0_s(s))# (28 x self.hidden_size)

        # OPPONENT SIDE
        o = E_opp.view(-1, self.embedding_size)
        o = torch.flatten(o) #(28 x self.embedding_size, 1)
        o = torch.tanh(self.layer_w_0_o(o))
        
        # concat SELF AND OPPONENT
        concat = torch.cat((s, o), axis = 0) # (2 x 28 x self.embedding_size,1)

        x = self.layer_w_1(concat)

        x = torch.tanh(x)
        x = self.sigmoid(x)
        
        return x.float()

In [1050]:
def train_model(model, dataloaders, optimizer, num_epochs=25):
    
    since = time.time()
    acc_list = []
    model.train() # In training mode

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data.
        for inputs_s, inputs_o, labels in dataloaders:
            inputs_s = inputs_s.to(device)
            inputs_o = inputs_o.to(device)
            labels = labels.to(device).float()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            outputs = model(inputs_s, inputs_o)
            loss = model.criterion(outputs, labels)
            
            preds = torch.round(outputs)

            # backward + optimize only if in training phase
            loss.backward()
            optimizer.step()

            # statistics
            running_loss += loss.item() * inputs_s.size(0)
            running_corrects += torch.sum(preds == labels.data)

        # Epoch information
        epoch_loss = running_loss / len(dataloaders.dataset)
        epoch_acc = running_corrects.double() / len(dataloaders.dataset)
        acc_list.append(epoch_acc)

        print('Training Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))


    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    
    return acc_list

In [1051]:
def eval_model(model, dataloaders):
    
    since = time.time()
    model.eval() # In training mode

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    for inputs_s, inputs_o, labels in dataloaders:
        inputs_s = inputs_s.to(device)
        inputs_o = inputs_o.to(device)
        labels = labels.to(device).float()
        
        with torch.no_grad():

            # forward
            outputs = model(inputs_s, inputs_o)
            loss = model.criterion(outputs, labels)

            preds = torch.round(outputs)

            # statistics
            running_loss += loss.item() * inputs_s.size(0)
            running_corrects += torch.sum(preds == labels.data)

    overall_loss = running_loss / len(dataloaders.dataset)
    overall_acc = running_corrects.double() / len(dataloaders.dataset)

    print('Evaluation Loss: {:.4f} Acc: {:.4f}'.format(overall_loss, overall_acc))
    

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    
    return overall_acc

## Make data inputable:
- input data should be (n, 4, 7)

In [1052]:
# Data with predicted labels:
#   useful columns: 
#           youtuber --> (only Mortdog for now)
#           video_name
#           frame_name
#           cropped_name --> for positional value
#           predicted --> for converting to one-hot

champion_label_df = pd.read_csv(os.path.join(os.getcwd(), 'data', 'Champion_Label_FINAL.csv'))


with open("data/master_champ_list.pkl", "rb") as input_file:
        master_champ_list = pickle.load(input_file)

In [1070]:
# Work on only Mortdog data
champion_label_df = champion_label_df[champion_label_df.youtuber == 'Mortdog']

# Figure out the youtuber, video_name, frame_name keys that are valid for the input
champion_label_df_count = champion_label_df[champion_label_df.youtuber == 'Mortdog'].groupby(['youtuber', 'video_name', 'frame_name']).size().reset_index(name='img_count')
champion_label_df_count = champion_label_df_count[champion_label_df_count.img_count == 56]

champion_label_df_count['y'] = np.nan

champion_label_df_count = champion_label_df_count.loc[:, ['youtuber', 'video_name', 'frame_name', 'y']]

# # Change data so it fits the DNN
# s, o = format_for_dnn(champion_label_df, champion_label_df_count)

# y = champion_label_df_count['y']# NEED TO LABEL AT SOME POINT



# # # Uncomment if you need to relabel the win/loss
# # path_to_win_loss_label = os.path.join(os.getcwd(), 'data', 'win_loss_label.csv')
# # champion_label_df_count.to_csv(path_to_win_loss_label, index = False)

### DELETE CODE CHUNK BELOW WHEN IT 100% WORKS

In [1088]:
# Work on only Mortdog data
sample_df = champion_label_df[champion_label_df.youtuber == 'Mortdog'][0:56*100]

# Figure out the youtuber, video_name, frame_name keys that are valid for the input
sample_df_count = sample_df[sample_df.youtuber == 'Mortdog'].groupby(['youtuber', 'video_name', 'frame_name']).size().reset_index(name='img_count')
sample_df_count = sample_df_count[sample_df_count.img_count == 56]

sample_df_count = sample_df_count.loc[:, ['youtuber', 'video_name', 'frame_name']]

s, o = format_for_dnn(sample_df, sample_df_count)

y = np.round(np.random.random(len(s))).astype(int)

### Data Loader

In [1089]:
batch_size = 1

s_train, s_test, o_train, o_test, y_train, y_test = before_customimagedataset(s, o, y)

trainset = CustomImageDataset(s_train, o_train, y_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size, num_workers=0, shuffle=False)

testset = CustomImageDataset(s_test, o_test, y_test)
testloader = torch.utils.data.DataLoader(testset, batch_size, num_workers=0, shuffle=False)

### Train

In [1090]:
win_predictor = Win_Predictor_Net(nn.BCELoss())

In [1091]:
optimizer_SGD = torch.optim.SGD(win_predictor.parameters(), lr=0.01, momentum=0.9)

In [1096]:
train_model(win_predictor, trainloader, optimizer_SGD, 5)

Epoch 0/4
----------
Training Loss: 0.3159 Acc: 1.0000
Epoch 1/4
----------
Training Loss: 0.3157 Acc: 1.0000
Epoch 2/4
----------
Training Loss: 0.3155 Acc: 1.0000
Epoch 3/4
----------
Training Loss: 0.3154 Acc: 1.0000
Epoch 4/4
----------
Training Loss: 0.3152 Acc: 1.0000
Training complete in 0m 0s


[tensor(1., dtype=torch.float64),
 tensor(1., dtype=torch.float64),
 tensor(1., dtype=torch.float64),
 tensor(1., dtype=torch.float64),
 tensor(1., dtype=torch.float64)]

### Eval

In [1097]:
eval_model(win_predictor, testloader)

Evaluation Loss: 0.9011 Acc: 0.4000
Training complete in 0m 0s


tensor(0.4000, dtype=torch.float64)

Unnamed: 0,youtuber,video_name,frame_name,img_count
0,Mortdog,100 HP Challenger - Nerfed but still so good ...,frame11880.jpg,56
1,Mortdog,100 HP Challenger - Nerfed but still so good ...,frame13710.jpg,56
2,Mortdog,100 HP Challenger - Nerfed but still so good ...,frame15480.jpg,56
3,Mortdog,100 HP Challenger - Nerfed but still so good ...,frame17220.jpg,56
4,Mortdog,100 HP Challenger - Nerfed but still so good ...,frame1800.jpg,56
