### Important Libraries

In [4]:
# Python Libraries
import random
import math
import numbers
import platform
import copy
import os
import time
import re
import pickle

# Importing essential libraries for basic image manipulations.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL

import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import torchvision.transforms as transforms
import torchvision.transforms.functional as tF
import torchvision.models as models
from sklearn.model_selection import train_test_split

In [5]:
%matplotlib inline

# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

### Functions for converting the CNN annotation-file data into input arrays for the DNN

In [6]:
def format_for_dnn(champion_label_df, champion_label_df_count, champ_list=None):
    """Convert per-crop champion predictions into per-frame board arrays.

    Parameters
    ----------
    champion_label_df : pandas.DataFrame
        One row per cropped board cell. Columns read: ``youtuber``,
        ``video_name``, ``frame_name``, ``cropped_name`` and ``predicted``.
    champion_label_df_count : pandas.DataFrame
        One row per frame to convert. Columns read: ``youtuber``,
        ``video_name``, ``frame_name`` and ``outcome``.
    champ_list : sequence, optional
        Ordered champion labels; a label's position is its integer encoding.
        Defaults to the module-level ``master_champ_list``.

    Returns
    -------
    s, o, y : numpy.ndarray
        ``s`` and ``o`` stack one ``(4, 7)`` integer board per frame for the
        ``S`` and ``O`` crops respectively; ``y`` holds the frame outcomes.
        Frames appear in the row order of ``champion_label_df_count``.

    Raises
    ------
    ValueError
        If a ``predicted`` label of a converted frame is not in ``champ_list``.
    """
    if champ_list is None:
        champ_list = master_champ_list

    # First occurrence wins, which matches list.index() on duplicated labels.
    champ_to_index = {}
    for position, champ in enumerate(champ_list):
        champ_to_index.setdefault(champ, position)

    # Bucket the crops by frame in a single pass instead of re-filtering the
    # whole table once per frame.
    crops_by_frame = {}
    for youtuber, video_name, frame_name, cropped_name, predicted in zip(
            champion_label_df['youtuber'],
            champion_label_df['video_name'],
            champion_label_df['frame_name'],
            champion_label_df['cropped_name'],
            champion_label_df['predicted']):
        frame_key = (youtuber, video_name, frame_name)
        crops_by_frame.setdefault(frame_key, []).append((cropped_name, predicted))

    s_list = []
    o_list = []
    y_list = []

    for youtuber, video_name, frame_name, outcome in zip(
            champion_label_df_count['youtuber'],
            champion_label_df_count['video_name'],
            champion_label_df_count['frame_name'],
            champion_label_df_count['outcome']):

        boards = {'S': np.zeros(shape=(28,)), 'O': np.zeros(shape=(28,))}
        y_list.append(outcome)

        for cropped_name, predicted in crops_by_frame.get((youtuber, video_name, frame_name), []):
            # Which player: the first 'S' or 'O' found in the crop name.
            player_type = re.search(r'S|O', cropped_name)
            # Which board cell: the first run of digits found in the crop name.
            board_index = re.search(r'\d+', cropped_name)

            if predicted not in champ_to_index:
                raise ValueError('{!r} is not in the champion list'.format(predicted))

            if player_type is None or board_index is None:
                # Skip the crop: indexing a board with None would overwrite
                # every cell of that board with this champion.
                print('something wrong: cannot parse player/board index from {!r}; row skipped'.format(cropped_name))
                continue

            boards[player_type[0]][int(board_index[0])] = champ_to_index[predicted]

        s_list.append(boards['S'].reshape((4, 7)).astype(int))
        o_list.append(boards['O'].reshape((4, 7)).astype(int))

    s, o, y = np.array(s_list), np.array(o_list), np.array(y_list)

    return s, o, y

In [7]:
def before_customimagedataset(s, o, y, test_size, random_state=None):
    """Randomly split the aligned arrays ``s``, ``o`` and ``y`` into train and test parts.

    Parameters
    ----------
    s, o, y : array-like
        Aligned along the first axis (sample ``i`` of each belongs together).
    test_size : float
        Fraction of samples, between 0 and 1, assigned to the test part.
        The test count is rounded up.
    random_state : int, optional
        Seed for a private random generator, giving a reproducible split.
        When omitted, the global NumPy generator is used, so a prior
        ``np.random.seed`` call still controls the split.

    Returns
    -------
    s_train, s_test, o_train, o_test, y_train, y_test : numpy.ndarray

    Raises
    ------
    ValueError
        If the inputs differ in length or ``test_size`` is outside ``[0, 1]``.
    """
    s, o, y = np.asarray(s), np.asarray(o), np.asarray(y)
    n = len(s)
    if len(o) != n or len(y) != n:
        raise ValueError('s, o and y must have the same length, got {}, {} and {}'.format(n, len(o), len(y)))
    if not 0 <= test_size <= 1:
        raise ValueError('test_size must be between 0 and 1, got {}'.format(test_size))

    # Number of samples that go to the test part.
    n_test = int(np.ceil(test_size * n))

    # Shuffle one index array and apply it to all three inputs so they stay aligned.
    indices = np.arange(0, n)
    if random_state is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(random_state).shuffle(indices)

    test_indices = indices[:n_test]
    train_indices = indices[n_test:]

    s_train, s_test = s[train_indices], s[test_indices]
    o_train, o_test = o[train_indices], o[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return s_train, s_test, o_train, o_test, y_train, y_test

In [8]:
class CustomImageDataset(Dataset):
    """Dataset over three parallel indexable containers.

    Despite the name, no image files are read here: every item is taken
    directly from the ``s``, ``o`` and ``y`` containers given at construction.
    """

    def __init__(self, s, o, y):
        # Keep references to the self-side boards, opponent-side boards and labels.
        self.s, self.o, self.y = s, o, y

    def __len__(self):
        # The sample count is defined by the self-side container.
        return len(self.s)

    def __getitem__(self, idx):
        # One sample: (self-side board, opponent-side board, label).
        return self.s[idx], self.o[idx], self.y[idx]

### WIN PREDICTOR NET

In [9]:
# Function takes in cmp, which is 2 dimensional, e.g.
# [ [10, 8],
#   [84, 3] ]
# where entry i, j is the integer id of the champion on the ith row, jth col.
# champ2vec is a dictionary that maps each champion id to its representation
# (a scalar or a fixed-size vector).

def pretrain_init(cmp, champ2vec):
    """Replace every champion id in ``cmp`` with its entry from ``champ2vec``.

    Parameters
    ----------
    cmp : array-like
        Champion ids, of any shape.
    champ2vec : dict
        Maps a champion id to a scalar or to a fixed-size vector.

    Returns
    -------
    numpy.ndarray
        Shape ``cmp.shape`` for scalar entries, or ``cmp.shape + (d,)`` when
        every entry is a vector of length ``d``.

    Raises
    ------
    KeyError
        If an id in ``cmp`` has no entry in ``champ2vec``.
    """
    ids = np.asarray(cmp)
    # tolist() yields plain Python scalars, so dictionary lookups behave the
    # same way they did under np.vectorize.
    flat_ids = ids.ravel().tolist()

    missing = [champ for champ in dict.fromkeys(flat_ids) if champ not in champ2vec]
    if missing:
        raise KeyError('no entry in champ2vec for champion id(s): {}'.format(missing))

    # np.vectorize without a signature cannot store vector-valued results, so
    # collect the looked-up values first and restore the board shape afterwards.
    values = np.array([champ2vec[champ] for champ in flat_ids])
    return values.reshape(ids.shape + values.shape[1:])

In [10]:
class Win_Predictor_Net(nn.Module):
    """Feed-forward win predictor over two embedded 4x7 boards.

    Both boards are embedded with a shared embedding table, flattened, passed
    through a separate linear layer per side, concatenated, and reduced to one
    sigmoid output per sample.

    Parameters
    ----------
    criterion : callable
        Loss function, stored on the instance as ``self.criterion``.
    cmp_size : int
        Number of champion ids. Index ``cmp_size`` is reserved as the padding
        index (presumably the background / empty-cell class -- TODO confirm).
    embedding_size : int
        Dimension of each champion embedding.
    hidden_size_1 : int
        Output width of each per-side linear layer.
    hidden_size_2 : int
        Half the output width of the joint hidden layer.
    pre_train : bool
        When True, the embedding weights are loaded from ``pretrained_path``.
    pretrained_path : str
        ``.npy`` file whose array is copied into the embedding weights.
    """

    def __init__(self, criterion, 
                 cmp_size = 85, embedding_size = 4, hidden_size_1 = 25, hidden_size_2 = 10, pre_train = False,
                 pretrained_path = "data/emb_train.npy"):
        super(Win_Predictor_Net, self).__init__()

        self.embedding_size = embedding_size
        self.hidden_size_1 = hidden_size_1
        self.hidden_size_2 = hidden_size_2
        self.cmp_size = cmp_size
        self.criterion = criterion
        # Cells per board (4 rows x 7 columns).
        self.board_cells = 4 * 7

        # Embeddings to learn for champions. The padding index follows cmp_size
        # so that a non-default cmp_size still yields a valid embedding table.
        self.layer_cmp_emb = nn.Embedding(
            num_embeddings=self.cmp_size+1,
            embedding_dim=self.embedding_size,
            padding_idx=self.cmp_size)

        self.layer_w_0_s = nn.Linear(
            in_features=self.embedding_size*self.board_cells,
            out_features=self.hidden_size_1,
            bias=True)
        
        self.layer_w_0_o = nn.Linear(
            in_features=self.embedding_size*self.board_cells,
            out_features=self.hidden_size_1,
            bias=True)

        self.layer_w_1 = nn.Linear(
            in_features=2*self.hidden_size_1,
            out_features=2*self.hidden_size_2,
            bias=True)

        self.layer_w_2 = nn.Linear(
            in_features=2*self.hidden_size_2,
            out_features=1,
            bias=True)
        
        self.sigmoid = nn.Sigmoid()

        # Pretrained embeddings
        if pre_train:
            emb_train = np.load(pretrained_path)
            self.layer_cmp_emb.weight.data.copy_(torch.from_numpy(emb_train))
            
    def forward(self, scmp, ocmp):
        """Return one win probability per sample.

        ``scmp`` and ``ocmp`` are integer tensors of shape ``(B, 4, 7)``; a
        single unbatched ``(4, 7)`` board is treated as a batch of one.
        The result has shape ``(B,)``.
        """
        flat_features = self.board_cells * self.embedding_size

        # Embed, then flatten each board while keeping the batch dimension.
        # (B, 4, 7) --> (B, 4, 7, embedding_size) --> (B, 28 x embedding_size)
        s = self.layer_cmp_emb(scmp).reshape(-1, flat_features)
        o = self.layer_cmp_emb(ocmp).reshape(-1, flat_features)

        # SELF SIDE and OPPONENT SIDE --> (B, hidden_size_1) each
        s = nn.functional.leaky_relu(self.layer_w_0_s(s))
        o = nn.functional.leaky_relu(self.layer_w_0_o(o))
        
        # concat SELF AND OPPONENT along the feature axis --> (B, 2 x hidden_size_1)
        concat = torch.cat((s, o), dim=1)
        
        x = nn.functional.leaky_relu(self.layer_w_1(concat))
        # x --> (B, 2 x hidden_size_2)

        x = self.sigmoid(self.layer_w_2(x))
        # x --> (B, 1)

        # Drop the trailing singleton so the output lines up with (B,) labels.
        return x.squeeze(-1).float()

In [11]:
def train_model(model, dataloaders, optimizer, num_epochs=25):
    """Train ``model`` for ``num_epochs`` passes over ``dataloaders``.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(inputs_s, inputs_o)``; must expose ``model.criterion``.
    dataloaders : iterable
        Yields ``(inputs_s, inputs_o, labels)`` batches of tensors.
    optimizer : torch.optim.Optimizer
        Optimizer over the model's parameters.
    num_epochs : int
        Number of passes over the data.

    Returns
    -------
    list
        One 0-dimensional float64 tensor per epoch with the training accuracy.
    """
    since = time.time()
    acc_list = []
    model.train() # In training mode

    # Send the batches to wherever the model lives; a model and batch on
    # different devices raises at the first forward pass.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        running_loss = 0.0
        running_corrects = 0
        samples_seen = 0

        # Iterate over data.
        for inputs_s, inputs_o, labels in dataloaders:
            inputs_s = inputs_s.to(model_device)
            inputs_o = inputs_o.to(model_device)
            labels = labels.to(model_device).float()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward; align the output with the labels so that a (B, 1)
            # output cannot broadcast against (B,) labels
            outputs = model(inputs_s, inputs_o).reshape(labels.shape)
            loss = model.criterion(outputs, labels)

            # backward + optimize
            loss.backward()
            optimizer.step()

            # statistics
            preds = torch.round(outputs.detach())
            batch_count = inputs_s.size(0)
            running_loss += loss.item() * batch_count
            running_corrects += int((preds == labels).sum().item())
            samples_seen += batch_count

        # Epoch information, averaged over the samples actually processed
        # (NaN when the loader produced no batches).
        if samples_seen:
            epoch_loss = running_loss / samples_seen
            epoch_acc = torch.tensor(running_corrects / samples_seen, dtype=torch.float64)
        else:
            epoch_loss = float('nan')
            epoch_acc = torch.tensor(float('nan'), dtype=torch.float64)
        acc_list.append(epoch_acc)

        print('Training Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))


    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    
    return acc_list

In [12]:
def eval_model(model, dataloaders):
    """Evaluate ``model`` on ``dataloaders`` without updating any weights.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(inputs_s, inputs_o)``; must expose ``model.criterion``.
    dataloaders : iterable
        Yields ``(inputs_s, inputs_o, labels)`` batches of tensors.

    Returns
    -------
    torch.Tensor
        0-dimensional float64 tensor with the overall accuracy.
    """
    since = time.time()
    model.eval() # In evaluation mode

    # Send the batches to wherever the model lives; a model and batch on
    # different devices raises at the first forward pass.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    running_corrects = 0
    samples_seen = 0

    with torch.no_grad():
        # Iterate over data.
        for inputs_s, inputs_o, labels in dataloaders:
            inputs_s = inputs_s.to(model_device)
            inputs_o = inputs_o.to(model_device)
            labels = labels.to(model_device).float()

            # forward; align the output with the labels so that a (B, 1)
            # output cannot broadcast against (B,) labels
            outputs = model(inputs_s, inputs_o).reshape(labels.shape)
            loss = model.criterion(outputs, labels)

            preds = torch.round(outputs)

            # statistics
            batch_count = inputs_s.size(0)
            running_loss += loss.item() * batch_count
            running_corrects += int((preds == labels).sum().item())
            samples_seen += batch_count

    # Averages over the samples actually processed (NaN for an empty loader).
    if samples_seen:
        overall_loss = running_loss / samples_seen
        overall_acc = torch.tensor(running_corrects / samples_seen, dtype=torch.float64)
    else:
        overall_loss = float('nan')
        overall_acc = torch.tensor(float('nan'), dtype=torch.float64)

    print('Evaluation Loss: {:.4f} Acc: {:.4f}'.format(overall_loss, overall_acc))
    

    time_elapsed = time.time() - since
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    
    return overall_acc

## Prepare the model input data
- Each board array (self side and opponent side) should have shape (n, 4, 7), where n is the number of frames.

In [13]:
# Table with the predicted labels. Columns used downstream:
#   youtuber     --> (only Mortdog for now)
#   video_name
#   frame_name
#   cropped_name --> encodes the player side and the board position
#   predicted    --> champion label, converted to its integer index
label_csv_path = os.path.join(os.getcwd(), 'data', 'final_data_set.csv')
champion_label_df = pd.read_csv(label_csv_path)

# Ordered champion list; a champion's position in it is its integer encoding.
# NOTE(review): pickle.load can execute arbitrary code -- only load a trusted file.
with open("data/master_champ_list.pkl", "rb") as input_file:
    master_champ_list = pickle.load(input_file)

In [14]:
# final_data_set.csv is the cleaned one
df_to_fix = champion_label_df

# Count the crops of every (youtuber, video, frame, outcome) combination and
# keep only the frames with exactly 56 crops
# (presumably 28 board cells x 2 players -- TODO confirm).
frame_key_columns = ['youtuber', 'video_name', 'frame_name', 'outcome']
frame_crop_counts = df_to_fix.groupby(frame_key_columns).size().reset_index(name='img_count')
df_to_fix_count = frame_crop_counts.loc[frame_crop_counts.img_count == 56, frame_key_columns]

s, o, y = format_for_dnn(df_to_fix, df_to_fix_count)

### Data Loader

In [15]:
batch_size = 1
epochs = 5

# Seed the random generators so the train/test split (NumPy) and the loader
# shuffling and weight initialisation that follow (PyTorch) are reproducible
# under Restart & Run All.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

s_train, s_test, o_train, o_test, y_train, y_test = before_customimagedataset(s, o, y, test_size = 0.2)

print('Train size:', len(s_train))
print('Test size:', len(s_test))

trainset = CustomImageDataset(s_train, o_train, y_train)
# Reshuffle the training samples every epoch so SGD does not see the same fixed order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size, num_workers=0, shuffle=True)

testset = CustomImageDataset(s_test, o_test, y_test)
# Evaluation order does not matter, so the test loader stays unshuffled.
testloader = torch.utils.data.DataLoader(testset, batch_size, num_workers=0, shuffle=False)

Train size: 3429
Test size: 858


### Train

In [None]:
# Build the model with pretrained embeddings and move it to the selected
# device; the batches are sent there too, and both must be on the same device.
win_predictor = Win_Predictor_Net(nn.BCELoss(), pre_train=True).to(device)

In [None]:
# SGD with momentum over all parameters of the win predictor.
optimizer_SGD = torch.optim.SGD(
    win_predictor.parameters(),
    lr=0.01,
    momentum=0.9,
)

In [None]:
# Train on the training loader and keep the per-epoch accuracies.
model_acc = train_model(win_predictor, trainloader, optimizer_SGD, num_epochs=epochs)

### Eval

In [None]:
# Accuracy of the trained model on the held-out test loader.
eval_model(model=win_predictor, dataloaders=testloader)

## No pretrained embedding (embedding layer trained from scratch)

In [None]:
# Same architecture with randomly initialised embeddings (pre_train=False).
# The model is moved to the selected device because the batches are sent
# there too, and both must be on the same device.
win_predictor_noemb = Win_Predictor_Net(nn.BCELoss(), pre_train=False).to(device)
optimizer_SGD_noemb = torch.optim.SGD(win_predictor_noemb.parameters(), lr=0.01, momentum=0.9)
model_acc = train_model(win_predictor_noemb, trainloader, optimizer_SGD_noemb, epochs)
eval_model(win_predictor_noemb, testloader)

## Data for the scikit-learn baseline models

In [20]:
# Inputs: s_train, s_test, o_train, o_test, y_train, y_test

# Flatten every 4x7 board into a row of 28 champion indices.
board_cells = 4 * 7
s_train_lr = s_train.reshape(len(s_train), board_cells)
o_train_lr = o_train.reshape(len(o_train), board_cells)
s_test_lr = s_test.reshape(len(s_test), board_cells)
o_test_lr = o_test.reshape(len(o_test), board_cells)

# One feature row per frame: self-side cells followed by opponent-side cells.
X_train_lr = np.hstack([s_train_lr, o_train_lr])
X_test_lr = np.hstack([s_test_lr, o_test_lr])

y_train_lr, y_test_lr = y_train, y_test

n_train, n_test = len(y_train_lr), len(y_test_lr)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit a logistic-regression baseline on the flattened boards, then label the test rows.
logreg = LogisticRegression(random_state=0)
logreg.fit(X_train_lr, y_train_lr)

pred_logreg = logreg.predict(X_test_lr)

In [None]:
# Test accuracy: fraction of test rows whose prediction equals the label.
np.mean(y_test_lr.ravel() == pred_logreg.ravel())

## SVM

In [None]:
from sklearn import svm
# Create a linear-kernel SVM classifier and fit it on the flattened boards.
clf = svm.SVC(kernel='linear').fit(X_train_lr, y_train_lr)

In [None]:
# Label the test rows with the fitted SVM.
predictions_svm = clf.predict(X=X_test_lr)

In [None]:
# Test accuracy: fraction of test rows whose prediction equals the label.
np.mean(y_test_lr.ravel() == predictions_svm.ravel())

## KNN

In [22]:
from sklearn.neighbors import KNeighborsClassifier

In [23]:
# k-nearest-neighbours baseline with k = 2, fitted on the flattened boards.
knc = KNeighborsClassifier(n_neighbors=2).fit(X_train_lr, y_train_lr)

predictions_knc = knc.predict(X_test_lr)

In [24]:
# Test accuracy: fraction of test rows whose prediction equals the label.
np.mean(y_test_lr.ravel() == predictions_knc.ravel())

0.6282051282051282

## Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision-tree baseline (fixed random_state), fitted on the flattened boards.
dtc = DecisionTreeClassifier(random_state=0).fit(X_train_lr, y_train_lr)

predictions_dtc = dtc.predict(X_test_lr)

In [None]:
# Test accuracy: fraction of test rows whose prediction equals the label.
np.mean(y_test_lr.ravel() == predictions_dtc.ravel())

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random-forest baseline (depth limited to 5, fixed random_state), fitted on the flattened boards.
rfc = RandomForestClassifier(max_depth=5, random_state=0).fit(X_train_lr, y_train_lr)

predictions_rfc = rfc.predict(X_test_lr)

In [None]:
# Test accuracy: fraction of test rows whose prediction equals the label.
np.mean(y_test_lr.ravel() == predictions_rfc.ravel())