### This notebook contains all the required functions and structures to perform experiments with the ANN models

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time


# Define an early stopping mechanism for regularization use - from https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch
class EarlyStopper:
    '''Patience-based early stopping driven by the validation loss.

        patience - number of "bad" epochs tolerated before a stop is signalled
        min_delta - margin above the best loss seen so far that a loss must exceed
                    to be counted as a "bad" epoch
    '''

    def __init__(self, patience=1, min_delta=0):
        self.min_validation_loss = np.inf
        self.counter = 0
        self.patience = patience
        self.min_delta = min_delta

    def early_stop(self, validation_loss):
        '''Record one validation loss and return True when training should stop.

        A new minimum resets the counter. A loss larger than the best loss plus
        min_delta increments it. Losses in between leave the counter untouched.
        '''
        best_so_far = self.min_validation_loss

        # New best value: remember it and start counting from scratch.
        if validation_loss < best_so_far:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # Within the tolerated margin (or not comparable, e.g. NaN): nothing changes.
        exceeds_margin = validation_loss > (best_so_far + self.min_delta)
        if not exceeds_margin:
            return False

        self.counter += 1
        if self.counter >= self.patience:
            return True
        return False
    
def train_data_mirror(X_train, y_train):
    '''Mirroring data augmentation (meant for training data only).

        Input: X_train, y_train - dataframe of features and the matching target
        Output: X_train, y_train - the original rows followed by their mirrored copies

        A mirrored row has the second half of the feature columns moved in front of
        the first half (the column labels stay in the original order) and its
        target replaced by 1 - target.
    '''
    feature_names = X_train.columns
    half = len(feature_names) // 2

    # Column order with the two halves exchanged.
    swapped_order = np.concatenate([feature_names[half:], feature_names[:half]])

    mirrored_X = X_train.reindex(columns=swapped_order)
    mirrored_X.columns = feature_names  # relabel so both frames stack column-wise
    mirrored_y = 1 - y_train

    augmented_X = pd.concat([X_train, mirrored_X], axis=0)
    augmented_y = pd.concat([y_train, mirrored_y], axis=0)
    return augmented_X, augmented_y


def create_data_loaders(data, batch_size, train_mirror=False, standarize=True):

    '''Function used to create pytorch data loader objects
        Input: 
                data - dataframe used for experiments; the last column is the target
                batch_size - int, number of samples to use per learning batch
                train_mirror - bool, whether or not to add mirrored data (training split only)
                standarize - bool, whether or not to standarize the data
        Output:
                train_loader, valid_loader, test_loader - pytorch dataloader object used to train, validate and test the model

        The data is split roughly 70/15/15 into train/validation/test.
        All three loaders shuffle and drop the last incomplete batch.
    '''

    def _to_loader(features, target):
        # Features become float32 tensors, the target a float32 column vector.
        feature_tensor = torch.from_numpy(features.values.astype(np.float32))
        target_tensor = torch.from_numpy(target.values.reshape(-1, 1).astype(np.float32))
        return DataLoader(TensorDataset(feature_tensor, target_tensor), batch_size=batch_size, shuffle=True, drop_last=True)

    # Scale the data?
    if standarize:
        X = data.iloc[:, :-1]
        y = data.iloc[:, -1]

        # NOTE(review): the scaler is fitted on the full dataset (before the split),
        # so validation/test statistics influence the scaling - confirm this is intended.
        scaled = StandardScaler().fit_transform(X)

        # Keep the original index: assigning y aligns on index labels, so a fresh
        # RangeIndex would pair features with the wrong labels (or NaN) whenever
        # `data` does not carry a default 0..n-1 index.
        data = pd.DataFrame(scaled, index=X.index)
        data['winner'] = y

    # Split data into training, validation and testing sets
    train_data, test_data = train_test_split(data, test_size=0.15)
    train_data, validation_data = train_test_split(train_data, test_size=0.15/0.85)

    # Data augmentation flip? The mirrored rows must actually feed the training loader.
    X_train, y_train = train_data.iloc[:, :-1], train_data.iloc[:, -1]
    if train_mirror:
        X_train, y_train = train_data_mirror(X_train, y_train)

    # Convert data to PyTorch tensors and create data loaders
    train_loader = _to_loader(X_train, y_train)
    valid_loader = _to_loader(validation_data.iloc[:, :-1], validation_data.iloc[:, -1])
    test_loader = _to_loader(test_data.iloc[:, :-1], test_data.iloc[:, -1])

    print(f'Data loaded -- starting model training...')

    return train_loader, valid_loader, test_loader

def train(model, loader, optimizer, criterion, device, mode, epoch, print_history = True):
    '''
    Function used to train the pytorch model for one epoch
    Input: 
        model - pytorch model object
        loader - dataloader object to use for the model
        optimizer - torch.optim object, used as a optimizer in the learning process
        criterion - loss function; scalar and per-element outputs are both accepted
        device - "cpu" or "cuda" depending on the available resources
        mode - used only for printing the current mode (ex. training, validation)
        epoch - current epoch (0-based, printed as epoch+1)
        print_history - bool, whether to display the loss and accuracy for this epoch

    Output: 
        running_loss - mean loss per sample over the samples processed in this epoch
        accuracy - share of processed samples whose output thresholded at 0.5 equals the label

    Raises:
        ValueError - if the loader yields no batches
    '''
    model.train()
    # Train the model for 1 epoch
    loss_sum = 0.0
    correct = 0
    total = 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()
        batch_size = inputs.size(0)
        total += batch_size
        try:
            loss = criterion(outputs, labels)
        except Exception:
            # Fallback for criteria that reject (N, 1) shaped tensors. Unlike a bare
            # `except`, this does not intercept KeyboardInterrupt/SystemExit.
            loss = criterion(outputs.squeeze(1), labels.squeeze(1))

        # Reduce once so that per-element losses work for both backward() and item().
        loss = loss.mean()
        loss.backward()
        optimizer.step()
        loss_sum += loss.item() * batch_size
        correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("train() received a loader that yields no batches "
                         "(is the dataset smaller than batch_size with drop_last=True?)")

    # Normalise by the samples actually seen: with drop_last=True this is smaller
    # than len(loader.dataset), which would bias the reported loss downwards.
    running_loss = loss_sum / total
    accuracy = correct / total
    if print_history:
        print(f"Epoch {epoch+1} {mode} loss: {running_loss}")
        print(f"Epoch {epoch+1} {mode} accuracy: {accuracy}")

    return running_loss, accuracy


def test(model, loader, criterion, device, mode, epoch, print_history = True):

    '''
       Function used to evaluate the pytorch model (no gradient updates)
    Input: 
        model - pytorch model object
        loader - dataloader object to use for the model
        criterion - loss function; scalar and per-element outputs are both accepted
        device - "cpu" or "cuda" depending on the available resources
        mode - used only for printing the current mode (ex. validation, test)
        epoch - current epoch (0-based, printed as epoch+1)
        print_history - bool, whether to display the loss and accuracy for this pass

    Output: 
        running_loss - mean loss per sample over the samples processed in this pass
        accuracy - share of processed samples whose output thresholded at 0.5 equals the label
        average wall-clock time of one batch forward pass in [s]

    Raises:
        ValueError - if the loader yields no batches
    
    '''
    model.eval()
    # Evaluate the neural network on the given set
    loss_sum = 0.0
    total_inference_time = 0.0
    n_batches = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            n_batches += 1
            inputs, labels = inputs.to(device), labels.to(device)

            # NOTE(review): only the forward call is timed, without any device
            # synchronisation - confirm this is the intended measure on "cuda".
            start = time.perf_counter()
            outputs = model(inputs)
            total_inference_time += time.perf_counter() - start

            predicted = (outputs > 0.5).float()
            batch_size = inputs.size(0)
            total += batch_size
            try:
                loss = criterion(outputs, labels)
            except Exception:
                # Fallback for criteria that reject (N, 1) shaped tensors. Unlike a bare
                # `except`, this does not intercept KeyboardInterrupt/SystemExit.
                loss = criterion(outputs.squeeze(1), labels.squeeze(1))

            # Reduce so that per-element losses can be turned into a python float.
            loss_sum += loss.mean().item() * batch_size
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test() received a loader that yields no batches "
                         "(is the dataset smaller than batch_size with drop_last=True?)")

    # Normalise by the samples actually seen: with drop_last=True this is smaller
    # than len(loader.dataset), which would bias the reported loss downwards.
    running_loss = loss_sum / total
    accuracy = correct / total

    if print_history:
        print(f"Epoch {epoch+1} {mode} loss: {running_loss}")
        print(f"Epoch {epoch+1} {mode} accuracy: {accuracy}")

    return running_loss, accuracy, total_inference_time / n_batches


# The name starts with "test", so pytest would try to collect this evaluation helper
# as a test case wherever it is imported; opt out explicitly.
test.__test__ = False


# Functions used to plot accuracy and loss for the respective epochs if needed

def plot_loss(train_loss, validation_loss, title):
    '''Plot the training and validation loss per epoch on one figure and show it.

        train_loss, validation_loss - sequences with one loss value per epoch
        title - figure title
    '''
    plt.grid(True)
    plt.xlabel("subsequent epochs",fontsize=16)
    plt.ylabel('average loss',fontsize=16)

    # Epochs are numbered from 1 on the x axis.
    for history, curve_name in ((train_loss, 'training'), (validation_loss, 'validation')):
        epochs = range(1, len(history)+1)
        plt.plot(epochs, history, 'o-', label=curve_name)

    plt.legend()
    plt.title(title,fontsize=20)
    plt.tick_params(labelsize=12)

    plt.show()
    
def plot_acc(train_loss, validation_loss, title):
    '''Plot the training and validation accuracy per epoch on one figure and show it.

        train_loss, validation_loss - sequences with one accuracy value per epoch
                                      (the parameter names are historical)
        title - figure title
    '''
    plt.grid(True)
    plt.xlabel("subsequent epochs",fontsize=16)
    plt.ylabel('average accuracy',fontsize=16)

    # Epochs are numbered from 1 on the x axis.
    for history, curve_name in ((train_loss, 'training'), (validation_loss, 'validation')):
        epochs = range(1, len(history)+1)
        plt.plot(epochs, history, 'o-', label=curve_name)

    plt.legend()
    plt.title(title,fontsize=20)
    plt.tick_params(labelsize=12)
    plt.show()

In [2]:
def go_ANN_model(data, model, optimizer, num_epochs, batch_size, device='cuda', train_mirror=False, early_stopping = False, es_patience = 5, es_delta = 0.01):
    
    '''
        Function that encapsulates the steps needed to perform a trial run of the prediction process
    Input:
        data - dataframe used for the experiment, passed to create_data_loaders
        model - pytorch model object (moved to `device` here)
        optimizer - torch.optim object already built on the parameters of `model`
        num_epochs - maximal number of training epochs
        batch_size - int, number of samples per batch
        device - "cpu" or "cuda"
        train_mirror - bool, whether to add mirrored training data
        early_stopping - bool, stop when the validation loss stops improving
        es_patience, es_delta - patience and min_delta of the EarlyStopper

    Output:
        model_state - copy of the state dict with the highest validation accuracy
                      (None when no epoch was run)

    Side effects: prints progress, shows the loss/accuracy plots, and leaves `model`
    loaded with the best state after a final pass over the test loader.
    '''

    model_state = None
    hi_accuracy = 0

    # Result collecting lists for later plotting
    train_loss_arr, validation_loss_arr = [], []
    train_acc_arr, validation_acc_arr = [], []

    train_loader, validation_loader, test_loader = create_data_loaders(data, batch_size,train_mirror) # Initialize the dataloaders from dataset

    model = model.to(device)
    criterion = nn.BCELoss() # Binary Cross Entropy Loss

    early_stopper = EarlyStopper(patience=es_patience, min_delta=es_delta) if early_stopping else None

    epoch = 0  # keeps the final test call valid even when num_epochs == 0
    for epoch in range(num_epochs):
        train_loss, train_accuracy = train(model,train_loader, optimizer, criterion=criterion, device=device, mode='Train', epoch=epoch)
        # test() returns (loss, accuracy, inference_time); the timing is not needed here.
        valid_loss, valid_accuracy, _ = test(model, validation_loader, criterion=criterion, device=device, mode='Validation', epoch=epoch)

        if early_stopper is not None and early_stopper.early_stop(valid_loss):
            break

        train_loss_arr.append(train_loss)
        train_acc_arr.append(train_accuracy)

        if valid_accuracy >= hi_accuracy:
            hi_accuracy = valid_accuracy
            print(f'New best state saved with valid. acc. = {hi_accuracy}.')
            # state_dict() hands out references to the live tensors, which later
            # optimizer steps overwrite; clone them to really freeze the best epoch.
            model_state = {key: value.detach().clone() for key, value in model.state_dict().items()}

        validation_loss_arr.append(valid_loss)
        validation_acc_arr.append(valid_accuracy)

    plot_loss(train_loss=train_loss_arr, validation_loss=validation_loss_arr, title='ANN model loss')
    plot_acc(train_acc_arr, validation_acc_arr, title='ANN model accuracy')

    # Restore the best epoch (if any) before the final evaluation on the test split.
    if model_state is not None:
        model.load_state_dict(model_state)
    test(model, test_loader, criterion=criterion, device=device, mode='Test', epoch=epoch)

    return model_state

In [3]:
def create_loader_cv(data, batch_size, train_mirror=True, standarize=True):

    '''
    Function used to create the dataloader object for the cross-validation process
    Input:
        data - dataframe of one fold; the last column is the target. It is not modified.
        batch_size - int, number of samples per batch
        train_mirror - bool, whether to append mirrored rows (see train_data_mirror)
        standarize - bool, whether to standardize the features with a StandardScaler
                     fitted on this fold
    Output:
        data_loader - shuffling pytorch dataloader that drops the last incomplete batch
    '''
    # Scale the data?
    if standarize:
        # Work on a re-indexed copy instead of resetting the caller's frame in place;
        # the 0..n-1 index is what lets the scaled features line up with the labels.
        data = data.reset_index(drop=True)
        X = data.iloc[:, :-1]
        y = data.iloc[:, -1]

        scaled = StandardScaler().fit_transform(X)

        data = pd.DataFrame(scaled)
        data['winner'] = y

    features, target = data.iloc[:, :-1], data.iloc[:, -1]

    # Data augmentation flip?
    if train_mirror:
        features, target = train_data_mirror(features, target)

    # Convert data to PyTorch tensors and create the data loader
    feature_tensor = torch.from_numpy(features.values.astype(np.float32))
    target_tensor = torch.from_numpy(target.values.reshape(-1, 1).astype(np.float32))
    data_loader = DataLoader(TensorDataset(feature_tensor, target_tensor), batch_size=batch_size, shuffle=True, drop_last=True)

    print(f'Data loaded succesfully!')

    return data_loader

In [4]:
def cv_ANN_model(train_loader, test_loader, model, criterion, optimizer, num_epochs, device='cuda', early_stopping = False, es_patience = 5, es_delta = 0.01, plot_history = True, print_history = True):
    

    '''Function used to perform a single fold validation in the cross-validation process for the ANN model
    Input:
        train_loader - dataloader with the training part of the fold
        test_loader - dataloader with the held-out part of the fold (used for validation)
        model - pytorch model object (moved to `device` here)
        criterion - loss function, either an instance/callable or a loss class
                    (a class is instantiated without arguments)
        optimizer - torch.optim object already built on the parameters of `model`
        num_epochs - maximal number of training epochs
        device - "cpu" or "cuda"
        early_stopping - bool, stop when the validation loss stops improving
        es_patience, es_delta - patience and min_delta of the EarlyStopper
        plot_history - bool, plot loss and accuracy over the epochs
        print_history - bool, print loss and accuracy for every epoch

    Output:
        hi_accuracy - highest validation accuracy reached among the recorded epochs
        average per-batch inference time [s] over the epochs that were actually run
    '''

    hi_accuracy = 0

    # Result collecting lists for later plotting
    train_loss_arr, validation_loss_arr = [], []
    train_acc_arr, validation_acc_arr = [], []

    total_inference_time = 0
    epochs_run = 0

    model = model.to(device)

    # Accept a loss class as well as a ready-made loss object. Checking the type
    # avoids calling an already instantiated loss with no arguments.
    if isinstance(criterion, type):
        criterion = criterion()

    early_stopper = EarlyStopper(patience=es_patience, min_delta=es_delta) if early_stopping else None

    for epoch in range(num_epochs):

        train_loss, train_accuracy = train(model,train_loader, optimizer, criterion=criterion, device=device, mode='Train', epoch=epoch, print_history=print_history)
        valid_loss, valid_accuracy, inference_time = test(model, test_loader, criterion=criterion, device=device, mode='Validation', epoch=epoch, print_history=print_history)
        total_inference_time += inference_time
        epochs_run += 1

        if early_stopper is not None and early_stopper.early_stop(valid_loss):
            break

        train_loss_arr.append(train_loss)
        train_acc_arr.append(train_accuracy)

        if valid_accuracy >= hi_accuracy:
            hi_accuracy = valid_accuracy
            # Only the accuracy is kept; the model weights are not returned by this function.
            print(f'New best state saved with valid. acc. = {hi_accuracy} for epoch {epoch+1}.')
            print(f'Average inference time for a single validation batch: {inference_time}s.')

        validation_loss_arr.append(valid_loss)
        validation_acc_arr.append(valid_accuracy)

    if plot_history:
        plot_loss(train_loss=train_loss_arr, validation_loss=validation_loss_arr, title='ANN model loss')
        plot_acc(train_acc_arr, validation_acc_arr, title='ANN model accuracy')

    # Average over the epochs that really ran: after an early stop this is fewer
    # than num_epochs, and dividing by num_epochs would understate the timing.
    avg_inference_time = total_inference_time / epochs_run if epochs_run else 0.0

    return hi_accuracy, avg_inference_time

In [6]:
def cv_split_testing_ANN(data, model_class, criterion, num_of_splits, input_size, activation_f, plot_history = True, print_history = True, early_stopping = True, optimizer = None, lr = 0.001, num_epochs = 50, batch_size = 64, hidden_size = 512, device = 'cuda'):

    '''Function used to perform the cross-validation process. The main function of this notebook.
    Input:
        data - dataframe used for prediction
        model_class - pytorch class of the selected model, used to create a new object inside this function
        criterion - pytorch loss function
        num_of_splits - number of splits to perform in the cross-validation process
        input_size - int, size of the input feature vector (length)
        activation_f - pytorch activation function to use between the model layers

        plot_history - bool, create a plot of loss and accuracy over all training epoch for training and validation
        print_history - bool, print out the values of loss and accuracy over all training epoch for training and validation
        early_stopping - bool, use early stopping regularization
        optimizer - optimization algorithm: None selects Adam, a torch.optim.Optimizer
                    subclass is instantiated for every fold, any other value selects RAdam
        lr - learning rate, default 0.001
        num_epochs - maximal number of epochs per fold, default 50
        batch_size - batch size of the fold loaders, default 64
        hidden_size - hidden_size passed to model_class, default 512
        device - "cpu" or "cuda", default "cuda"

    Output:
        None - per-fold and averaged results are printed
    '''


    from sklearn.model_selection import KFold

    splits = KFold(num_of_splits)

    cv_accuracy = []
    cv_inference_time = []
    for i, (train_split, test_split) in enumerate(splits.split(data)):
        print(f'Training and testing {i+1}/{num_of_splits} fold...')

        # KFold yields positional indices, so select rows by position rather than by label.
        train_fold = data.iloc[train_split, :].copy()
        test_fold = data.iloc[test_split, :].copy()

        # NOTE(review): each fold part is standardized on its own inside create_loader_cv.
        train_loader = create_loader_cv(train_fold, batch_size, False, True)
        test_loader = create_loader_cv(test_fold, batch_size, False, True)
        model = model_class(input_size=input_size, hidden_size=hidden_size, output_size=1, activation_f=activation_f)

        # Build a fresh optimizer for the fresh model of this fold. A separate name is
        # used so the `optimizer` argument keeps its meaning across folds.
        if optimizer is None:
            # Default optimizer
            fold_optimizer = optim.Adam(model.parameters(), lr=lr)
        elif isinstance(optimizer, type) and issubclass(optimizer, optim.Optimizer):
            fold_optimizer = optimizer(model.parameters(), lr=lr)
        else:
            # Legacy behaviour: any other non-None value selects RAdam.
            fold_optimizer = optim.RAdam(model.parameters(), lr=lr)

        fold_test_accuracy, inference_time = cv_ANN_model(train_loader, test_loader, model, criterion, fold_optimizer, num_epochs=num_epochs, device=device, early_stopping=early_stopping, es_patience=5, es_delta=0.002, plot_history=plot_history, print_history=print_history)

        print(f'Highest validation accuracy in fold {i+1}/{num_of_splits}: {fold_test_accuracy}')

        # Only models that track their own timing expose these attributes.
        try:
            print(f'Avg. Inference time (a single forward pass): {model.inference_time/model.n_of_forward_passes}')
        except (AttributeError, ZeroDivisionError):
            print('unknown inference time')

        cv_accuracy.append(fold_test_accuracy)
        cv_inference_time.append(inference_time)

    print(f'Average accuracy over {num_of_splits} folds: {np.mean(cv_accuracy)}')
    print(f'Average inference time over {num_of_splits} folds for a single validation batch: {np.mean(cv_inference_time)}')

In [7]:
import torch.nn as nn
import torch.nn.functional as F

# Defining the FocalLoss function that is not natively implemented in pytorch

class FocalLoss(nn.Module):
    '''Focal-style loss built on top of binary cross entropy.

        alpha - constant scaling factor of the loss
        gamma - exponent of the modulating factor (1 - pt)
        reduction - 'mean', 'sum' or anything else for no further reduction

        Note: the binary cross entropy is averaged over the batch first, so the
        modulating factor is computed from that mean and the result is a scalar
        for every value of `reduction`.
    '''

    def __init__(self, alpha=1, gamma=2, reduction='none'):
        super().__init__()
        self.reduction = reduction
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, input, target):
        # input holds probabilities in [0, 1], as expected by binary_cross_entropy
        mean_bce = F.binary_cross_entropy(input, target, reduction='mean')
        modulating = (1 - torch.exp(-mean_bce)) ** self.gamma
        focal = self.alpha * modulating * mean_bce

        if self.reduction == 'mean':
            return torch.mean(focal)
        if self.reduction == 'sum':
            return torch.sum(focal)
        return focal

#### Used ANN models

In [9]:
# The baseline model used for the majority of predictions
class ANNModel_simple(nn.Module):
    '''Baseline fully connected classifier.

        Layer widths: input_size -> hidden_size -> 128 -> 256 -> output_size.
        Every hidden stage is Linear -> activation_f -> BatchNorm1d -> Dropout(0.5).
        The output stage is Linear -> BatchNorm1d -> Sigmoid.
    '''

    def __init__(self, input_size, hidden_size, output_size, activation_f):
        super().__init__()
        # Submodules are created in a fixed order; that order determines the
        # layout of parameters() and state_dict().
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.activation_f = activation_f
        self.fc2 = nn.Linear(hidden_size, 128)
        self.fc3 = nn.Linear(128, 256)
        self.fc4 = nn.Linear(256, output_size)
        self.do1 = nn.Dropout(0.5)
        self.do2 = nn.Dropout(0.5)
        self.do3 = nn.Dropout(0.5)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(256)
        self.bn4 = nn.BatchNorm1d(output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        hidden_stages = (
            (self.fc1, self.bn1, self.do1),
            (self.fc2, self.bn2, self.do2),
            (self.fc3, self.bn3, self.do3),
        )
        out = x
        for linear, norm, drop in hidden_stages:
            out = drop(norm(self.activation_f(linear(out))))

        # Output stage: no activation_f and no dropout before the sigmoid.
        return self.sigmoid(self.bn4(self.fc4(out)))

##### Extensions

In [11]:
# The shallow version of the baseline model
class ANNModel_simple_shallow(nn.Module):
    '''Shallow variant of the baseline classifier.

        Layer widths: input_size -> hidden_size -> 128 -> output_size.
        Every hidden stage is Linear -> activation_f -> BatchNorm1d -> Dropout(0.5).
        The output stage is Linear -> BatchNorm1d -> Sigmoid.
        `do3` is created but not used in forward().
    '''

    def __init__(self, input_size, hidden_size, output_size, activation_f):
        super().__init__()
        # Submodules are created in a fixed order; that order determines the
        # layout of parameters() and state_dict().
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.activation_f = activation_f
        self.fc2 = nn.Linear(hidden_size, 128)
        self.fc3 = nn.Linear(128, output_size)
        self.do1 = nn.Dropout(0.5)
        self.do2 = nn.Dropout(0.5)
        self.do3 = nn.Dropout(0.5)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        hidden_stages = (
            (self.fc1, self.bn1, self.do1),
            (self.fc2, self.bn2, self.do2),
        )
        out = x
        for linear, norm, drop in hidden_stages:
            out = drop(norm(self.activation_f(linear(out))))

        # Output stage: no activation_f and no dropout before the sigmoid.
        return self.sigmoid(self.bn3(self.fc3(out)))

# The deep version of the baseline model
class ANNModel_simple_deep(nn.Module):
    '''Deep variant of the baseline classifier.

        Layer widths: input_size -> 64 -> 128 -> 256 -> 128 -> 32 -> output_size.
        `hidden_size` is accepted for signature compatibility but not used.
        Stages 1-4 are Linear -> activation_f -> BatchNorm1d -> Dropout(0.5).
        Stage 5 is Linear -> activation_f -> BatchNorm1d (no dropout).
        The output stage is Linear -> Sigmoid.
        `do5`, `do6` and `bn6` are created but not used in forward().
    '''

    def __init__(self, input_size, hidden_size, output_size, activation_f):
        super().__init__()
        # Submodules are created in a fixed order; that order determines the
        # layout of parameters() and state_dict().
        self.fc1 = nn.Linear(input_size, 64)
        self.activation_f = activation_f
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 32)
        self.fc6 = nn.Linear(32, output_size)
        self.do1 = nn.Dropout(0.5)
        self.do2 = nn.Dropout(0.5)
        self.do3 = nn.Dropout(0.5)
        self.do4 = nn.Dropout(0.5)
        self.do5 = nn.Dropout(0.5)
        self.do6 = nn.Dropout(0.5)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(256)
        self.bn4 = nn.BatchNorm1d(128)
        self.bn5 = nn.BatchNorm1d(32)
        self.bn6 = nn.BatchNorm1d(output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        stages_with_dropout = (
            (self.fc1, self.bn1, self.do1),
            (self.fc2, self.bn2, self.do2),
            (self.fc3, self.bn3, self.do3),
            (self.fc4, self.bn4, self.do4),
        )
        out = x
        for linear, norm, drop in stages_with_dropout:
            out = drop(norm(self.activation_f(linear(out))))

        # Fifth stage is normalised but not followed by dropout.
        out = self.bn5(self.activation_f(self.fc5(out)))

        # Output stage goes straight from the linear layer into the sigmoid.
        return self.sigmoid(self.fc6(out))

#### Example usage of the above structures for prediction

In [12]:
device = 'cuda'

# Load data from a CSV file, drop the identifier columns and every row with a
# missing value, then renumber the rows from 0
data = (
    pd.read_csv('Teams_statistics.csv')
    .drop(columns=['match_id','map','team_1','team_2'])
    .dropna()
    .reset_index(drop=True)
)

# Every column except the last one (the target) is a model input
input_size = len(data.columns) - 1

In [None]:
# Experiment configuration: model class, loss, activation and optimizer (None = Adam)
model_class = ANNModel_simple
criterion = nn.BCELoss()
activation_f = nn.ReLU()
optimizer = None

# Run the 5-fold cross-validation without per-epoch printing, plots or early stopping
cv_split_testing_ANN(
    data,
    model_class,
    criterion=criterion,
    num_of_splits=5,
    input_size=input_size,
    activation_f=activation_f,
    plot_history=False,
    print_history=False,
    early_stopping=False,
    optimizer=optimizer,
)