In [97]:
import torch
import torch.nn as nn
import os

import numpy as np 
import pandas as pd
import datetime
import pickle
import time
import random

In [98]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [111]:
class win_identifer_model(nn.Module):
    def __init__(self, layers):
        super(win_identifer_model, self).__init__()
        modules = nn.ModuleList([nn.Flatten(), nn.Linear(6, layers[0]), nn.ReLU()])

        for idx, size in enumerate(layers[:-1]):
            modules.append(nn.Linear(size, layers[idx + 1]))
            modules.append(nn.ReLU())

        modules.append(nn.Linear(layers[-1], 2))
        modules.append(nn.Softmax())
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        return self.layers(x)

In [112]:
def train_model (x_tensor, y_tensor, layers):
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )
    # print(f"Using {device} device")


    # in_path = os.path.join("drive","MyDrive","Colab Notebooks","HKJC-ML")

    # x_tensor = torch.load(os.path.join(in_path, "x_tensor")).to(torch.float32).to(device)
    # y_tensor = torch.load(os.path.join(in_path, "y_tensor")).to(torch.float32).to(device)

    x_tensor = x_tensor.to(torch.float32).to(device)
    y_tensor = y_tensor.to(torch.float32).to(device)

    dataset = torch.utils.data.TensorDataset(x_tensor, y_tensor)

    rng = torch.Generator().manual_seed(42)
    training_set, val_set = torch.utils.data.random_split(dataset, [0.8, 0.2], generator=rng)

    input_size = x_tensor.shape[1]
    model = win_identifer_model(layers).to(device)
    print(model)

    learning_rate = 1e-3
    batch_size = 32

    train_dataloader = torch.utils.data.DataLoader(training_set, batch_size=batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    epochs = 5000
    patience = 250

    train_loss_plot = []
    val_loss_plot = []
    epochs_plot = []

    best_val_loss = float('inf')
    epochs_without_improvement = 0

    for e in range(epochs):
        # if e % 10 == 0:
        #     print(f"Epoch {e}\n-------------------------------")
        size = len(train_dataloader.dataset)
        # Set the model to training mode - important for batch normalization and dropout layers
        # Unnecessary in this situation but added for best practices
        model.train()
        train_loss_sum = 0
        num_train_batches = 0
        for batch, (X, y) in enumerate(train_dataloader):
            # Compute prediction and loss
            # X = torch.swapaxes(X, 0, 2)
            pred = model(X)
            # y = torch.unsqueeze(y, dim=1)
            # print(pred.shape, y.shape)
            loss = loss_fn(pred, y)
            train_loss_sum += loss.item()
            num_train_batches += 1
            # Backpropagation
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            if batch % 100 == 0:
                loss, current = loss.item(), (batch + 1) * len(X)
                # print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

        train_loss_plot.append(train_loss_sum/num_train_batches)

        # Set the model to evaluation mode - important for batch normalization and dropout layers
        # Unnecessary in this situation but added for best practices
        model.eval()
        size = len(val_dataloader.dataset)
        num_val_batches = len(val_dataloader)
        val_loss, val_acc = 0, 0

        # Evaluating the model with torch.no_grad() ensures that no gradients are computed during val mode
        # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
        with torch.no_grad():
            for X, y in val_dataloader:
                pred = model(X)
                # y = torch.unsqueeze(y, dim=1)
                val_loss += loss_fn(pred, y).item()
                val_acc += (pred == y).type(torch.float).sum().item()

        val_loss /= num_val_batches
        val_acc /= num_val_batches
        # val_loss_plot.append(val_loss)
        # epochs_plot.append(e+1)

        if e % 100 == 0:
            print(f"epoch {e:d} val loss: {val_loss:>8f} accuracy: {val_acc:>8f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            best_model_state = model.state_dict()
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                break

    # plt.plot(epochs_plot, train_loss_plot, label = "training loss")
    # plt.plot(epochs_plot, val_loss_plot, label = "validation loss")
    # plt.legend()
    # print('best val loss', best_val_loss)
            
    print('best val loss', best_val_loss)
            
    return best_model_state

    # file_name = f"{datetime.now().strftime('%Y_%m_%d_%H_%M')}_"
    # for l in layers:
    #     file_name += f'{str(l)}_'
    # file_name += f"{batch_size}_{e}_{str(f'{best_val_loss:.3g}').split('.')[-1]}"

    # out_path = os.path.join('model_configs','hkjc5', file_name)
    # torch.save(best_model_state, out_path)

    # return file_name


In [113]:
layers = [8, 16, 8, 4]

in_path = os.path.join('data','2_ordinal_normalise')
out_path = os.path.join('model_configs')

for file_name in [f for f in os.listdir(in_path) if 'csv' in f]:
    model_name = file_name.replace('.csv','')

    df = pd.read_csv(os.path.join(in_path, file_name), index_col=0)
    x_df = df.drop(labels=['win','trio_win','won','not_won'], axis=1)
    y_df = df[['won','not_won']]

    x = x_df.to_numpy()
    y = y_df.to_numpy()
    x_tensor = torch.from_numpy(x)
    y_tensor = torch.from_numpy(y)

    best_model_state = train_model(x_tensor, y_tensor, layers)

    torch.save(best_model_state, os.path.join(out_path, model_name))

win_identifer_model(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=6, out_features=8, bias=True)
    (2): ReLU()
    (3): Linear(in_features=8, out_features=16, bias=True)
    (4): ReLU()
    (5): Linear(in_features=16, out_features=8, bias=True)
    (6): ReLU()
    (7): Linear(in_features=8, out_features=4, bias=True)
    (8): ReLU()
    (9): Linear(in_features=4, out_features=2, bias=True)
    (10): Softmax(dim=None)
  )
)
epoch 0 val loss: 0.613041 accuracy: 0.000000


  input = module(input)


epoch 100 val loss: 0.389117 accuracy: 4.769231
epoch 200 val loss: 0.389116 accuracy: 21.769231
epoch 300 val loss: 0.388671 accuracy: 28.769231
epoch 400 val loss: 0.388226 accuracy: 29.230769
epoch 500 val loss: 0.388671 accuracy: 29.230769
best val loss 0.3877808749675751
win_identifer_model(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=6, out_features=8, bias=True)
    (2): ReLU()
    (3): Linear(in_features=8, out_features=16, bias=True)
    (4): ReLU()
    (5): Linear(in_features=16, out_features=8, bias=True)
    (6): ReLU()
    (7): Linear(in_features=8, out_features=4, bias=True)
    (8): ReLU()
    (9): Linear(in_features=4, out_features=2, bias=True)
    (10): Softmax(dim=None)
  )
)
epoch 0 val loss: 0.535585 accuracy: 0.000000


  input = module(input)


epoch 100 val loss: 0.381460 accuracy: 6.615385
epoch 200 val loss: 0.380569 accuracy: 25.153846
epoch 300 val loss: 0.381905 accuracy: 29.384615
epoch 400 val loss: 0.381460 accuracy: 29.461538
epoch 500 val loss: 0.381905 accuracy: 29.461538
epoch 600 val loss: 0.381015 accuracy: 29.461538
best val loss 0.38056933879852295
