## Imports

In [None]:
import torch
import torch.nn as nn
from model import *
from torch.utils.data import DataLoader

## ELO or Year init

In [None]:
# Dataset selector: this value is embedded in the tensor file names and in the
# model checkpoint file name used by the cells below.
elo_or_year : int = 2010

## GPU memory

In [None]:
# Upper bound on the share of GPU memory this process may allocate.
gpu_memory_fraction = 0.9

# The cap can only be applied when a CUDA device is actually usable; calling it
# on a CPU-only machine raises, which would break the CPU fallback that the
# device-selection cell provides.
if torch.cuda.is_available():
    torch.cuda.set_per_process_memory_fraction(gpu_memory_fraction)

## Device init

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Device:", device)

## Loading Tensors

In [None]:
# data_size = 10000  # optional cap for quick experiments; pair with the commented slices below

# Inputs (X) and targets (Y) are stored as separate tensor files keyed by elo_or_year.
X : torch.Tensor = torch.load('./large_data/X_tensor_'+str(elo_or_year)+'.pt')#[:data_size]
Y : torch.Tensor = torch.load('./large_data/Y_tensor_'+str(elo_or_year)+'.pt')#[:data_size]

print(X.shape)
print(Y.shape)

# X and Y are served by separate loaders that the training loop pairs with
# zip(), so a length mismatch would silently misalign or drop samples.
# Fail loudly instead.
if len(X) != len(Y):
    raise ValueError(
        f'X and Y must hold the same number of samples, got {len(X)} and {len(Y)}'
    )

# Define the split percentages
train_ratio = 0.8
val_ratio = 0.2

# Calculate the sizes of each split (validation takes whatever training leaves)
total_size = len(X)
train_size = int(train_ratio * total_size)
val_size = total_size - train_size

print(total_size, train_size, val_size)

# The first `train_size` rows are the training set, the remainder is validation.
split_index = train_size

train_batch_size = 6000
val_batch_size = 1000

# train_batch_size = train_size//4
# val_batch_size = val_size//2


def _sequential_loader(tensor, batch_size):
    """Wrap `tensor` in a DataLoader that yields batches in storage order.

    Shuffling stays off on purpose: inputs and targets live in separate
    loaders, so both must iterate in the same order to stay aligned.
    """
    return DataLoader(tensor, batch_size=batch_size, shuffle=False)


# Split the data into train and validation sets
train_subset_X = X[:split_index]
val_subset_X = X[split_index:]
train_loader_X = _sequential_loader(train_subset_X, train_batch_size)
val_loader_X = _sequential_loader(val_subset_X, val_batch_size)

train_subset_Y = Y[:split_index]
val_subset_Y = Y[split_index:]
train_loader_Y = _sequential_loader(train_subset_Y, train_batch_size)
val_loader_Y = _sequential_loader(val_subset_Y, val_batch_size)

# Number of batches per pass, used by the progress display
total_train_batches = len(train_loader_X)
total_val_batches = len(val_loader_X)

print(total_train_batches)
print(total_val_batches)

## Model class init

In [None]:
# Build a fresh network, then move its parameters onto the selected device.
model = Neuro_gambit()
model = model.to(device)
print('Model initalized')

## Loading saved model

In [None]:
# Checkpoints are named after the model class and the elo_or_year selector.
checkpoint_path = './models/'+str(model._get_name())+'_'+str(elo_or_year)+'.pt'

# load_state_dict takes the loaded dictionary, not the path itself.
# map_location remaps the stored tensors onto the active device, so a checkpoint
# written on a GPU machine still loads when running on the CPU fallback.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()  # evaluation mode; training code must call model.train() again
model.to(device)
print('Model loaded')

## Learning params init

In [None]:
# Training schedule
n_epochs = 600
learning_rate = 0.001

# Mean-squared-error loss, applied separately to each model output in the training loop.
criterion = nn.MSELoss()

# AdamW replaced an earlier plain-SGD optimizer; the author noted far better results with it.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

### Training and validation loop

In [None]:
def _multi_head_loss(y_preds, targets):
    """Sum `criterion` over every model output and its slice of the targets.

    The target columns are cut into five consecutive groups
    ([:8], [8:16], [16:24], [24:32], [32:]); the i-th model output is compared
    against the i-th group and the per-output losses are added together.
    """
    target_slices = [
        targets[:, :8],
        targets[:, 8:16],
        targets[:, 16:24],
        targets[:, 24:32],
        targets[:, 32:],
    ]
    total = 0
    for i in range(len(y_preds)):
        total += criterion(y_preds[i], target_slices[i])
    return total


# Zero-based epoch index after which an intermediate checkpoint is written
# (i.e. once the epoch displayed as 301 has finished).
checkpoint_epoch = 300

# Placeholder shown in the progress line until the first validation pass has run.
last_val_loss = -1

for epoch in range(n_epochs):
    last_train_loss = 0

    # ---- training pass ----
    # The checkpoint-loading cell leaves the model in eval mode, and so does the
    # validation pass below; switch back to training mode explicitly.
    model.train()
    for batch, (train_X, train_Y) in enumerate(zip(train_loader_X, train_loader_Y)):
        train_X = train_X.to(device)
        train_Y = train_Y.to(device)

        # forward
        y_preds = model(train_X)
        train_loss = _multi_head_loss(y_preds, train_Y)

        # backward
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Keep a plain float for display so the autograd graph is not retained.
        last_train_loss = train_loss.item()

        print(f'Train: Epoch [{epoch+1}/{n_epochs}], Batch [{batch+1}/{total_train_batches}], Train Loss: {last_train_loss:.4f}, Val Loss: {last_val_loss:.4f}', end='\r')

    # ---- validation pass ----
    model.eval()
    with torch.no_grad():
        for batch, (val_X, val_Y) in enumerate(zip(val_loader_X, val_loader_Y)):
            val_X = val_X.to(device)
            val_Y = val_Y.to(device)

            # forward only; no gradients are needed for validation
            y_preds = model(val_X)
            last_val_loss = _multi_head_loss(y_preds, val_Y).item()

            print(f'Valid: Epoch [{epoch+1}/{n_epochs}], Batch [{batch+1}/{total_val_batches}], Train Loss: {last_train_loss:.4f}, Val Loss: {last_val_loss:.4f}', end='\r')

    # Intermediate checkpoint; written to the same path as the final save.
    if epoch == checkpoint_epoch:
        torch.save(model.state_dict(), './models/'+str(model._get_name())+'_'+str(elo_or_year)+'.pt')
        print(f'Model saved at Train Loss: {last_train_loss:.4f}, Val Loss: {last_val_loss:.4f}')

## Saving the model

In [None]:
# Persist the current weights. The file name is built from the model class name
# and elo_or_year, i.e. the same path the checkpoint-loading cell reads from.
final_checkpoint_path = f'./models/{model._get_name()}_{elo_or_year}.pt'
torch.save(model.state_dict(), final_checkpoint_path)
print('Model saved')