# TRAINING OF BETTER NN - HARD

In [1]:
# imports
import atc_dataloader, atc_model
import torch
from torch.utils.data import DataLoader
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from atc_loss import PredictionLoss_COS_MSE, PredictionLoss_BOX_Wise
import time

In [2]:
# set the device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
def transform_vector_to_integers(pred):
    # //////////////////////////////////////////////////////////
    # CURRENTLY NOT USED
    # //////////////////////////////////////////////////////////
    # get the max value, that will serve as reference - where is the max, there is supposed to be 1 (or other integer, depends...)
    # for now, everything until 1.3 will be treated as 1, until 2.3 as two etc.
    
    # lambda to round function from the specific treshold defined in range [.0;1.0)
    my_treshold = 0.3
    tr = np.min([my_treshold, pred.max()*0.9])

    myround = np.vectorize(lambda x, treshold=tr: np.floor(x) if x < (np.floor(x) + treshold) else np.ceil(x))

    result = np.apply_along_axis(func1d=myround, axis=0,arr=pred)
    return result

def evaluate(model, data_loader, device, criterion):
    """
    function used to evaluate the model, return loss and accuracy
    """
    model.eval()
    
    test_loss = []
    with torch.no_grad():
        for  _, batch_in_vectors, batch_out_vectors in data_loader:
            pred = model(batch_in_vectors.to(device))
            loss = criterion(pred, batch_out_vectors.to(device))
            test_loss.append(loss.item())
    
    return np.mean(test_loss)

# def evaluate_real_similarity(dl_test,model,device):
    

In [4]:
def train_model(model, device, train_dl, test_dl, criterion, optimizer, epochs=10, save_model_last_epoch=True, model_path='.'):
    
    # loss that will be calculated after each epoch for both test and train set 
    train_loss_overall = []
    test_loss_overall = [] 
    
    for epoch in range(epochs):
        epoch_loss = []
        model.train()  # Set the model to training mode
        
        # Wrap dataloader with tqdm for progress visualization
        for _, batch_in_vectors, batch_out_vectors in tqdm(train_dl, desc=f'Epoch {epoch + 1}/{epochs}', unit='batch'):
            # Convert data to tensors and move to the correct device
            batch_in_vectors = torch.tensor(batch_in_vectors, dtype=torch.float32, requires_grad=True).to(device)
            batch_out_vectors = torch.tensor(batch_out_vectors, dtype=torch.float32).to(device)
            
            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(batch_in_vectors)
            
            # Calculate loss
            loss = criterion(outputs, batch_out_vectors)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Accumulate loss
            epoch_loss.append(loss.item())

        if (epoch % 10 == 0):
            torch.save(model.state_dict(), f'models/model_betternnhard_epoch_{epoch}.pth')
        
        # start evaluation and append test loss
        tmp_test_loss = evaluate(model, test_dl, device, criterion)
        test_loss_overall.append(tmp_test_loss)
        
        # Calculate average train loss for the epoch
        avg_train_loss = np.mean(epoch_loss)
        train_loss_overall.append(avg_train_loss) # add it to the loss over all epochs
        
        # Print loss (both train and test) for the current epoch
        print(f"Epoch [{epoch+1}/{epochs}], Train loss: {avg_train_loss:.4f}, Test loss: {tmp_test_loss:.4f}")

    # ====================================
    # AFTER TRAIN ========================
    # ====================================
    
    # save the model after the last epoch if set
    # if (save_model_last_epoch):
    #     torch.save(model.state_dict(), f'{model_path}/model_last_epoch_{time.time_ns()}.pth')
        
    # print overall loss
    print('Overall train loss: ', train_loss_overall)
    print('Overall test loss: ', test_loss_overall)
    # plot graph
    plt.figure()
    plt.plot(np.arange(0,epochs,1),train_loss_overall, label='Train loss') # train loss over all epochs
    plt.plot(np.arange(0,epochs,1),test_loss_overall, label='Test loss') # test loss over all epochs
    plt.title('Loss function')
    plt.show()
    
    return train_loss_overall, test_loss_overall

In [12]:
class BetterNN_hard(nn.Module):
    """
    This is a better neural network model ready to use feature vector
    """
    def __init__(self, input_size=40, output_size=18):
        super(BetterNN_hard, self).__init__()
        
        # use relu as activation function, cause any other is not suitable for this task
        self.activation = nn.ReLU()
        
        # First linear layer should take conv_output_size as input
        self.fc1 = nn.Linear(input_size, 200)
        
        # the hidden part of linear layers
        self.fc2 = nn.Linear(200, 500)  # Change input size according to fc1 output
        self.fc3 = nn.Linear(500, 1000)
        self.fc4 = nn.Linear(1000, 2000)
        self.fc5 = nn.Linear(2000, 2000)
        
        # Conv1d layer
        self.conv1d_kernel10 = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=10)
        
        # Calculate the size after the Conv1d layer
        # Assuming input_size is the length of the sequence and the input is of shape (batch_size, 1, input_size)
        
        self.conv1d_kernel5 = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=5)
        
        conv_output_size = (1991 - 5 + 1)   # since linear before convolutional has output of 2000,
                                            # kernel_size=5 and stride=1, padding=0
        
        self.fc6 = nn.Linear(conv_output_size, 1000)
        self.fc7 = nn.Linear(1000, 500)
        
        # The output layer for classification
        self.fc8 = nn.Linear(500, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.activation(x)
        x = self.fc2(x)
        x = self.activation(x)
        x = self.fc3(x)
        x = self.activation(x)
        x = self.fc4(x)
        x = self.activation(x)
        x = self.fc5(x)
        x = self.activation(x)
        
        x = x.unsqueeze(1)  # add a channel dimension: (batch_size, 1, input_size)
        x = self.conv1d_kernel10(x)  # convolutional layer 1D, kernel size 10
        x = self.activation(x)
        x = self.conv1d_kernel5(x) # convolutional layer 1D, kernel size 5
        x = self.activation(x)
        x = x.squeeze(1)     # Remove the channel dimension after conv: (batch_size, conv_output_size)
        
        x = self.fc6(x)
        x = self.activation(x)
        x = self.fc7(x)
        x = self.activation(x)
        x = self.fc8(x)
        x = self.activation(x)
        
        return x
   


### Train part

In [5]:
# load data
TRAIN_IN="data/train_in.csv"
TRAIN_OUT="data/train_out.csv"
# TRAIN_IN='data/sample_in.csv'
# TRAIN_OUT='data/sample_out.csv'
data_train = atc_dataloader.ATCDataset_v2(TRAIN_IN, TRAIN_OUT)

In [6]:
TEST_IN="data/test_in.csv"
TEST_OUT="data/test_out.csv"
# TEST_IN='data/sample_in.csv'
# TEST_OUT='data/sample_out.csv'
data_test = atc_dataloader.ATCDataset_v2(TEST_IN, TEST_OUT)

In [13]:
model = BetterNN_hard().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=0.005)
criterion = PredictionLoss_BOX_Wise().to(device)
dl_train = DataLoader(data_train, batch_size=32, shuffle=True)  
dl_test = DataLoader(data_test, batch_size=32, shuffle=False)

In [11]:
train_loss_hard, test_loss_hard = train_model(model, device, dl_train, dl_test, criterion, optimizer, epochs=100)
torch.save(model.state_dict(), f'models/model_betterNNhard{time.time_ns()}.pth') # it will be saved, ...but just in case (rly doesnt make much sense)

  batch_in_vectors = torch.tensor(batch_in_vectors, dtype=torch.float32, requires_grad=True).to(device)
  batch_out_vectors = torch.tensor(batch_out_vectors, dtype=torch.float32).to(device)
Epoch 1/100:   0%|          | 0/3768 [00:00<?, ?batch/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x1987 and 1996x1000)