<h3>Imports</h3>

In [None]:
import torch
from torch import nn
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision.transforms.functional as tr
import torchvision.transforms.v2.functional as trv2
from torchvision.transforms import RandomRotation
from torchvision.transforms.functional import pil_to_tensor
import pandas as pd
import torch.nn.functional as F
from torchvision.transforms import InterpolationMode
import matplotlib.pyplot as plt
import math
import time
import os
import albumentations as A
import cv2
import random
import hickle

from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights, resnet50, ResNet50_Weights, regnet_x_400mf, RegNet_X_400MF_Weights

<h3>Global Variables</h3>

In [None]:
# Path to the dataset's CSV index file (despite the name, this is a file path, not a
# directory). The PIN_DATA_CSV environment variable overrides the machine-specific
# default so the notebook can run elsewhere without editing this cell.
input_file_dir = os.environ.get(
    "PIN_DATA_CSV",
    "/home/tyler/Documents/Data/PinData/PinVideosRaw/data.csv",
)

In [None]:
# All training artefacts live in one directory, created up front so that later
# writes to these paths cannot fail on a missing folder.
_checkpoint_dir = "./best_model/"
os.makedirs(_checkpoint_dir, exist_ok=True)

# Weights of the best model so far (written by get_best_loss).
best_model_path = _checkpoint_dir + "best-parameters.pt"
# Not referenced by any other cell visible in this notebook.
last_epoch_model_path = _checkpoint_dir + "last-epoch-correction-parameters.pt"
# Text file holding the best loss recorded so far (read/written by get_best_loss).
best_loss_path = _checkpoint_dir + "best-model-loss.txt"

In [None]:
# Not referenced by any other cell visible in this notebook.
num_of_inputs = 5
# Number of decoding steps run by Decoder.forward (the targets hold 4 digits);
# also the output width of the Encoder's fc2 layer.
num_of_outputs = 4
# Not referenced by any other cell visible in this notebook.
num_of_frames = 31

# Hidden size of the Encoder GRU; the Decoder GRU uses twice this value.
hidden_size = 512
# Number of stacked layers in both the Encoder and the Decoder GRU.
num_of_rnn_layers = 1
# Input width of the Encoder GRU, i.e. the length of the vector produced per frame by
# the image backbone. NOTE(review): presumably 1000 because the torchvision classifier
# emits 1000 logits — confirm if the backbone is changed.
embedding_size = 1000

In [None]:
# Not referenced by any other cell visible in this notebook.
IMAGE_WIDTH = 96
# Passed to A.LongestMaxSize in the `transform` pipeline as the maximum side length.
IMAGE_HEIGHT = 128

In [None]:
# Mini-batch size shared by the train, test and validation DataLoaders (see get_dataloader).
batch_size = 8

In [None]:
# Seed every RNG the notebook can draw from, not only PyTorch's: the per-sample
# augmentation parameters (rotation angle, flip draws) come from the stdlib `random`
# module, so seeding torch alone does not make a run reproducible.
random.seed(12)
np.random.seed(12)
torch.manual_seed(12)

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")

In [None]:
# Keys of the `metrics` dictionary; each maps to a per-epoch history list.
# The strings double as plot titles/legend labels in show_and_save_graph.
TRAIN_LOSS_KEY = "Training Loss"
TRAIN_ACCURACY_KEY = "Training Accuracy"

VAL_LOSS_KEY = "Validation Loss"
VAL_ACCURACY_KEY = "Validation Accuracy"

# Only read inside test(); these two keys are not passed to init_metrics below.
TEST_LOSS_KEY = "Testing Loss"
TEST_ACCURACY_KEY = "Testing Accuracy"

In [None]:
# Per-frame preprocessing pipeline, applied by preprocess_image() after the rotation.
# NOTE(review): per the Albumentations documentation, LongestMaxSize rescales so that
# the longer image side equals IMAGE_HEIGHT (aspect ratio preserved) and Normalize
# defaults to the ImageNet mean/std — confirm against the installed version, where
# `always_apply` may also be deprecated.
transform = A.Compose([
    A.LongestMaxSize(IMAGE_HEIGHT, always_apply=True),
    # Optional augmentations, currently disabled:
    # A.GaussianBlur(),
    # A.ColorJitter(),
    # A.GaussNoise(),
    A.Normalize(always_apply=True),
    A.ToFloat(always_apply=True)
])

<h3>Aux Functions</h3>

In [None]:
def get_best_loss(current_loss: float, model):
    """Save `model` as the new best checkpoint when `current_loss` beats the stored record.

    The record is a single number kept in the text file `best_loss_path`. When that
    file does not exist yet it is created with an initial record of 1, so only a loss
    below 1 can become the first "best".

    Args:
        current_loss: Loss to compare against the stored record.
        model: Object exposing `state_dict()`; its weights are written to
            `best_model_path` with `torch.save` on improvement.

    Returns:
        True when `current_loss` is strictly lower than the record (checkpoint written
        and record updated), False otherwise. The result is also returned on the very
        first call, when the record file has just been created.

    Raises:
        ValueError: If the record file exists but does not start with a number.
    """
    try:
        with open(best_loss_path, "r") as file:
            best_loss = float(file.readline().strip())
    except FileNotFoundError:
        # First run: create the record file with the initial threshold.
        with open(best_loss_path, "w") as file:
            file.write("1")
        best_loss = 1.0

    # Written as `not <` so that a NaN loss is never treated as an improvement.
    if not current_loss < best_loss:
        return False

    # Save the weights before updating the record, so a failed save cannot leave the
    # record describing a checkpoint that was never written.
    torch.save(model.state_dict(), best_model_path)
    with open(best_loss_path, "w") as file:
        file.write(str(current_loss))

    print("New best loss!")
    return True

In [None]:
def get_corrects_and_size(pred, y):
    """Count how many predictions pick the same class as their one-hot targets.

    The class scores are expected on the LAST axis of both tensors. For the
    `(batch, 4, 10)` outputs/targets used in this notebook that yields one comparison
    per digit; for plain `(batch, classes)` inputs the result is the same as reducing
    over axis 1.

    Args:
        pred: Tensor of class scores.
        y: Tensor of one-hot (or score) targets with the same shape as `pred`.

    Returns:
        Tuple `(correct, size)`: the number of matching class indices and the total
        number of comparisons made.
    """
    predicted_classes = torch.argmax(pred, dim=-1)
    target_classes = torch.argmax(y, dim=-1)

    matches = torch.eq(predicted_classes, target_classes)

    return matches.sum().item(), matches.numel()

In [None]:
def get_line(list: list):
    """Serialize a sequence as one comma-separated line terminated by a newline.

    Every element is converted with `str`. Afterwards all "[", "]" and space
    characters are removed from the whole line, so nested lists are flattened
    textually (and spaces inside string elements are dropped too).
    """
    joined = ",".join(map(str, list)) + "\n"

    # Delete the three unwanted characters in a single pass.
    return joined.translate(str.maketrans("", "", "[] "))

In [None]:
def get_dataloader(dataset):
    """Randomly split `dataset` 80/10/10 and wrap each part in a DataLoader.

    The train and validation sizes are rounded down; the test split receives whatever
    remains. Only the training loader is shuffled: the evaluation loaders are read in
    a fixed order, which leaves their aggregate metrics unchanged while keeping
    evaluation runs repeatable.

    Args:
        dataset: Any sized dataset accepted by `torch.utils.data.random_split`.

    Returns:
        Tuple `(train_dataloader, test_dataloader, val_dataloader)`, all using the
        module-level `batch_size`.
    """
    total = len(dataset)
    train_size = int(0.8 * total)
    val_size = int(0.1 * total)
    test_size = total - (train_size + val_size)

    # The order of the lengths must match the order of the unpacked splits.
    train_dataset, test_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size, val_size]
    )

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_dataloader, test_dataloader, val_dataloader

In [None]:
def show_and_save_graph(metrics:dict, ylabel:str,  key_one:str, key_two:str=None):
    """Plot one or two metric histories against the epoch number, save and show the figure.

    Args:
        metrics: Dictionary mapping metric names to per-epoch value lists.
        ylabel: Label for the y axis.
        key_one: Key of the first history to plot.
        key_two: Optional key of a second history drawn on the same axes. When omitted
            only `key_one` is plotted (previously this raised a KeyError).

    Side effects:
        Creates "./Figures/" if needed and saves the figure there under
        "<title>_<unix time>", then displays it.
    """
    plotted_keys = [key_one] if key_two is None else [key_one, key_two]

    for key in plotted_keys:
        history = metrics[key]
        # Epochs are numbered from 1 on the x axis.
        plt.plot(list(range(1, len(history) + 1)), history)

    title = " and ".join(plotted_keys) + " vs Epoch"

    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend(plotted_keys)
    plt.grid()

    os.makedirs("./Figures/", exist_ok=True)
    plt.savefig(f"./Figures/{title}_{int(time.time())}")

    plt.show()

In [None]:
def init_metrics(*args):
    """Create a metrics dictionary holding one fresh, empty history list per given key."""
    return {key: [] for key in args}

In [None]:
def h_flip_img(img):
    """Flip `img` in place with OpenCV flip code 0 when the global `h_flip` draw is >= 0.5.

    NOTE(review): per the OpenCV docs, flip code 0 reverses the FIRST array axis
    (a flip around the x-axis), which for an image is usually called a vertical
    flip — confirm the name matches the intent.
    """
    global h_flip

    should_flip = h_flip >= 0.5
    if not should_flip:
        return

    # Source and destination are the same array, so the caller's data is modified.
    cv2.flip(img, 0, img)

In [None]:
def v_flip_img(img):
    """Flip `img` in place with OpenCV flip code 1 when the global `v_flip` draw is >= 0.5.

    NOTE(review): per the OpenCV docs, flip code 1 reverses the SECOND array axis
    (a flip around the y-axis), which for an image is usually called a horizontal
    flip — confirm the name matches the intent.
    """
    global v_flip

    should_flip = v_flip >= 0.5
    if not should_flip:
        return

    # Source and destination are the same array, so the caller's data is modified.
    cv2.flip(img, 1, img)

In [None]:
def preprocess_image(image):
    """Augment and normalise one channels-first frame.

    Steps: convert to channels-last, apply the random flips, rotate by the global
    `degree`, run the `transform` pipeline, convert back to channels-first and add a
    leading axis of size 1.

    The flips are applied AFTER the conversion to channels-last, on a private
    contiguous copy. Applied to the channels-first array (as was done before), OpenCV
    flip code 0 reverses the channel axis instead of a spatial one, and the width
    axis is never flipped. As a consequence the caller's array is no longer modified
    in place.

    Args:
        image: Array laid out as (channels, height, width); the transposes below
            establish this layout.

    Returns:
        Array of shape (1, channels, height', width') produced by the pipeline.
    """
    global degree

    # OpenCV writes the flip result back into its input, which needs a contiguous
    # buffer; np.transpose alone only returns a strided view.
    image = np.ascontiguousarray(np.transpose(image, [1,2,0]))

    h_flip_img(image)
    v_flip_img(image)

    image = A.rotate(image, degree)

    image = transform(image=image)["image"]

    # Back to channels-first for the network, plus a leading singleton axis.
    image = np.transpose(image, [2,0,1])

    return np.expand_dims(image, 0)

In [None]:
def preprocess_frames(frames, mask):
    """Preprocess, in place, every frame up to the first masked-out position.

    Frame `i` is replaced by `preprocess_image(frames[i].squeeze())`. Processing stops
    at the first index whose mask entry equals 0; that frame and all later ones are
    left untouched. Nothing is returned.

    NOTE(review): the result is written back into `frames`, so it is cast to the dtype
    of that array — if the stored frames are an integer type, the normalised float
    values would be truncated. Confirm the dtype of the loaded data.
    """
    for index in range(len(frames)):

        if mask[index] == 0:
            return

        frames[index] = preprocess_image(frames[index].squeeze())

<h3>Custom Dataset Init</h3>

In [None]:
class CustomDataSet(Dataset):
    """Dataset of frame sequences labelled with a 4-digit PIN.

    The CSV index must provide a "file_name" column (path of a hickle file holding
    the entries "image" and "mask") and a "pin" column (read as text so leading
    zeros survive).
    """

    def __init__(self, csv):
        """Read the CSV index at `csv`; no sample data is loaded here."""
        df = pd.read_csv(csv, header=0, dtype=str)
        self.df = df
        self.predictors = df["file_name"].to_numpy()
        self.pins = df["pin"].to_numpy()

    def __len__(self):
        """Number of rows in the CSV index."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, augment and encode sample `idx`.

        Returns:
            Tuple `(predictors, target, mask)` of tensors on `device`: the
            preprocessed frames, a (4, 10) one-hot matrix with one row per PIN
            digit, and the frame mask stored with the sample.

        Raises:
            IndexError: If `idx` is past the end of the dataset. This is the
                exception the sequence protocol expects, so plain `for` loops over
                the dataset still terminate.
        """
        if idx >= len(self):
            raise IndexError(f"index {idx} is out of range for a dataset of size {len(self)}")

        global h_flip
        global v_flip
        global degree

        # One set of augmentation parameters per sample, read by the preprocessing
        # helpers so that every frame of the sequence gets the same rotation/flips.
        degree = random.randrange(-360,360)
        h_flip = random.random()
        v_flip = random.random()

        # Load the file once; both entries come from the same record.
        record = hickle.load(self.predictors[idx])
        predictors = record["image"]
        mask = record["mask"]

        preprocess_frames(predictors, mask)

        predictors = torch.tensor(predictors, device=device)

        pin = str(self.pins[idx])
        # Indexing (rather than slicing) keeps the failure on PINs shorter than 4.
        target_pin = [int(pin[position]) for position in range(4)]

        target = torch.zeros((4,10), dtype=torch.float, device=device)
        for position, digit in enumerate(target_pin):
            target[position][digit] = 1

        mask = torch.tensor(mask, device=device)

        return (predictors, target, mask)

In [None]:
# Build the dataset from the CSV path configured in the "Global Variables" section
# (instead of repeating the literal path here) and split it into the three loaders.
custom_dataset = CustomDataSet(input_file_dir)
train_dataloader, test_dataloader, val_dataloader = get_dataloader(custom_dataset)

In [None]:
# Smoke check: fetch only the first sample and print its tensors with their shapes.
# Nothing is printed when the dataset is empty.
_first_sample = next(enumerate(custom_dataset), None)

if _first_sample is not None:
    i, (predictors, target1, mask) = _first_sample
    print(f'i : {i} input_file: {predictors}{predictors.shape}')
    print(f'target1: {target1}{target1.shape}')
    print(f'mask: {mask}{mask.shape}')

<h3>Neural Net Classes</h3>

In [None]:
class SkipConnection(nn.Module):
    """Pointwise (kernel size 1) 3-D convolution mapping `in_channels` to `out_channels`.

    Used by ConvBlock to give the shortcut branch the same channel count as the
    convolutional branch.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # The attribute is called `conv2d` although the layer is a Conv3d; the name
        # is part of the state_dict keys and is therefore left as is.
        self.conv2d = nn.Conv3d(in_channels, out_channels, kernel_size=1, padding=0)

    def forward(self, x):
        """Apply the pointwise convolution to `x`."""
        return self.conv2d(x)

In [None]:
class ConvBlock(nn.Module):
    """Residual 3-D convolution block followed by 2x max pooling.

    Two Conv3d(3x3x3) -> ELU -> BatchNorm3d stages form the main branch; a
    SkipConnection projects the input to `out_features` channels so it can be added
    to the branch output. The sum goes through ELU and MaxPool3d(2).
    """

    def __init__(self, in_features, out_features):
        super().__init__()

        def conv_stage(channels_in):
            # padding=1 with a kernel of 3 keeps the spatial/temporal size unchanged.
            return nn.Sequential(
                nn.Conv3d(channels_in, out_features, 3, padding=1),
                nn.ELU(),
                nn.BatchNorm3d(out_features),
            )

        self.cnn1 = conv_stage(in_features)
        self.cnn2 = conv_stage(out_features)

        self.skip1 = SkipConnection(in_features, out_features)
        self.pool = nn.MaxPool3d(2)

    def forward(self, x1):
        """Return the pooled activation of (conv branch + projected input)."""
        shortcut = self.skip1(x1)

        features = self.cnn2(self.cnn1(x1))
        features += shortcut

        return self.pool(F.elu(features))

In [None]:
class Encoder(nn.Module):
    """Encode a masked sequence of frames into one fixed-size vector per sample.

    Each valid frame is embedded by a pretrained RegNet backbone; the resulting
    sequence is read by a bidirectional GRU whose final forward and backward hidden
    states (last layer) are concatenated into a vector of size `2 * hidden_size`.
    """

    def __init__(self):
        super().__init__()

        # To freeze the backbone, set requires_grad=False on its parameters here.
        self.embedding = regnet_x_400mf(weights=RegNet_X_400MF_Weights.DEFAULT)

        self.gru = nn.GRU(embedding_size, hidden_size, num_of_rnn_layers, batch_first=True, bidirectional=True)
        # fc1/fc2 are not used by forward(); they are kept so that the parameter set
        # (and therefore saved state_dicts) stays unchanged.
        self.fc1 = nn.Linear(hidden_size, 16)
        self.fc2 = nn.Linear(16, num_of_outputs)

    @staticmethod
    def _count_leading_valid(frame_mask, num_frames):
        """Number of frames before the first zero entry of `frame_mask`."""
        valid = (frame_mask[:num_frames] != 0).long()
        # cumprod turns everything after the first zero into zero as well.
        return int(valid.cumprod(dim=0).sum().item())

    def forward(self, x1, mask):
        """Encode every sample of the batch.

        Args:
            x1: Tensor of frames; iterating it yields one sample, and iterating a
                sample yields single frames accepted by the backbone.
            mask: Per-sample frame mask; a sample's frames are used up to (not
                including) the first position whose mask entry is 0.

        Returns:
            Tensor of shape (batch, 2 * hidden_size). Samples without any valid
            frame are encoded as all zeros.

        The valid frames of a sample are embedded in one backbone call and passed to
        the GRU as one sequence. Stepping the GRU one frame at a time (as was done
        before) makes its "backward" direction consume the frames in forward order
        too, so it never actually reads the sequence in reverse.
        """
        encoding_size = 2 * self.gru.hidden_size
        dtype = self.gru.weight_ih_l0.dtype

        encodings = []

        for frames, frame_mask in zip(x1, mask):

            # One host sync per sample instead of one per frame.
            num_valid = self._count_leading_valid(frame_mask, frames.shape[0])

            if num_valid == 0:
                encodings.append(torch.zeros(encoding_size, device=x1.device, dtype=dtype))
                continue

            frame_vectors = self.embedding(frames[:num_valid])

            # Unbatched call: (sequence, features) in, (directions * layers, hidden) out.
            _, hn = self.gru(frame_vectors)

            # The last two rows are the last layer's forward and backward states.
            encodings.append(torch.cat([hn[-2], hn[-1]]))

        if not encodings:
            return torch.zeros((0, encoding_size), device=x1.device, dtype=dtype)

        return torch.stack(encodings)

In [None]:
class Decoder(nn.Module):
    """Unroll a GRU for `num_of_outputs` steps and emit 10 class scores per step."""

    def __init__(self):
        super().__init__()

        self.gru = nn.GRU(hidden_size*2, hidden_size*2, num_of_rnn_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size*2, 16)
        self.fc2 = nn.Linear(16, 10)

    def forward(self, x1):
        """Decode a batch of encoder vectors.

        Args:
            x1: Tensor of shape (batch, 2 * hidden_size).

        Returns:
            Tensor of shape (batch, num_of_outputs, 10) with raw class scores.

        Every sample starts from a zero hidden state and is decoded independently of
        the others. Previously one hidden state was shared across the loop over the
        batch, so each sample's prediction depended on the samples before it. The GRU
        output of one step is the GRU input of the next; the scores of a step are
        computed from the last layer's hidden state.
        """
        batch = x1.shape[0]

        if batch == 0 or num_of_outputs == 0:
            return x1.new_zeros((batch, num_of_outputs, self.fc2.out_features))

        # (batch, 1, features): a length-1 sequence per sample (batch_first=True).
        step_input = x1.unsqueeze(1)
        hn = x1.new_zeros((self.gru.num_layers, batch, self.gru.hidden_size))

        step_scores = []

        for _ in range(num_of_outputs):

            step_input, hn = self.gru(step_input, hn)

            hidden = F.elu(self.fc1(hn[-1]))
            step_scores.append(self.fc2(hidden))

        return torch.stack(step_scores, dim=1)

In [None]:
class NeuralNet(nn.Module):
    """Complete model: the Encoder's output is passed straight to the Decoder."""

    def __init__(self):
        super().__init__()

        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x1, mask):
        """Encode the frames `x1` under `mask`, then decode the result."""
        return self.decoder(self.encoder(x1, mask))

<h3>Model Init</h3>

In [None]:
# Instantiate the network on the selected device and report how many of its
# parameters are trainable.
model = NeuralNet().to(device)

pytorch_total_params = sum(
    map(torch.numel, filter(lambda parameter: parameter.requires_grad, model.parameters()))
)

print(f"Total Params: {pytorch_total_params:,}")

In [None]:
# Four independent cross-entropy criteria; train() and val() only use criterion1.
criterion1, criterion2, criterion3, criterion4 = (nn.CrossEntropyLoss() for _ in range(4))

# Adam over every model parameter with a small learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Per-epoch histories filled by train() and val().
metrics = init_metrics(
    TRAIN_LOSS_KEY,
    TRAIN_ACCURACY_KEY,
    VAL_LOSS_KEY,
    VAL_ACCURACY_KEY,
)

<h3>Train and Test Init</h3>

In [None]:
def train(dataloader, optimizer):
    """Run one training epoch over `dataloader` and record its loss and accuracy.

    Args:
        dataloader: Iterable of `(X, y1, mask)` batches that supports `len()`.
        optimizer: Optimizer updating the parameters of the global `model`.

    Side effects:
        Appends the mean batch loss to `metrics[TRAIN_LOSS_KEY]` and the accuracy (in
        percent) to `metrics[TRAIN_ACCURACY_KEY]`, and prints both.

    Two defects of the earlier version are fixed here. The reported loss is now the
    mean over all batches; before, it was the last batch's loss divided by the number
    of batches. The cross-entropy is now computed with the class scores on the class
    axis: predictions and targets are flattened to (batch * digits, classes), because
    CrossEntropyLoss treats axis 1 as the class axis, which for (batch, digits,
    classes) tensors is the digit position.
    """
    model.train()

    num_batches = len(dataloader)

    total_loss, correct, size = 0.0, 0, 0

    for X, y1, mask in dataloader:

        optimizer.zero_grad()

        pred1 = model(X, mask)

        loss = criterion1(
            pred1.reshape(-1, pred1.shape[-1]),
            y1.reshape(-1, y1.shape[-1]),
        )

        loss.backward()
        optimizer.step()

        # .item() detaches the value so the autograd graph is not kept alive.
        total_loss += loss.item()

        batch_correct, batch_total = get_corrects_and_size(pred1, y1)
        correct += batch_correct
        size += batch_total

    loss = total_loss / num_batches
    metrics[TRAIN_LOSS_KEY].append(loss)

    accuracy = 100 * (correct / size)
    metrics[TRAIN_ACCURACY_KEY].append(accuracy)

    print(f"Train Accuracy: {(accuracy):>0.3f}%\t Train Loss: {loss:>12f}")

In [None]:
def val(dataloader):
    """Evaluate the global `model` on `dataloader` without updating it.

    Args:
        dataloader: Iterable of `(X, y1, mask)` batches that supports `len()`.

    Side effects:
        Appends the mean batch loss to `metrics[VAL_LOSS_KEY]` and the accuracy (in
        percent) to `metrics[VAL_ACCURACY_KEY]`, and prints both.

    Three defects of the earlier version are fixed here. The reported loss is now the
    mean over all batches; before, it was the last batch's loss divided by the number
    of batches. The cross-entropy is computed on tensors flattened to
    (batch * digits, classes) so that the class scores sit on the axis
    CrossEntropyLoss treats as the class axis. The accuracy history is now actually
    recorded, and both histories are stored in `metrics` even when their key was
    missing.
    """
    model.eval()

    num_batches = len(dataloader)

    total_loss, correct, size = 0.0, 0, 0

    # setdefault (rather than get) guarantees the list being appended to is the one
    # stored in `metrics`.
    loss_history = metrics.setdefault(VAL_LOSS_KEY, [])
    accuracy_history = metrics.setdefault(VAL_ACCURACY_KEY, [])

    with torch.no_grad():
        for X, y1, mask in dataloader:

            pred1 = model(X, mask)

            batch_loss = criterion1(
                pred1.reshape(-1, pred1.shape[-1]),
                y1.reshape(-1, y1.shape[-1]),
            )
            total_loss += batch_loss.item()

            batch_correct, batch_total = get_corrects_and_size(pred1, y1)
            correct += batch_correct
            size += batch_total

    loss = total_loss / num_batches
    loss_history.append(loss)

    accuracy = 100 * (correct / size)
    accuracy_history.append(accuracy)

    print(f"Val Accuracy: {(accuracy):>0.3f}%\t Val Loss: {loss:>12f}")

In [None]:
def test(dataloader, loss_fn):
    size = 0

    encoder.eval()
    decoder.eval()

    num_batches = len(dataloader)
    
    loss, correct = 0, 0

    loss_history = metrics.get(TEST_LOSS_KEY, [])
    accuracy_history = metrics.get(TEST_ACCURACY_KEY, [])
    
    with torch.no_grad():
        for X, y1, y2, y3, y4, mask in dataloader:
            
            pred = encoder(X)

            pred = pred.clamp(0, 1)
            pred = torch.nan_to_num(pred)
            
            loss += loss_fn(pred, y)

            pred = torch.argmax(pred, dim=1)
            y = torch.argmax(y, dim=1)

            corrects = torch.eq(pred,y).int()

            correct += corrects.sum().item()

            size += corrects.numel()

    loss /= num_batches
    loss_history.append(loss.item())

    correct /= size
    accuracy = 100*correct
    accuracy_history.append(accuracy)
    print(f"Test Accuracy: {(accuracy):>0.3f}%\tTest Loss: {loss:>12f}")

In [None]:
def print_metrics(start, t, t_with_best_loss, best_loss):
    """Print the end-of-epoch summary and update the best-loss bookkeeping.

    Args:
        start: `time.time()` value taken when the epoch started.
        t: Number of the epoch that just finished.
        t_with_best_loss: Epoch number at which the best loss was last improved.
        best_loss: Best validation loss known to the caller so far.

    Returns:
        Tuple `(t_with_best_loss, best_loss)`; both are replaced by the current epoch
        and validation loss when get_best_loss() reports an improvement.
    """
    train_losses = metrics[TRAIN_LOSS_KEY]
    val_losses = metrics[VAL_LOSS_KEY]
    latest_val_loss = val_losses[-1]

    # Epoch-over-epoch deltas need at least two recorded epochs.
    if t > 1:
        latest_train_loss = train_losses[-1]

        train_loss_dif = (train_losses[-2] - latest_train_loss) * 100
        val_loss_dif = (val_losses[-2] - latest_val_loss) * 100
        train_val_loss_dif = (latest_train_loss - latest_val_loss) * 100

        print()
        print(f"Train Loss Difference: {train_loss_dif:>0.4f}\t\tVal Loss Difference: {val_loss_dif:>0.4f}")
        print(f"Train Val Loss Difference: {train_val_loss_dif:>0.4f}")
        print()

    # get_best_loss also saves the model when the validation loss improved.
    if get_best_loss(latest_val_loss, model):
        t_with_best_loss, best_loss = t, latest_val_loss

    print(f"Epoch with best loss: {t_with_best_loss}\t\tBest Loss: {best_loss:12f}")
    print(f"Epochs Since Best Loss: {t - t_with_best_loss}")

    print(f"Run Time: {round((time.time() - start), 2)}s")

    print()

    return t_with_best_loss, best_loss

<h3>Training and Validation</h3>

In [None]:
# Epoch counter plus the best-loss bookkeeping that print_metrics() updates each epoch.
t, t_with_best_loss, best_loss = 0, 0, 0

# There is no stopping condition: training continues until the kernel is interrupted.
while True:
    t += 1
    start = time.time()

    print(f"Epoch {t}\n-------------------------------")

    train(train_dataloader, optimizer)
    val(val_dataloader)

    t_with_best_loss, best_loss = print_metrics(start, t, t_with_best_loss, best_loss)