In [1]:
import os
import datetime
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchmetrics import Accuracy
from torch.utils.data import Dataset, DataLoader
from generate_landmark_data import label_dict_from_config_file

In [2]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        list_label = label_dict_from_config_file("hand_gesture.yaml")

        self.linear_relu_stack = nn.Sequential(
            nn.Linear(63, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(0.6),
            nn.Linear(128, len(list_label))
        )

    def forward(self, x):
        x = self.flatten(x)
        x = self.linear_relu_stack(x)
        
        return x

    def predict(self,x,threshold=0.8):
        logits = self(x)
        softmax_prob = nn.Softmax(dim=1)(logits)
        chosen_ind = torch.argmax(softmax_prob,dim=1)
        
        return torch.where(softmax_prob[0,chosen_ind]>threshold,chosen_ind,-1)

    def predict_with_known_class(self,x):
        logits = self(x)
        softmax_prob = nn.Softmax(dim=1)(logits)
        
        return torch.argmax(softmax_prob,dim=1)


    def score(self,logits):
        return -torch.amax(logits,dim=1)

In [3]:
class CustomImageDataset(Dataset):
    def __init__(self, data_file):
        self.data = pd.read_csv(data_file)
        self.labels = torch.from_numpy(self.data.iloc[:,0].to_numpy())

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        one_hot_label = self.labels[idx]
        torch_data = torch.from_numpy(self.data.iloc[idx,1:].to_numpy(dtype=np.float32))
        
        return torch_data, one_hot_label

In [4]:
class EarlyStopper:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.watched_metrics = np.inf

    def early_stop(self, current_value):
        if current_value < self.watched_metrics:
            self.watched_metrics = current_value
            self.counter = 0
            
        elif current_value > (self.watched_metrics + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
                
        return False

In [5]:
def train(trainloader, val_loader, model, loss_function, early_stopper, optimizer):
    # add auroc score
    best_vloss = 1_000_000
    timestamp = datetime.datetime.now().strftime('%d-%m %H:%M')
    for epoch in range(300):
        #training step
        model.train(True)
        running_loss = 0.0
        acc_train = Accuracy(num_classes=len(LIST_LABEL), task='MULTICLASS')
        for batch_number,data in enumerate(trainloader):
            inputs,labels = data
            
            optimizer.zero_grad()
            preds = model(inputs)

            loss = loss_function(preds, labels)
            loss.backward()
            optimizer.step()

            acc_train.update(model.predict_with_known_class(inputs), labels)
            running_loss += loss.item()
        avg_loss = running_loss / len(trainloader)
        # validating step
        model.train(False)
        running_vloss = 0.0
        acc_val = Accuracy(num_classes=len(LIST_LABEL), task='MULTICLASS')
        for i, vdata in enumerate(val_loader):
            vinputs, vlabels = vdata
            preds = model(vinputs)
            vloss = loss_function(preds, vlabels)
            running_vloss += vloss.item()
            acc_val.update(model.predict_with_known_class(vinputs), vlabels)

        # Log the running loss averaged per batch
        # for both training and validation
        print(f"Epoch {epoch}: ")
        print(f"Accuracy train:{acc_train.compute().item()}, val:{acc_val.compute().item()}")
        
        avg_vloss = running_vloss / len(val_loader)
        
        print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
        print('Training vs. Validation Loss',
                        { 'Training' : avg_loss, 'Validation' : avg_vloss },
                        epoch + 1)
        print('Training vs. Validation accuracy',
                        { 'Training' : acc_train.compute().item()
                        , 'Validation' : acc_val.compute().item() },
                        epoch + 1)

        # Track best performance, and save the model's state
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            best_model_path = f'./{save_path}/model_{timestamp}_{model.__class__.__name__}_best'
            torch.save(model.state_dict(), best_model_path)

        if early_stopper.early_stop(avg_vloss):
            print(f'Stopping at epoch {epoch}, minimum {early_stopper.watched_metrics}')
            break

    model_path = f'./{save_path}/model_{timestamp}_{model.__class__.__name__}_last'
    torch.save(model.state_dict(), model_path)

    print(acc_val.compute())
    return model, best_model_path

In [6]:
DATA_FOLDER_PATH="./data/"
LIST_LABEL = label_dict_from_config_file("hand_gesture.yaml")
train_path = os.path.join(DATA_FOLDER_PATH,"landmark_train.csv")
val_path = os.path.join(DATA_FOLDER_PATH,"landmark_val.csv")
save_path = './models'
os.makedirs(save_path,exist_ok=True)

trainset = CustomImageDataset(train_path)
trainloader = DataLoader(dataset=trainset, batch_size=40, shuffle=True)

valset = CustomImageDataset(os.path.join(val_path))
val_loader = torch.utils.data.DataLoader(valset,batch_size=50, shuffle=False)

model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
early_stopper = EarlyStopper(patience=30, min_delta=0.1)

optimizer = optim.Adam(model.parameters(), lr=0.0001)

model, best_model_path = train(trainloader, val_loader, model, loss_function, early_stopper, optimizer)

Epoch 0: 
Accuracy train:0.2590453028678894, val:0.4042988717556
LOSS train 1.5959115559796253 valid 1.5754782259464264
Training vs. Validation Loss {'Training': 1.5959115559796253, 'Validation': 1.5754782259464264} 1
Training vs. Validation accuracy {'Training': 0.2590453028678894, 'Validation': 0.4042988717556} 1
Epoch 1: 
Accuracy train:0.42079660296440125, val:0.6417604684829712
LOSS train 1.5400371407888023 valid 1.4940081000328065
Training vs. Validation Loss {'Training': 1.5400371407888023, 'Validation': 1.4940081000328065} 2
Training vs. Validation accuracy {'Training': 0.42079660296440125, 'Validation': 0.6417604684829712} 2
Epoch 2: 
Accuracy train:0.5630890727043152, val:0.6632548570632935
LOSS train 1.376372298562383 valid 1.196445307135582
Training vs. Validation Loss {'Training': 1.376372298562383, 'Validation': 1.196445307135582} 3
Training vs. Validation accuracy {'Training': 0.5630890727043152, 'Validation': 0.6632548570632935} 3
Epoch 3: 
Accuracy train:0.675889313220

In [7]:
list_label = label_dict_from_config_file("hand_gesture.yaml")
DATA_FOLDER_PATH="./data/"

testset = CustomImageDataset(os.path.join(DATA_FOLDER_PATH,"landmark_test.csv"))
test_loader = DataLoader(dataset=testset, batch_size=20, shuffle=False)

network = NeuralNetwork()
network.load_state_dict(torch.load(best_model_path, weights_only=False))

network.eval()
acc_test = Accuracy(num_classes=len(list_label), task='MULTICLASS')
for i, test_data in enumerate(test_loader):
    test_input, test_label = test_data
    preds = model(test_input)
    acc_test.update(preds, test_label)

print(network.__class__.__name__)
print(f"Accuracy of model:{acc_test.compute().item()}")
print("========================================================================")

NeuralNetwork
Accuracy of model:0.9819004535675049
