In [96]:
! pip install torchmetrics
! pip install mediapipe==0.10.18

  pid, fd = os.forkpty()




In [97]:
import os
import cv2
import yaml
import torch
import numpy as np
import pandas as pd
from torch import nn
import mediapipe as mp
from torch import optim
from datetime import datetime
from torchmetrics import Accuracy
from torch.utils.data import Dataset

In [98]:
def label_dict_from_config_file(relative_path):
    with open(relative_path,"r") as f:
       label_tag = yaml.full_load(f)["gestures"]
    return label_tag

In [99]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [100]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.list_label = label_dict_from_config_file("/kaggle/input/data-hand/data/hand_gesture.yaml")

        '''Hoàn thành đoạn code để xây dựng một model gồm có 4 hidden layer,
            lần lượng input và output là (63, 128), (128, 128), (128, 128), (128, 128).
            Layer đầu tiên được theo sau bổi một Relu và Batchnorm1d.
            Layer thứ 2, 3, và 4 được theo sau bỏi Relu và Dropout với rate lần lượt là 0.4, 0.4, 0.6.
            Output layer có nhịêm vụ phân loại với input là 128 và output là số lượng class cử chỉ
        '''
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(63, 128), 
            nn.ReLU(),              
            nn.BatchNorm1d(128),    
            nn.Linear(128, 128),   
            nn.ReLU(),         
            nn.Dropout(0.4),      
            nn.Linear(128, 128),     
            nn.ReLU(),               
            nn.Dropout(0.4),         
            nn.Linear(128, 128),     
            nn.ReLU(),               
            nn.Dropout(0.6),        
            nn.Linear(128, len(self.list_label))
        )
    def forward(self, x):

        ''' Hoàn thành code để thực hiện forward dự đoán cử chỉ với input x.
        Thực hiệnt flatten x
        Pass x vừa flatten vào linear_relu_stack
        Return  logits (outputs từ layer cuối cùng)
        '''
        x = self.flatten(x)
        return self.linear_relu_stack(x)

    def predict(self,x,threshold=0.8):
        logits = self(x)
        softmax_prob = nn.Softmax(dim=1)(logits)
        chosen_ind = torch.argmax(softmax_prob,dim=1)
        # xác suát thấp hơn ngưỡng thì kh đủ tin cậy và gán nó = -1
        return torch.where(softmax_prob[0,chosen_ind]>threshold,chosen_ind,-1)

    def predict_with_known_class(self,x):
        logits = self(x)
        softmax_prob = nn.Softmax(dim=1)(logits)
        return torch.argmax(softmax_prob,dim=1)

    def score(self,logits):
        return -torch.amax(logits,dim=1)

In [101]:
class HandLandmarksDetector():
    def __init__(self) -> None:
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        self.mp_hands = mp.solutions.hands
        self.detector = self.mp_hands.Hands(
            False, max_num_hands=1, min_detection_confidence=0.5)

    def detect_hand(self, frame):
        hands = []
        frame = cv2.flip(frame, 1)
        annotated_image = frame.copy()
        results = self.detector.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.multi_hand_landmarks is not None:
            for hand_landmarks in results.multi_hand_landmarks:
                hand = []
                self.mp_drawing.draw_landmarks(
                    annotated_image,
                    hand_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                    self.mp_drawing_styles.get_default_hand_landmarks_style(),
                    self.mp_drawing_styles.get_default_hand_connections_style())
                for landmark in hand_landmarks.landmark:
                    x, y, z = landmark.x, landmark.y, landmark.z
                    hand.extend([x, y, z])
            hands.append(hand)
        return hands, annotated_image

In [102]:
class CustomImageDataset(Dataset):
    def __init__(self, data_file):
        self.data = pd.read_csv(data_file)
        # chuyển numpy thành tensor
        self.labels = torch.from_numpy(self.data.iloc[:, 0].to_numpy())

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        one_hot_label = self.labels[idx]
        torch_data = torch.from_numpy(
            self.data.iloc[idx, 1:].to_numpy(dtype=np.float32))
        return torch_data, one_hot_label

In [103]:
class EarlyStopper:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.watched_metrics = np.inf

    def early_stop(self, current_value):
        if current_value < self.watched_metrics:
            self.watched_metrics = current_value
            self.counter = 0
        elif current_value > (self.watched_metrics + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [104]:
def train(trainloader, val_loader, model, loss_function, early_stopper, optimizer):
    # add auroc score
    best_vloss = 1_000_000
    LIST_LABEL = model.list_label
    timestamp = datetime.now().strftime('%d-%m %H:%M')
    for epoch in range(300):
        #training step
        model.train(True)
        running_loss = 0.0
        acc_train = Accuracy(num_classes=len(LIST_LABEL), task='MULTICLASS').to(device)
        for batch_number,data in enumerate(trainloader):
            inputs = data[0].to(device)
            labels = data[1].to(device)
            ################## Your Code Here ################## Q9
            ''' Hoàn thành code để thực hiện reset gradients và dự đoán class cử
            chỉ của inputs
            '''
            optimizer.zero_grad()
            preds = model.forward(inputs)
            ####################################################

            ################## Your Code Here ################## Q10
            ''' Hoàn thành code để thực hiện tính loss dưa vào kết quả dự đoán
            và labels, sau đó thực hiện backwward và update parameters thông qua
            optimizer
            '''
            loss = loss_function(preds, labels)
            loss.backward()
            optimizer.step()

            ####################################################
            acc_train.update(model.predict_with_known_class(inputs), labels)
            running_loss += loss.item()
        avg_loss = running_loss / len(trainloader)
        # validating step
        model.train(False)
        running_vloss = 0.0
        acc_val = Accuracy(num_classes=len(LIST_LABEL), task='MULTICLASS').to(device)
        for i, vdata in enumerate(val_loader):
            vinputs = vdata[0].to(device)
            vlabels = vdata[1].to(device)
            preds = model(vinputs)
            vloss = loss_function(preds, vlabels)
            running_vloss += vloss.item()
            acc_val.update(model.predict_with_known_class(vinputs), vlabels)

        # Log the running loss averaged per batch
        # for both training and validation
        print(f"Epoch {epoch}: ")
        print(f"Accuracy train:{acc_train.compute().item()}, val:{acc_val.compute().item()}")
        avg_vloss = running_vloss / len(val_loader)
        print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
        print('Training vs. Validation Loss',
                        {'Training' : avg_loss, 'Validation' : avg_vloss},
                        epoch + 1)
        print('Training vs. Validation accuracy',
                        {'Training' : acc_train.compute().item()
                        ,'Validation' : acc_val.compute().item()},
                        epoch + 1)

        # Track best performance, and save the model's state
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            best_model_path = f'/kaggle/working/model_{timestamp}_{model.__class__.__name__}_best'
            torch.save(model.state_dict(), best_model_path)

        if early_stopper.early_stop(avg_vloss):
            ################## Your Code Here ################## Q5
            ''' Hoàn thành đoạn code bên dướ để  print ra epoch hiện tại và
            minimum watched metric và thoát loop
            '''
            print (f"stopping at epoch {epoch}, minimum : {early_stopper.watched_metrics}")
            break
            ####################################################



    model_path = f'/kaggle/working/model_{timestamp}_{model.__class__.__name__}_last'
    torch.save(model.state_dict(), model_path)

    print(acc_val.compute())
    return model, best_model_path

In [105]:
data_folder_path = "/kaggle/input/data-hand/data/data2"
trainset = CustomImageDataset(os.path.join(
    data_folder_path, "landmark_test.csv"))
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=40, shuffle=True, num_workers=2)

testset = CustomImageDataset(os.path.join(
    data_folder_path, "landmark_train.csv"))
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=20, shuffle=False, num_workers=2)

valset = CustomImageDataset(os.path.join(
    data_folder_path, "landmark_val.csv"))
val_loader = torch.utils.data.DataLoader(
    valset, batch_size=50, shuffle=False, num_workers=2)

In [106]:
model = NeuralNetwork().to(device)
loss_function = nn.CrossEntropyLoss()
early_stopper = EarlyStopper(patience=30, min_delta=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

model, best_model_path = train(trainloader, val_loader, model, loss_function, early_stopper, optimizer)

  self.pid = os.fork()
  self.pid = os.fork()


Epoch 0: 
Accuracy train:0.2166212499141693, val:0.17485029995441437
LOSS train 1.6035923581374318 valid 1.6116308780277477
Training vs. Validation Loss {'Training': 1.6035923581374318, 'Validation': 1.6116308780277477} 1
Training vs. Validation accuracy {'Training': 0.2166212499141693, 'Validation': 0.17485029995441437} 1
Epoch 1: 
Accuracy train:0.3351498544216156, val:0.44610777497291565
LOSS train 1.5854596150548834 valid 1.5723073587698095
Training vs. Validation Loss {'Training': 1.5854596150548834, 'Validation': 1.5723073587698095} 2
Training vs. Validation accuracy {'Training': 0.3351498544216156, 'Validation': 0.44610777497291565} 2
Epoch 2: 
Accuracy train:0.36376020312309265, val:0.6928143501281738
LOSS train 1.5654020309448242 valid 1.5249590452979593
Training vs. Validation Loss {'Training': 1.5654020309448242, 'Validation': 1.5249590452979593} 3
Training vs. Validation accuracy {'Training': 0.36376020312309265, 'Validation': 0.6928143501281738} 3
Epoch 3: 
Accuracy train:

In [114]:
list_label= label_dict_from_config_file("/kaggle/input/data-hand/data/hand_gesture.yaml")
acc_test = Accuracy(num_classes=len(list_label), task='MULTICLASS').to(device)

network = NeuralNetwork().to(device)
network.load_state_dict(torch.load(best_model_path, weights_only=False))

for test_input, test_label in test_loader:
    test_input = test_input.to(device)
    test_label = test_label.to(device)
    preds = network(test_input)
    acc_test.update(model.predict_with_known_class(test_input), test_label)

print(network.__class__.__name__)
print(f"Accuracy of model:{acc_test.compute().item()}")
print("========================================================================")

NeuralNetwork
Accuracy of model:0.9702467322349548
