<a href="https://colab.research.google.com/github/Namtk214/Project-IOT/blob/main/IOTPRJ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Preparing

In [None]:
# Create the download directory. -p keeps this cell re-runnable: plain
# `mkdir` errors out when data/ already exists.
!mkdir -p data/

mkdir: cannot create directory ‘data/’: File exists


In [None]:
# Download the landmark CSV splits from Google Drive into data/.
# File names below are taken from this cell's recorded output.
# -> data/landmark_val.csv
!gdown  1gzWOtABiVmJ38usCSDe5F9gR2tECt3zu -O data/
# -> data/landmark_train.csv
!gdown  15lwipssmC_K82ukRfb0uVCiDH1TZ3QCf -O data/
# -> data/landmark_test.csv
!gdown  1nIo1_wBmkovz-u_BCsV5c1Kbz6ZqoKwq -O data/

Downloading...
From: https://drive.google.com/uc?id=1gzWOtABiVmJ38usCSDe5F9gR2tECt3zu
To: /content/data/landmark_val.csv
100% 369k/369k [00:00<00:00, 106MB/s]
Downloading...
From: https://drive.google.com/uc?id=15lwipssmC_K82ukRfb0uVCiDH1TZ3QCf
To: /content/data/landmark_train.csv
100% 1.28M/1.28M [00:00<00:00, 123MB/s]
Downloading...
From: https://drive.google.com/uc?id=1nIo1_wBmkovz-u_BCsV5c1Kbz6ZqoKwq
To: /content/data/landmark_test.csv
100% 320k/320k [00:00<00:00, 104MB/s]


In [None]:
!pip install mediapipe==0.10.18




In [None]:
!pip install torchmetrics



Preprocessing

In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch import optim
import yaml
from datetime import datetime
from torchmetrics import Accuracy
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

Define neural network

In [None]:
def label_dict_from_config_file(relative_path):
    """Read a YAML config file and return the value stored under ``gestures``.

    Args:
        relative_path: Path of the YAML file to open.

    Returns:
        Whatever the file holds under the top-level ``gestures`` key; callers
        in this notebook only take ``len()`` of it.

    Raises:
        KeyError: If the file has no ``gestures`` key.
    """
    # NOTE(review): if this file can ever come from an untrusted source,
    # prefer yaml.safe_load over yaml.full_load.
    with open(relative_path, "r") as config_file:
        return yaml.full_load(config_file)["gestures"]

In [None]:
# class NeuralNetwork(nn.Module):
#   def __init__(self):
#     super(NeuralNetwork, self).__init__()
#     self.flatten = nn.Flatten()
#     list_label = label_dict_from_config_file("/content/hand_gesture.yaml")
#     self.linear_relu_stack = nn.Sequential(
#         nn.Linear(63, 128),
#         nn.ReLU(),
#         nn.BatchNorm1d(128),
#         nn.Linear(128, 128),
#         nn.ReLU(),
#         nn.Dropout(p=0.4),
#         nn.Linear(128, 128),
#         nn.ReLU(),
#         nn.Dropout(p=0.4),
#         nn.Linear(128, 128),
#         nn.ReLU(),
#         nn.Dropout(p=0.6),
#         nn.Linear(128, len(list_label)),
#     )

#   def forward(self, x):
#     x = self.flatten(x)
#     logits = self.linear_relu_stack(x)
#     return logits

#   def predict(self, x, threshold=0.8):
#     logits = self(x)
#     softmax_prob = nn.softmax(dim=1)(logits)
#     chosen_ind = torch.argmax(softmax_prob, dim=1)
#     return torch.where(softmax_prob[0, chosen_ind] > threshold, chosen_ind, -1)

#   def predict_with_known_class(self, x):
#     logits = self(x)
#     softmax_prob = nn.softmax(dim=1)(logits)
#     return torch.argmax(softmax_prob, dim=1)

#   def score(self, logits):
#     softmax_prob = nn.softmax(dim=1)(logits)
#     return -torch.max(softmax_prob, dim=1)

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected gesture classifier over flattened landmark vectors.

    Each sample is flattened to 63 features (presumably 21 hand landmarks
    times x/y/z -- confirm against the CSV layout) and mapped to one logit per
    entry under ``gestures`` in ``hand_gesture.yaml``.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # The width of the output layer follows the gesture list in the config.
        list_label = label_dict_from_config_file("hand_gesture.yaml")
        # Layer order is kept as-is so previously saved state dicts still load.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(63, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(p=0.6),
            nn.Linear(128, len(list_label)),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

    def predict(self, x, threshold=0.8):
        """Predict a class per sample, or -1 when the model is not confident.

        Args:
            x: Batch of inputs accepted by ``forward``.
            threshold: Minimum softmax probability the winning class must
                exceed for the prediction to be kept.

        Returns:
            Integer tensor of shape (batch,) holding the winning class index,
            or -1 where the top probability is not above ``threshold``.
        """
        logits = self(x)
        softmax_prob = torch.softmax(logits, dim=1)
        # Each sample must be judged by its OWN top probability. Looking the
        # chosen index up in row 0 only is correct for a batch of one and
        # silently wrong for anything larger.
        top_prob, chosen_ind = torch.max(softmax_prob, dim=1)
        rejected = torch.full_like(chosen_ind, -1)
        return torch.where(top_prob > threshold, chosen_ind, rejected)

    def predict_with_known_class(self, x):
        """Return the most probable class index per sample, with no rejection."""
        logits = self(x)
        softmax_prob = torch.softmax(logits, dim=1)
        return torch.argmax(softmax_prob, dim=1)

    def score(self, logits):
        """Return the negated maximum logit per sample."""
        return -torch.amax(logits, dim=1)

In [None]:
class HandLandmarksDetector():
    """Thin wrapper around MediaPipe Hands that returns flat landmark lists."""

    def __init__(self) -> None:
        solutions = mp.solutions
        self.mp_drawing = solutions.drawing_utils
        self.mp_drawing_styles = solutions.drawing_styles
        self.mp_hands = solutions.hands
        # NOTE(review): the leading positional False is presumably
        # static_image_mode -- confirm against the MediaPipe Hands signature.
        self.detector = self.mp_hands.Hands(
            False, max_num_hands=1, min_detection_confidence=0.5)

    def detectHand(self, frame):
        """Detect hands in a BGR frame.

        The frame is flipped horizontally before detection, so both the
        returned coordinates and the annotated image refer to the mirrored
        frame.

        Returns:
            A tuple ``(hands, annotated_image)``. ``hands`` has one flat list
            ``[x0, y0, z0, x1, y1, z1, ...]`` per detected hand (empty when
            none is found); ``annotated_image`` is a copy of the mirrored
            frame with the landmarks drawn on it.
        """
        mirrored = cv2.flip(frame, 1)
        annotated_image = mirrored.copy()
        results = self.detector.process(cv2.cvtColor(mirrored, cv2.COLOR_BGR2RGB))

        hands = []
        detected = results.multi_hand_landmarks
        if detected is None:
            return hands, annotated_image

        for hand_landmarks in detected:
            self.mp_drawing.draw_landmarks(
                annotated_image,
                hand_landmarks,
                self.mp_hands.HAND_CONNECTIONS,
                self.mp_drawing_styles.get_default_hand_landmarks_style(),
                self.mp_drawing_styles.get_default_hand_connections_style())
            # Flatten every landmark into consecutive x, y, z values.
            hands.append([
                coord
                for point in hand_landmarks.landmark
                for coord in (point.x, point.y, point.z)
            ])
        return hands, annotated_image


In [None]:
class CustomImageDataset(Dataset):
    """Dataset backed by a CSV whose first column is the label.

    All remaining columns are treated as float32 features. The first row of
    the file is consumed as the header by ``pd.read_csv``.
    """

    def __init__(self, data_file):
        """Load ``data_file`` and convert labels and features to tensors once."""
        self.data = pd.read_csv(data_file)
        self.labels = torch.from_numpy(self.data.iloc[:, 0].to_numpy())
        # Convert the feature columns a single time here instead of slicing
        # and converting a DataFrame row on every __getitem__ call.
        self.features = torch.from_numpy(
            self.data.iloc[:, 1:].to_numpy(dtype=np.float32))

    def __len__(self):
        """Return the number of data rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx``.

        ``label`` is the class index read from the first column (it is not
        one-hot encoded).
        """
        return self.features[idx], self.labels[idx]

In [None]:
class EarlyStopper:
    """Signal when a monitored value has stopped improving.

    Args:
        patience: Number of worsening calls tolerated before stopping.
        min_delta: Margin above the best value that still does not count as
            worsening.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        # Lowest value seen so far; starts at +inf so the first call wins.
        self.watched_metrics = np.inf

    def early_stop(self, current_value):
        """Record ``current_value`` and return True when training should stop."""
        # A new best resets the strike counter.
        if current_value < self.watched_metrics:
            self.watched_metrics = current_value
            self.counter = 0
            return False

        # Only values strictly above best + min_delta count as a strike;
        # anything in between leaves the counter untouched.
        if current_value > (self.watched_metrics + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [None]:
def train(trainloader, val_loader, model, loss_function, erly_stopper, optimizer,
          num_epochs=300, save_path="./models"):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Args:
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        model: Module to optimise; it is updated in place.
        loss_function: Callable ``(logits, labels) -> loss``.
        erly_stopper: Object with ``early_stop(value) -> bool`` and a
            ``watched_metrics`` attribute. The misspelled name is kept so
            existing keyword callers keep working.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Maximum number of epochs to run.
        save_path: Directory for the checkpoints; created if missing.

    Returns:
        ``(model, best_model_path)``. ``best_model_path`` points at the state
        dict with the lowest validation loss, or is ``None`` if no epoch ever
        produced a comparable (non-NaN) improvement.
    """
    # Use the argument that was passed in; the body must not fall back to a
    # notebook-level global that merely happens to share the intended name.
    early_stopper = erly_stopper

    # Read the label config once, via the same relative path the rest of the
    # notebook uses, instead of twice per epoch from an absolute Colab path.
    num_classes = len(label_dict_from_config_file("hand_gesture.yaml"))
    acc_train = Accuracy(num_classes=num_classes, task="multiclass")
    acc_val = Accuracy(num_classes=num_classes, task="multiclass")

    os.makedirs(save_path, exist_ok=True)
    best_vloss = float("inf")
    best_model_path = None  # stays None if validation loss never improves
    # NOTE(review): this timestamp contains a space and ':'; some filesystems
    # reject ':' in file names.
    timestamp = datetime.now().strftime('%d-%m %H:%M')
    model_name = model.__class__.__name__

    for epoch in range(num_epochs):
        # training step
        model.train(True)
        acc_train.reset()
        running_loss = 0.0
        for inputs, labels in trainloader:
            optimizer.zero_grad()
            preds = model(inputs)
            loss = loss_function(preds, labels)
            loss.backward()
            optimizer.step()
            # Score the logits already computed. A second forward pass in
            # train mode would re-run dropout, update the BatchNorm running
            # statistics again and build a graph nobody uses.
            acc_train.update(preds.detach().argmax(dim=1), labels)
            running_loss += loss.item()
        avg_loss = running_loss / len(trainloader)

        # validating step
        model.train(False)
        acc_val.reset()
        running_vloss = 0.0
        with torch.no_grad():  # no gradients are needed for evaluation
            for vinputs, vlabels in val_loader:
                vpreds = model(vinputs)
                running_vloss += loss_function(vpreds, vlabels).item()
                acc_val.update(vpreds.argmax(dim=1), vlabels)
        avg_vloss = running_vloss / len(val_loader)

        # Log loss (averaged per batch) and accuracy for both splits
        train_acc = acc_train.compute().item()
        val_acc = acc_val.compute().item()
        print(f"Epoch {epoch}: ")
        print(f"Accuracy train:{train_acc}, val:{val_acc}")
        print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
        print('Training vs. Validation Loss',
              {'Training': avg_loss, 'Validation': avg_vloss},
              epoch + 1)
        print('Training vs. Validation accuracy',
              {'Training': train_acc, 'Validation': val_acc},
              epoch + 1)

        # Track best performance and save model's state
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            best_model_path = os.path.join(
                save_path, f'model_{timestamp}_{model_name}_best')
            torch.save(model.state_dict(), best_model_path)
        if early_stopper.early_stop(avg_vloss):
            print(f"stopping at epoch {epoch}, minimum: {early_stopper.watched_metrics}")
            break

    # Always keep the final weights too, whether or not they are the best.
    model_path = os.path.join(save_path, f'model_{timestamp}_{model_name}_last')
    torch.save(model.state_dict(), model_path)

    print(acc_val.compute())
    return model, best_model_path

In [None]:
# Paths and label configuration
DATA_FOLDER_PATH = "./data/"
LIST_LABEL = label_dict_from_config_file("hand_gesture.yaml")
train_path = os.path.join(DATA_FOLDER_PATH, "landmark_train.csv")
val_path = os.path.join(DATA_FOLDER_PATH, "landmark_val.csv")
save_path = './models'  # directory the checkpoints are written to
os.makedirs(save_path, exist_ok=True)

# Datasets and loaders: only the training split is shuffled
trainset = CustomImageDataset(train_path)
trainloader = DataLoader(trainset, batch_size=40, shuffle=True)

valset = CustomImageDataset(val_path)
val_loader = DataLoader(valset, batch_size=50, shuffle=False)

# Model, loss, optimiser and early-stopping policy
model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
early_stopper = EarlyStopper(patience=30, min_delta=0.01)

# Run training; returns the trained model and the path of the best checkpoint
model, best_model_path = train(
    trainloader, val_loader, model, loss_function, early_stopper, optimizer)

Epoch 0: 
Accuracy train:0.25613346695899963, val:0.14675767719745636
LOSS train 1.5998465281266432 valid 1.609969953695933
Training vs. Validation Loss {'Training': 1.5998465281266432, 'Validation': 1.609969953695933} 1
Training vs. Validation accuracy {'Training': 0.25613346695899963, 'Validation': 0.14675767719745636} 1
Epoch 1: 
Accuracy train:0.3002943992614746, val:0.19112628698349
LOSS train 1.580174106817979 valid 1.5895103812217712
Training vs. Validation Loss {'Training': 1.580174106817979, 'Validation': 1.5895103812217712} 2
Training vs. Validation accuracy {'Training': 0.3002943992614746, 'Validation': 0.19112628698349} 2
Epoch 2: 
Accuracy train:0.36113837361335754, val:0.24573378264904022
LOSS train 1.5630197525024414 valid 1.5659319559733074
Training vs. Validation Loss {'Training': 1.5630197525024414, 'Validation': 1.5659319559733074} 3
Training vs. Validation accuracy {'Training': 0.36113837361335754, 'Validation': 0.24573378264904022} 3
Epoch 3: 
Accuracy train:0.3827