In [None]:
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split
from torchvision.transforms import v2
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import math
import time
import os
import glob
import seaborn as sn
import pandas as pd
import mediapipe as mp

from dataProcess import getDataLoader
from dataProcess import plot_images
from modelProcess import count_params
from modelProcess import modelTrain
from modelEvaluation import plotCurve
from modelEvaluation import applyModeltoTest

In [None]:
# Select the compute device: prefer CUDA, then Apple-silicon MPS, else CPU.
# `device` is always a torch.device so downstream code sees one consistent type.
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f'Using GPU: {torch.cuda.get_device_name(0)}')
elif torch.backends.mps.is_available():
    # is_available() (not is_built()) is the correct runtime check: is_built()
    # only reports that this PyTorch binary was compiled with MPS support,
    # which is also true on machines that have no usable MPS device.
    device = torch.device('mps')
    print(f'using {device}')
else:
    device = torch.device('cpu')
    print(f'using {device}')

In [None]:
# function to process images by mediapipe
def process_images(input_folder, output_folder):
    """Write a hand-landmark-annotated copy of every JPEG in a class-folder dataset.

    For each sub-directory of ``input_folder`` a sub-directory with the same
    name is created under ``output_folder``. Every ``*.jpg`` file directly
    inside the input sub-directory is run through MediaPipe Hands (static
    image mode, at most one hand, detection confidence 0.5). Detected
    landmarks and their connections are drawn onto the image, which is then
    saved under the same file name. Images where no hand is detected are
    saved without any drawing.

    Args:
        input_folder: Directory containing one sub-directory per class.
        output_folder: Directory under which the annotated copies are written;
            created on demand.
    """
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # The context manager closes the MediaPipe solution (and the resources it
    # holds) even if processing raises part-way through.
    with mp_hands.Hands(static_image_mode=True,
                        max_num_hands=1,
                        min_detection_confidence=0.5) as hands:
        # go through each subfolder
        for class_folder in os.listdir(input_folder):
            input_class_path = os.path.join(input_folder, class_folder)

            # Skip stray files (e.g. .DS_Store) so no bogus class directory
            # is created for them in the output tree.
            if not os.path.isdir(input_class_path):
                continue

            output_class_path = os.path.join(output_folder, class_folder)
            os.makedirs(output_class_path, exist_ok=True)

            for image_path in glob.glob(os.path.join(input_class_path, '*.jpg')):
                # Read as RGB; np.array() copies the pixels, so the file
                # handle can be released before any drawing happens.
                with Image.open(image_path) as image:
                    image_rgb = np.array(image.convert('RGB'))

                # process through mediapipe model
                results = hands.process(image_rgb)
                if results.multi_hand_landmarks:
                    # Draw the landmarks onto image_rgb; the array saved
                    # below is this same, modified array.
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(image_rgb, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # save the output image under the original file name
                image_mp = Image.fromarray(image_rgb)
                base_name = os.path.basename(image_path)
                image_mp.save(os.path.join(output_class_path, base_name))

In [None]:
# Raw dataset location and the destination for its landmark-annotated copy.
input_folder = 'data/asl_alphabet_train'
output_folder = 'data/asl_alphabet_train_mp'

# Build the annotated copy (one output sub-folder per class).
process_images(input_folder=input_folder, output_folder=output_folder)

In [None]:
# Load from the MediaPipe-annotated copy written by process_images.
# NOTE(review): this cell previously pointed at the raw
# 'data/asl_alphabet_train' folder, which left the preprocessed dataset
# unused — confirm the annotated images are the intended training input.
folder = 'data/asl_alphabet_train_mp'
loader_train, loader_val, loader_test = getDataLoader(folder, batch_size=32)

In [None]:
# Visual sanity check on the training loader.
# NOTE(review): the second argument is presumably the number of images to
# show — confirm against dataProcess.plot_images.
plot_images(loader_train,16)

In [None]:
# Same visual sanity check on the test loader.
# NOTE(review): the second argument is presumably the number of images to
# show — confirm against dataProcess.plot_images.
plot_images(loader_test,16)

In [None]:
class improvedCNN(nn.Module):
    """Two-convolution CNN classifier for square RGB images.

    Architecture: conv(3->16, 3x3, pad 1) -> ReLU -> max-pool 2
    -> conv(16->32, 3x3, pad 1) -> ReLU -> max-pool 5
    -> flatten -> linear(512) -> ReLU -> linear(num_classes).

    The padded 3x3 convolutions keep the spatial size, so only the two
    pooling layers shrink it: an ``input_size`` x ``input_size`` image becomes
    a ``(input_size // 2) // 5`` square feature map with 32 channels.

    Args:
        num_classes: Number of output logits. Defaults to 29.
        input_size: Height and width (in pixels) of the square input images.
            Defaults to 200, which gives the 32 * 20 * 20 flattened features
            this model was originally hard-coded for.

    Raises:
        ValueError: If ``input_size`` is too small to leave at least one
            spatial position after both pooling layers.
    """

    def __init__(self, num_classes=29, input_size=200):
        super().__init__()

        # Side length of the feature map after pool1 (/2) and pool2 (/5);
        # both poolings floor the result.
        feature_side = (input_size // 2) // 5
        if feature_side < 1:
            raise ValueError(
                f'input_size={input_size} is too small: the pooling layers '
                'would leave an empty feature map'
            )

        # Layers are created in the same order and under the same attribute
        # names as before so state_dict keys and seeded initialisation match.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)

        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.pool2 = nn.MaxPool2d(kernel_size=5)

        self.fc1 = nn.Linear(32 * feature_side * feature_side, 512)
        self.fc2 = nn.Linear(512, num_classes)

        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Batch of images shaped (batch, 3, input_size, input_size).
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))

        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        # No softmax here: the logits are returned as-is.
        x = self.fc2(x)

        return x

In [None]:
# Instantiate the network defined above with its default configuration.
model = improvedCNN()
# NOTE(review): count_params comes from modelProcess; presumably it reports
# the model's parameter count — confirm against the helper.
count_params(model)

In [None]:
# Epoch count is kept in a variable because the plotting cell reuses it.
num_epochs = 20

# Run training; modelTrain returns five values, unpacked here in order.
(
    losses_train,
    accs_train,
    losses_val,
    accs_val,
    train_time,
) = modelTrain(
    model=model,
    learning_rate=1.5e-4,
    num_epochs=num_epochs,
    loader_train=loader_train,
    loader_val=loader_val,
    device=device,
)

In [None]:
# Plot the histories returned by modelTrain.
# NOTE(review): presumably draws per-epoch loss/accuracy curves for the
# training and validation sets — confirm against modelEvaluation.plotCurve.
plotCurve(num_epochs, losses_train, accs_train, losses_val, accs_val)

In [None]:
# Run the trained model over the test loader and print whatever
# applyModeltoTest returns.
print(
    applyModeltoTest(
        model=model,
        loader_test=loader_test,
        device=device,
    )
)