<a href="https://colab.research.google.com/github/AmitAaranya/ASL-Recognition-with-Deep-Learning-PyTorch/blob/main/google-colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load the ASL Image Data
**URL:** https://www.kaggle.com/datasets/lexset/synthetic-asl-alphabet


In [None]:
!echo "{\"username\":\"amit\",\"key\":\"e88\"}" > /root/.kaggle/kaggle.json

In [5]:
!kaggle datasets download -d lexset/synthetic-asl-alphabet

Downloading synthetic-asl-alphabet.zip to /content
100% 6.58G/6.58G [05:19<00:00, 25.8MB/s]
100% 6.58G/6.58G [05:19<00:00, 22.1MB/s]


In [15]:
!mkdir datasets

In [None]:
!unzip synthetic-asl-alphabet.zip -d ./datasets/

# Clone GitHub repo
**URL:** https://github.com/AmitAaranya/ASL-Recognition-with-Deep-Learning-PyTorch

In [19]:
!git clone https://github.com/AmitAaranya/ASL-Recognition-with-Deep-Learning-PyTorch.git ./code/

Cloning into './code'...
remote: Enumerating objects: 32, done.[K
remote: Counting objects: 100% (32/32), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 32 (delta 7), reused 24 (delta 5), pack-reused 0[K
Receiving objects: 100% (32/32), 808.19 KiB | 1.64 MiB/s, done.
Resolving deltas: 100% (7/7), done.


In [20]:
!git --git-dir=./code/.git pull

Already up to date.


In [38]:
## Add the folder to system path, so we can import file directly
import sys
sys.path.append('/content/code')

# Exploratory Data Analysis(EDA)

In [18]:
# Imports
import torch
import numpy as np

## Load Data

In [24]:
from data_loader import load_datasets

size = 100
x_train, y_train = load_datasets(size = size)
x_test,  y_test = load_datasets(train_or_test_folder="Test_Alphabet",size = size)

print(f"Train dataset count: {len(x_train)} \nTest dataset count: {len(x_test)} ")

Train dataset count: 24300 
Test dataset count: 2700 


In [25]:
from data_loader import alphabet_to_num

def one_hot_coding(unique_labels, data):
    y_oh = torch.zeros(len(data),unique_labels)
    for i, class_word in enumerate(data):
        index = alphabet_to_num(class_word)
        y_oh[i][index] = 1

    return y_oh

y_train_oh = one_hot_coding(len(np.unique(y_train)),y_train)
y_test_oh = one_hot_coding(len(np.unique(y_train)),y_test)

In [26]:
x_train[0].shape,y_train.shape

((3, 50, 50), (24300,))

In [36]:
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(),lr=0.0005)
batch_size = 32

train_loader = DataLoader(TensorDataset(torch.tensor(x_train),torch.tensor(y_train_oh)),
                          batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(torch.tensor(x_test),torch.tensor(y_test_oh)),
                          batch_size=batch_size)

  train_loader = DataLoader(TensorDataset(torch.tensor(x_train),torch.tensor(y_train_oh)),
  test_loader = DataLoader(TensorDataset(torch.tensor(x_test),torch.tensor(y_test_oh)),


## Define CNN model

In [34]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # First convolutional layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=27, kernel_size=5, padding=2)
        # Max pooling layer
        self.pool = nn.MaxPool2d(kernel_size=4, stride=4)
        # Second convolutional layer
        self.conv2 = nn.Conv2d(in_channels=27, out_channels=9, kernel_size=5, padding=2)
        # Another max pooling layer
        self.pool2 = nn.MaxPool2d(kernel_size=4, stride=4)
        # Fully connected layer
        self.fc1 = nn.Linear(9 * 3 * 3, 27)  # 3x3 is the size after two max pooling layers

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.view(-1, 9 * 3 * 3)  # Flatten the output of conv2 layer
        x = self.fc1(x)
        return x

# Instantiate the model
model = ConvNet()

# Print the model architecture
print(model)

ConvNet(
  (conv1): Conv2d(3, 27, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(27, 9, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=81, out_features=27, bias=True)
)


In [37]:
def train_model(model,criterion,optimizer,num_epochs=50):
    for epoch in range(num_epochs):
        running_loss = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}")


train_model(model=model, criterion=criterion, optimizer=optimizer)

Epoch [1/50], Train Loss: 3.1756
Epoch [2/50], Train Loss: 2.6273
Epoch [3/50], Train Loss: 2.2609
Epoch [4/50], Train Loss: 2.0347
Epoch [5/50], Train Loss: 1.8718
Epoch [6/50], Train Loss: 1.7439
Epoch [7/50], Train Loss: 1.6357
Epoch [8/50], Train Loss: 1.5461
Epoch [9/50], Train Loss: 1.4670
Epoch [10/50], Train Loss: 1.4013
Epoch [11/50], Train Loss: 1.3381
Epoch [12/50], Train Loss: 1.2837
Epoch [13/50], Train Loss: 1.2336
Epoch [14/50], Train Loss: 1.1888
Epoch [15/50], Train Loss: 1.1478
Epoch [16/50], Train Loss: 1.1082
Epoch [17/50], Train Loss: 1.0743
Epoch [18/50], Train Loss: 1.0401
Epoch [19/50], Train Loss: 1.0107
Epoch [20/50], Train Loss: 0.9828
Epoch [21/50], Train Loss: 0.9591
Epoch [22/50], Train Loss: 0.9359
Epoch [23/50], Train Loss: 0.9133
Epoch [24/50], Train Loss: 0.8913
Epoch [25/50], Train Loss: 0.8732
Epoch [26/50], Train Loss: 0.8534
Epoch [27/50], Train Loss: 0.8358
Epoch [28/50], Train Loss: 0.8199
Epoch [29/50], Train Loss: 0.8041
Epoch [30/50], Train Lo

In [39]:
def test_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            _, labels_indices = torch.max(labels, 1)  # Convert one-hot labels to indices
            total += labels.size(0)
            correct += (predicted == labels_indices).sum().item()
    accuracy = correct / total
    print(f"Total accurate predictions: {correct} out of {total}")
    print(f"Accuracy on test set: {accuracy:.4f}")

test_model(model=model,test_loader=test_loader)

Total accurate predictions: 2175 out of 2700
Accuracy on test set: 0.8056
