## About

**Notebook overview**

This notebook covers the preprocessing of a subset of the ArSL dataset, followed by the fine-tuning of pretrained image classifiers on it. We follow the common preprocessing techniques found in the literature for classification of sign languages.

The following are the preprocessing techniques applied to the chosen subset of the dataset.

1. **Image resizing**: The images are resized to (3, 224, 224) where 3 represents the number of channels (RGB) in the image and (224, 224) represents the 2D dimensions of each image.

2. **Normalization**: Scaling pixel values to a range, typically between 0 and 1, helps in speeding up convergence during training.

3. **Data Augmentation**: To make the model robust to various orientations and scales, augmenting the dataset with transformed images (e.g., rotations, scaling, translations, flipping) is beneficial.

## Needed libraries

In [None]:
import numpy as np
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import os
import torch
import torch.nn as nn
import torchvision.transforms.v2 as transforms
import torchvision.models
from  torch.utils.data import DataLoader, Dataset
from albumentations.pytorch import ToTensorV2
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

## Loading data from Google drive

### Mount the drive

In [None]:
# Colab-only: expose Google Drive under /content/drive so the dataset and
# model paths used by the later cells resolve
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Loading images from the drive

In [None]:
# Load data from numpy arrays
# Each file holds one image; the class label is the file-name prefix before the first "_"
destination_folder = "/content/drive/Shared drives/Computer Vision/Data/Resized_RGB_ArSL_dataset_numpy"

# Keep only .npy files (anything else in the folder would make np.load fail)
# and sort them so the load order does not depend on the arbitrary order
# returned by os.listdir
files = sorted(file for file in os.listdir(destination_folder) if file.endswith(".npy"))

# Initialize lists to store image arrays and labels
image_arrays = []
labels = []

# Read each .npy file and append its numpy array and label to the lists
for file in files:
    # Extract label from file name
    label = file.split('_')[0]
    # Load numpy array
    array = np.load(os.path.join(destination_folder, file))
    # Append to lists
    image_arrays.append(array)
    labels.append(label)

# Convert lists to numpy arrays
# NOTE(review): stacking assumes every stored array has the same shape — confirm
image_arrays = np.array(image_arrays)
labels = np.array(labels)

## Image resizing

### Converting numpy arrays to images and resizing them

In [None]:
# Turn every NumPy array into a PIL image and resize it to 224x224 in one pass
images = [Image.fromarray(pixel_array).resize((224, 224)) for pixel_array in image_arrays]

## Data splitting, Normalization, and Data Augmentation  

### Data splitting and label encoding

In [None]:
# Stratified 80/20 split. The fixed random_state makes the partition (and so
# every metric reported below) reproducible across kernel restarts.
train_data, test_data, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, stratify=labels, random_state=42
)

In [None]:
# Convert the images of both splits to tensors with one shared ToTensor instance
transform = transforms.ToTensor()
train_data = list(map(transform, train_data))
test_data = list(map(transform, test_data))



In [None]:
# Label encoding (integer class indices, not one-hot vectors)

label_encoder = LabelEncoder()
# Learn the class-name -> index mapping from the training labels only
train_labels = label_encoder.fit_transform(train_labels)
# Reuse that mapping for the test labels instead of re-fitting, so both splits
# are guaranteed to share the same indices; transform() raises if the test
# split contains a class the encoder has not seen
test_labels = label_encoder.transform(test_labels)

### Data Augmentation and Normalization

In [None]:
class MyDataset(Dataset):
    """Dataset pairing samples with labels, with an optional per-sample transform.

    Args:
        x: indexable container of samples.
        y: indexable container of labels; its length defines the dataset size.
        transforms: optional callable applied to a sample each time it is fetched.
    """

    def __init__(self, x, y, transforms=None):
        # Only references are stored; nothing is copied or transformed up front
        self.x, self.y, self.transforms = x, y, transforms

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        sample, target = self.x[idx], self.y[idx]
        # A falsy `transforms` (e.g. None) hands the sample back untouched
        if not self.transforms:
            return sample, target
        return self.transforms(sample), target

In [None]:
# Can add multiple transformations to enhance the training of the model

# Per-channel mean / std used by both pipelines, kept in one place so the
# train and test normalization cannot drift apart
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training pipeline: random augmentations followed by normalization
# NOTE(review): RandomHorizontalFlip mirrors the hand — confirm that a mirrored
# sign still carries the same label for every class in this subset
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std)
])

# Test pipeline: no augmentation, same normalization
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std)
])



In [None]:
# Training samples are reshuffled on every pass over the loader
trainset = MyDataset(x=train_data, y=train_labels, transforms=train_transforms)
trainloader_ = DataLoader(dataset=trainset, batch_size=5, shuffle=True)

# Evaluation keeps the dataset order fixed
testset = MyDataset(x=test_data, y=test_labels, transforms=test_transforms)
testloader_ = DataLoader(dataset=testset, batch_size=5, shuffle=False)

## Models Fine Tuning

### Functions

In [None]:
def train(model, dataloader, loss, optimizer, device, scheduler=None):
    """Run one training pass over `dataloader`.

    Args:
        model: module to optimise; it is switched to training mode.
        dataloader: iterable yielding (data, labels) tensor batches.
        loss: callable mapping (predictions, labels) to a scalar loss tensor.
        optimizer: optimizer holding the model parameters.
        device: device every batch is moved to before the forward pass.
        scheduler: optional LR scheduler. It is stepped after EVERY batch, so
            its horizon must be expressed in optimizer steps, not epochs.

    Returns:
        (mean loss, accuracy), both averaged per sample so that a smaller final
        batch does not carry the same weight as a full one. The loss average
        assumes `loss` returns a per-batch mean (the default reduction).
        Returns (nan, nan) when the dataloader yields no samples.
    """
    model.train()
    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    for data, labels in dataloader:
        data = data.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        pred = model(data)
        lss = loss(pred, labels)

        lss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        # Accumulate sample-weighted statistics for this batch
        batch_size = labels.size(0)
        total_loss += lss.item() * batch_size
        total_correct += (pred.argmax(dim=1) == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        return float("nan"), float("nan")
    return total_loss / total_samples, total_correct / total_samples

# function to validate the model
def validate(model, dataloader, device, loss_func):
    """Evaluate `model` on `dataloader` without tracking gradients.

    Args:
        model: module to evaluate; it is switched to evaluation mode.
        dataloader: iterable yielding (data, labels) tensor batches.
        device: device every batch is moved to before the forward pass.
        loss_func: callable mapping (predictions, labels) to a scalar loss tensor.

    Returns:
        (mean loss, accuracy), both averaged per sample so that a smaller final
        batch does not carry the same weight as a full one. The loss average
        assumes `loss_func` returns a per-batch mean (the default reduction).
        Returns (nan, nan) when the dataloader yields no samples.
    """
    model.eval()
    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for data, labels in dataloader:
            data = data.to(device)
            labels = labels.to(device)
            pred = model(data)
            loss = loss_func(pred, labels)

            # Accumulate sample-weighted statistics for this batch
            batch_size = labels.size(0)
            total_loss += loss.item() * batch_size
            total_correct += (pred.argmax(dim=1) == labels).sum().item()
            total_samples += batch_size

    if total_samples == 0:
        return float("nan"), float("nan")
    return total_loss / total_samples, total_correct / total_samples

In [None]:
def tune_model(epochs, model, train_dataloader, test_dataloader, loss_func, optimizer, device, scheduler=None):
    """Train `model` for `epochs` passes, evaluating and printing metrics after each one.

    Every epoch calls `train` on `train_dataloader`, then `validate` on
    `test_dataloader`, and prints a single summary line. Nothing is returned.
    """
    for epoch in range(epochs):
        train_metrics = train(
            model=model,
            dataloader=train_dataloader,
            loss=loss_func,
            optimizer=optimizer,
            device=device,
            scheduler=scheduler,
        )
        test_metrics = validate(
            model=model,
            dataloader=test_dataloader,
            device=device,
            loss_func=loss_func,
        )
        train_loss, train_acc = train_metrics
        test_loss, test_acc = test_metrics
        # The two prints form one output line: the first suppresses its newline
        print(f"Epoch : {epoch + 1} || Train loss : {train_loss:5.3f} || Train accuracy : {train_acc:5.3f}", end="")
        print(f" || Test loss : {test_loss:5.3f} || Test accuracy : {test_acc:5.3f}")

### Models

#### Resnet18

In [None]:
# SECURITY NOTE: this file stores a whole pickled model object, and unpickling
# can execute arbitrary code — only load checkpoints from a trusted location.
# weights_only=False is passed explicitly because recent PyTorch releases
# default to weights-only loading, which rejects full-model pickles.
model_resnet18 = torch.load(
    "/content/drive/Shared drives/Computer Vision/Models (Before fine-tuning)/resnet18.pth",
    weights_only=False,
)
# Fine-tune only the last residual stage and the head; freeze everything else
for name, param in model_resnet18.named_parameters():
    param.requires_grad = "layer4" in name or "fc" in name

# Swap in a fresh 8-way classification head (trainable by default), sized from
# the existing head rather than a hard-coded feature width.
# The 8 must match the number of distinct labels in the dataset.
model_resnet18.fc = nn.Linear(model_resnet18.fc.in_features, 8)
model_resnet18 = model_resnet18.to(device)
print(model_resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Cross-entropy objective and an Adam optimizer over the ResNet18 parameters
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_resnet18.parameters(), lr=1e-4)

In [None]:
# Fine-tune for 10 epochs without an LR scheduler, then persist the whole model object
tune_model(
    epochs=10,
    model=model_resnet18,
    train_dataloader=trainloader_,
    test_dataloader=testloader_,
    loss_func=loss,
    optimizer=optimizer,
    device=device,
)
torch.save(model_resnet18, "/content/drive/Shared drives/Computer Vision/Trained models/resnet18.pth")

Epoch : 1 || Train loss : 0.859 || Train accuracy : 0.715 || Test loss : 0.232 || Test accuracy : 0.948
Epoch : 2 || Train loss : 0.375 || Train accuracy : 0.884 || Test loss : 0.178 || Test accuracy : 0.938
Epoch : 3 || Train loss : 0.285 || Train accuracy : 0.911 || Test loss : 0.123 || Test accuracy : 0.975
Epoch : 4 || Train loss : 0.215 || Train accuracy : 0.941 || Test loss : 0.087 || Test accuracy : 0.980
Epoch : 5 || Train loss : 0.192 || Train accuracy : 0.941 || Test loss : 0.089 || Test accuracy : 0.980
Epoch : 6 || Train loss : 0.151 || Train accuracy : 0.960 || Test loss : 0.110 || Test accuracy : 0.980
Epoch : 7 || Train loss : 0.136 || Train accuracy : 0.962 || Test loss : 0.074 || Test accuracy : 0.985
Epoch : 8 || Train loss : 0.112 || Train accuracy : 0.971 || Test loss : 0.079 || Test accuracy : 0.983
Epoch : 9 || Train loss : 0.079 || Train accuracy : 0.979 || Test loss : 0.089 || Test accuracy : 0.975
Epoch : 10 || Train loss : 0.104 || Train accuracy : 0.970 || Te

#### Resnet50

In [None]:
# SECURITY NOTE: this file stores a whole pickled model object, and unpickling
# can execute arbitrary code — only load checkpoints from a trusted location.
# weights_only=False is passed explicitly because recent PyTorch releases
# default to weights-only loading, which rejects full-model pickles.
model_resnet50 = torch.load(
    "/content/drive/Shared drives/Computer Vision/Models (Before fine-tuning)/resnet50.pth",
    weights_only=False,
)

In [None]:
# Freeze the whole backbone; only parameters whose name contains "fc" stay trainable
for name, param in model_resnet50.named_parameters():
    param.requires_grad = "fc" in name

# Swap in a fresh 8-way classification head (trainable by default), sized from
# the existing head rather than a hard-coded feature width.
# The 8 must match the number of distinct labels in the dataset.
model_resnet50.fc = nn.Linear(model_resnet50.fc.in_features, 8)
print(model_resnet50)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Move the model to the selected device before training
model_resnet50 = model_resnet50.to(device)

In [None]:
# Cross-entropy objective and an Adam optimizer over the ResNet50 parameters
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_resnet50.parameters(), lr=1e-4)
# No LR scheduler is used for this run
tune_model(
    epochs=10,
    model=model_resnet50,
    train_dataloader=trainloader_,
    test_dataloader=testloader_,
    loss_func=loss,
    optimizer=optimizer,
    device=device,
)
torch.save(model_resnet50, "/content/drive/Shared drives/Computer Vision/Trained models/resnet50.pth")

Epoch : 1 || Train loss : 0.405 || Train accuracy : 0.880 || Test loss : 0.446 || Test accuracy : 0.854
Epoch : 2 || Train loss : 0.407 || Train accuracy : 0.885 || Test loss : 0.397 || Test accuracy : 0.869
Epoch : 3 || Train loss : 0.419 || Train accuracy : 0.876 || Test loss : 0.484 || Test accuracy : 0.832
Epoch : 4 || Train loss : 0.412 || Train accuracy : 0.880 || Test loss : 0.440 || Test accuracy : 0.847
Epoch : 5 || Train loss : 0.407 || Train accuracy : 0.882 || Test loss : 0.420 || Test accuracy : 0.862
Epoch : 6 || Train loss : 0.398 || Train accuracy : 0.879 || Test loss : 0.438 || Test accuracy : 0.857
Epoch : 7 || Train loss : 0.402 || Train accuracy : 0.880 || Test loss : 0.423 || Test accuracy : 0.849
Epoch : 8 || Train loss : 0.424 || Train accuracy : 0.876 || Test loss : 0.455 || Test accuracy : 0.852
Epoch : 9 || Train loss : 0.410 || Train accuracy : 0.881 || Test loss : 0.444 || Test accuracy : 0.854
Epoch : 10 || Train loss : 0.396 || Train accuracy : 0.888 || Te

#### MobileNetV2

In [None]:
# SECURITY NOTE: this file stores a whole pickled model object, and unpickling
# can execute arbitrary code — only load checkpoints from a trusted location.
# weights_only=False is passed explicitly because recent PyTorch releases
# default to weights-only loading, which rejects full-model pickles.
model_MobileNetV2 = torch.load(
    "/content/drive/Shared drives/Computer Vision/Models (Before fine-tuning)/MobileNetV2.pth",
    weights_only=False,
)
# Freeze the feature extractor; only parameters whose name contains "classifier" stay trainable
for name, param in model_MobileNetV2.named_parameters():
    param.requires_grad = "classifier" in name

# Swap the final classifier layer for a fresh 8-way layer (trainable by default),
# sized from the layer it replaces rather than a hard-coded feature width.
# The 8 must match the number of distinct labels in the dataset.
model_MobileNetV2.classifier[1] = nn.Linear(model_MobileNetV2.classifier[1].in_features, 8, bias=True)
print(model_MobileNetV2)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [None]:
# Move the model to the selected device before training
model_MobileNetV2 = model_MobileNetV2.to(device)

In [None]:
num_epochs = 10
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_MobileNetV2.parameters(), lr=0.001)
# train() steps the scheduler after every batch, so the cosine horizon has to be
# counted in optimizer steps (epochs * batches per epoch), not in epochs.
# With T_max=10 the learning rate would swing through a full cosine cycle every
# 20 batches instead of annealing once over the whole run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs * len(trainloader_))
tune_model(num_epochs, model_MobileNetV2, trainloader_, testloader_, loss, optimizer, device, scheduler)
torch.save(model_MobileNetV2, "/content/drive/Shared drives/Computer Vision/Trained models/MobileNetV2.pth")

Epoch : 1 || Train loss : 1.853 || Train accuracy : 0.337 || Test loss : 1.588 || Test accuracy : 0.417
Epoch : 2 || Train loss : 1.545 || Train accuracy : 0.510 || Test loss : 1.341 || Test accuracy : 0.607
Epoch : 3 || Train loss : 1.337 || Train accuracy : 0.611 || Test loss : 1.141 || Test accuracy : 0.689
Epoch : 4 || Train loss : 1.215 || Train accuracy : 0.636 || Test loss : 1.045 || Test accuracy : 0.691
Epoch : 5 || Train loss : 1.116 || Train accuracy : 0.673 || Test loss : 0.964 || Test accuracy : 0.711
Epoch : 6 || Train loss : 1.080 || Train accuracy : 0.661 || Test loss : 0.860 || Test accuracy : 0.746
Epoch : 7 || Train loss : 1.014 || Train accuracy : 0.668 || Test loss : 0.860 || Test accuracy : 0.753
Epoch : 8 || Train loss : 0.987 || Train accuracy : 0.689 || Test loss : 0.801 || Test accuracy : 0.728
Epoch : 9 || Train loss : 0.942 || Train accuracy : 0.685 || Test loss : 0.751 || Test accuracy : 0.778
Epoch : 10 || Train loss : 0.934 || Train accuracy : 0.692 || Te