<a href="https://colab.research.google.com/github/Serag11/Machine-Learning/blob/main/Sign_Language_Image_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torchinfo

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchinfo
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm.auto import tqdm
import torchvision.models as models # Import torchvision models



In [None]:
# Install the helper package that is used below to fetch the dataset from Kaggle
!pip install opendatasets
import pandas as pd
import opendatasets as od
# Download the Sign Language MNIST dataset; the next cell reads the train/test
# CSV files from the resulting "sign-language-mnist" folder.
# NOTE(review): presumably prompts for Kaggle credentials when none are configured — confirm.
od.download('https://www.kaggle.com/datasets/datamunge/sign-language-mnist')


In [None]:
from pathlib import Path

# od.download() is called without a target directory in this notebook, so the
# dataset folder is looked up relative to the current working directory rather
# than through the Colab-only absolute path /content/... (on Colab the working
# directory is normally /content, so the same files are read there).
data_dir = Path('sign-language-mnist')
df_train = pd.read_csv(data_dir / 'sign_mnist_train' / 'sign_mnist_train.csv')
df_test  = pd.read_csv(data_dir / 'sign_mnist_test' / 'sign_mnist_test.csv')


In [None]:
# Every column except 'label' holds one pixel value; rebuild 28x28 images from them
pixel_data = df_train.drop(columns='label').to_numpy()
images = pixel_data.reshape(-1, 28, 28)

# Show the first five training samples side by side, titled with their labels
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(15, 3))
for i, ax in enumerate(axes):
    ax.set_title(f"Label: {df_train.iloc[i]['label']}")
    ax.imshow(images[i], cmap='gray')
    ax.axis('off')
plt.tight_layout()
plt.show()


In [None]:
def evaluate(data_loader: DataLoader):
    """Score the notebook's current global ``model`` on ``data_loader``.

    The function reads the module-level ``model``, ``criterion`` and ``device``
    at call time, so it always evaluates whichever model was assigned last.
    It switches the model to eval mode and leaves it there; callers that keep
    training must call ``model.train()`` again.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        A tuple ``(avg_loss, accuracy, f1)`` where ``avg_loss`` is the mean of
        the per-batch losses, ``accuracy`` is a percentage (0-100) and ``f1``
        is the weighted F1 score over all samples.

    Raises:
        ValueError: If ``data_loader`` yields no samples (previously this
            surfaced as a bare ``ZeroDivisionError``).
    """
    model.eval()
    all_labels = []
    all_predictions = []
    correct = 0
    total = 0
    total_loss = 0.0
    num_batches = 0  # counted explicitly so loaders without __len__ also work

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            total_loss += criterion(outputs, labels).item()
            num_batches += 1

            # Predicted class = index of the largest logit in each row
            predicted = outputs.argmax(dim=1)

            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("evaluate() received a data loader that yields no samples")

    f1 = f1_score(all_labels, all_predictions, average='weighted')
    accuracy = 100 * correct / total
    avg_loss = total_loss / num_batches

    return avg_loss, accuracy, f1

In [None]:


# Dataset wrapper around a Sign Language MNIST dataframe
class SignLanguageMNISTDataset(Dataset):
    """Serve ``(image, label)`` pairs from a dataframe of flattened images.

    The dataframe must contain a ``label`` column; every other column is
    treated as pixel data, and each row is reshaped to a 28x28 ``uint8``
    array. An optional ``transform`` callable is applied to that array
    before it is returned.
    """

    def __init__(self, dataframe, transform=None):
        self.transform = transform
        self.labels = dataframe['label'].to_numpy()
        # All remaining columns are the flattened pixels of one image per row
        self.pixel_data = dataframe.drop(columns='label').to_numpy()

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        target = self.labels[idx]
        # uint8 is the dtype the image transforms used in this notebook expect
        picture = self.pixel_data[idx].reshape(28, 28).astype(np.uint8)

        if not self.transform:
            return picture, target
        return self.transform(picture), target

# Base preprocessing
# Each sample arrives from the dataset as a 28x28 uint8 array. It is wrapped in
# a PIL image and then converted to a tensor with a leading channel dimension.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ToTensor(),
    ]
)

# Datasets for the two splits share the same preprocessing
train_data = SignLanguageMNISTDataset(dataframe=df_train, transform=transform)
test_data = SignLanguageMNISTDataset(dataframe=df_test, transform=transform)

# Only the training batches are shuffled
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

# 1. Using custom CNN

In [None]:
class CustomCNN(nn.Module):
    """Three-stage convolutional classifier for 28x28 images.

    Each stage is Conv(3x3, padding=1) -> ReLU -> MaxPool(2), taking the
    spatial size 28 -> 14 -> 7 -> 3 while widening the channels to
    32 -> 64 -> 128. The flattened 128*3*3 feature vector is classified by a
    two-layer MLP.

    Args:
        num_classes: Number of output logits. Defaults to 25, the value this
            notebook uses for Sign Language MNIST.
        in_channels: Number of input image channels. Defaults to 1
            (grayscale).

    The input must be ``(batch, in_channels, 28, 28)``; other spatial sizes do
    not match the fixed 128*3*3 classifier input.
    """

    def __init__(self, num_classes=25, in_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 14x14x32
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 7x7x64
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 3x3x128
        )

        self.classifier = nn.Sequential(
            nn.Linear(128 * 3 * 3, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``."""
        x = self.features(x)
        x = torch.flatten(x, 1)  # Flatten all dimensions except batch
        x = self.classifier(x)
        return x



In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = CustomCNN()
model = model.to(device)
# Layer-by-layer summary for a single 1-channel 28x28 input
torchinfo.summary(model, input_size=(1, 1, 28, 28))


In [None]:
# Cross-entropy on the model's raw outputs, optimised with Adam (lr = 0.001)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)


In [None]:

# Single source of truth for the epoch count, so the progress bar and the log
# line report the number of epochs that actually run (they used to say "/10"
# while the loop ran 3 epochs).
num_epochs = 3   # change num epochs

for epoch in range(num_epochs):
    # evaluate() leaves the model in eval mode, so re-enable training mode each epoch
    model.train()
    total_loss = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images = images.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    avg_train_loss = total_loss/len(train_loader)
    # The test loader is used as the validation set after every epoch
    val_loss, val_acc, val_f1 = evaluate(test_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")



In [None]:
# Final score on the test loader; the returned loss is not reported here
_, accuracy, f1 = evaluate(test_loader)
print(f"Accuracy: {accuracy:.2f}%, F1 Score: {f1:.4f}")

# 2. Using data augmentation

In [None]:
# Random augmentation, applied to the TRAINING split only
augmetation_transform = transforms.Compose([
    transforms.ToPILImage(), # Convert numpy array to PIL Image
    transforms.RandomRotation(10), # Rotate by a maximum of 10 degrees
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), # Translate and shear
    # NOTE(review): saturation/hue presumably have no effect on single-channel images — confirm
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), # Apply color jitter
    transforms.ToTensor(),    # Convert PIL Image to PyTorch Tensor (adds channel dimension)
])

# Deterministic preprocessing for evaluation. The test split must not be
# randomly augmented, otherwise the reported metrics change from run to run
# and are not comparable with the un-augmented baseline.
eval_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

# Create custom datasets
train_data_augmentation = SignLanguageMNISTDataset(df_train, transform=augmetation_transform)
test_data_augmentation = SignLanguageMNISTDataset(df_test, transform=eval_transform)

# Dataloaders
train_loader_augmentation = DataLoader(train_data_augmentation, batch_size=64, shuffle=True)
test_loader_augmentation  = DataLoader(test_data_augmentation,  batch_size=64, shuffle=False)

In [None]:
# Start from a freshly initialised CustomCNN for the augmentation experiment
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = CustomCNN()
model = model.to(device)
# Layer-by-layer summary for a single 1-channel 28x28 input
torchinfo.summary(model, input_size=(1, 1, 28, 28))


In [None]:
# New loss and optimizer bound to the parameters of the current model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)


In [None]:

# Single source of truth for the epoch count; the loop bound, the progress bar
# and the log line all read it, so they cannot drift apart.
num_epochs = 10   # change num epochs

for epoch in range(num_epochs):
    # evaluate() leaves the model in eval mode, so re-enable training mode each epoch
    model.train()
    total_loss = 0

    for images, labels in tqdm(train_loader_augmentation, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images = images.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    avg_train_loss = total_loss/len(train_loader_augmentation)
    val_loss, val_acc, val_f1 = evaluate(test_loader_augmentation)
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")


In [None]:
# Final score on test_loader_augmentation; the returned loss is not reported here
_, accuracy, f1 = evaluate(test_loader_augmentation)
print(f"Accuracy: {accuracy:.2f}%, F1 Score: {f1:.4f}")

# 3. Using transfer learning

In [None]:

# Channel statistics used by torchvision's ImageNet-pretrained weights
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# NOTE: despite its historical name, this pipeline feeds the *_resnet datasets below.
alexnet_transform = transforms.Compose([
    transforms.ToPILImage(), # Convert numpy array to PIL Image
    transforms.Resize((224, 224)), # Upscale the 28x28 images to 224x224
    # Replicate the single gray channel to 3 channels. A built-in transform is
    # used instead of a lambda so the pipeline stays picklable for DataLoader workers.
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),    # Convert PIL Image to PyTorch Tensor (C x H x W)
    # Match the input statistics the pretrained backbone was trained with
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])
# Create custom datasets
train_data_resnet = SignLanguageMNISTDataset(df_train, transform=alexnet_transform)
test_data_resnet = SignLanguageMNISTDataset(df_test, transform=alexnet_transform)

# Dataloaders
train_loader_resnet = DataLoader(train_data_resnet, batch_size=64, shuffle=True)
test_loader_resnet  = DataLoader(test_data_resnet,  batch_size=64, shuffle=False)

In [None]:
# ImageNet-pretrained ResNet-18 used as a fixed feature extractor
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze the pretrained backbone. Only the new head is handed to the optimizer
# in this notebook, so without this autograd still computes gradients for
# every backbone weight and then throws them away.
for param in model.parameters():
    param.requires_grad_(False)

# Replace the 1000-class ImageNet head with a 25-class head; the freshly
# created layer is trainable by default.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 25)

model = model.to(device)

In [None]:
# Only the parameters of the replaced head (model.fc) are given to the optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.fc.parameters(), lr=0.001)

In [None]:
# Single source of truth for the epoch count, so the progress bar and the log
# line report the number of epochs that actually run (they used to say "/10"
# while the loop ran 3 epochs).
num_epochs = 3   # change num epochs

for epoch in range(num_epochs):
    # evaluate() leaves the model in eval mode, so re-enable training mode each epoch
    model.train()
    total_loss = 0

    for images, labels in tqdm(train_loader_resnet, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images = images.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    avg_train_loss = total_loss/len(train_loader_resnet)
    val_loss, val_acc, val_f1 = evaluate(test_loader_resnet)
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")



# 4. Using a deeper architecture (ResNet-50 trained from scratch)

In [None]:


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# weights=None: no pretrained weights are loaded, the network starts from random initialisation
model = models.resnet50(weights=None)

# Size the new 25-class head from the backbone instead of hard-coding its width
model.fc = nn.Linear(model.fc.in_features, 25)

model = model.to(device)
# Optional summary for one 3-channel 224x224 input:
# torchinfo.summary(model, input_size=(1, 3, 224, 224))


In [None]:
# All parameters of the current model are optimised with Adam (lr = 0.001)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)


In [None]:


# Single source of truth for the epoch count, so the progress bar and the log
# line report the number of epochs that actually run (they used to say "/10"
# while the loop ran 3 epochs).
num_epochs = 3   # change num epochs

for epoch in range(num_epochs):
    # evaluate() leaves the model in eval mode, so re-enable training mode each epoch
    model.train()
    total_loss = 0

    for images, labels in tqdm(train_loader_resnet, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images = images.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    avg_train_loss = total_loss/len(train_loader_resnet)
    val_loss, val_acc, val_f1 = evaluate(test_loader_resnet)
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")



In [None]:
# The ResNet-50 above is trained on train_loader_resnet (3-channel 224x224
# inputs), so it has to be scored on the matching test_loader_resnet. The
# augmentation loader yields 1-channel 28x28 tensors, which do not fit this model.
_ , accuracy , f1 = evaluate(test_loader_resnet)
print(f"Accuracy: {accuracy:.2f}%, F1 Score: {f1:.4f}")