In [2]:
import os
from PIL import Image
import torch
import torch.nn as nn 
import torch.optim as optim 
import torch.nn.functional as F
from torch.utils.data import DataLoader 
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from torchvision import models

# Create Dataset Class

In [5]:
class SignLanguageDigits(Dataset):
    """Sign-language digit images (classes 0-9) with a per-class 80/20 split.

    ``root_dir`` must contain one sub-directory per digit, named ``0`` to
    ``9``. Inside each sub-directory every file whose name ends in ``.JPG``
    is treated as an image of that digit. For each class the first 80% of the
    *sorted* file names form the training split and the remainder forms the
    test split, so a ``train=True`` and a ``train=False`` instance built from
    the same folder never share a file.

    Args:
        root_dir: Directory holding the ten per-digit sub-directories.
        shape: Size passed to ``transforms.Resize`` when the default transform
            is built. It has no effect when ``transform`` is supplied.
        train: ``True`` selects the 80% training part, ``False`` the 20% test
            part.
        transform: Optional callable applied to each RGB PIL image. When
            ``None``, Resize -> ToTensor -> Normalize(mean=0.5, std=0.5) is
            used.
    """

    def __init__(self, root_dir, shape, train=True, transform=None):
        self.root_dir = root_dir
        # Compare against None explicitly so that a user-supplied transform
        # that happens to be falsy is not silently replaced by the default.
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize(shape),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Split every class separately into train/test (80/20).
        for label in range(10):
            digit_dir = os.path.join(root_dir, str(label))
            # os.listdir() returns names in arbitrary order; sorting makes the
            # split reproducible and keeps the train and test parts disjoint.
            images = sorted(
                img for img in os.listdir(digit_dir) if img.endswith('.JPG')
            )
            split_idx = int(0.8 * len(images))

            selected = images[:split_idx] if train else images[split_idx:]

            for img_name in selected:
                self.image_paths.append(os.path.join(digit_dir, img_name))
                self.labels.append(label)

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_path = self.image_paths[idx]
        # convert() produces an independent RGB copy (always 3 channels), so
        # the file handle can be released as soon as the block exits.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label
    


# Mini-batch size shared by the training and the test loader.
batch_size = 64

# Both splits read the same folder and use the default 64x64 preprocessing;
# only the `train` flag differs.
train_dataset = SignLanguageDigits(
    'Sign-Language-Digits-Dataset/Dataset', (64, 64), train=True
)
test_dataset = SignLanguageDigits(
    'Sign-Language-Digits-Dataset/Dataset', (64, 64), train=False
)

# Shuffle the training batches only; the evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Create and Train NN

In [13]:
class NN(nn.Module):
    """Fully connected classifier: input_size -> 512 -> 128 -> num_classes.

    Args:
        input_size: Number of features in each flattened input row.
        num_classes: Number of output scores produced per row.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_wide, hidden_narrow = 512, 128
        self.fc1 = nn.Linear(input_size, hidden_wide)
        self.fc2 = nn.Linear(hidden_wide, hidden_narrow)
        self.fc3 = nn.Linear(hidden_narrow, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of flattened inputs."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        # No softmax is applied here; the scores are returned unnormalised.
        return self.fc3(hidden)


# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper-parameters for the fully connected network.
input_size = 12288  # 3 x 64 x 64 = 12,288 values per flattened RGB image
num_classes = 10
learning_rate = 0.001
num_epochs = 10

# Build the network on the chosen device, together with its loss and optimizer.
model = NN(input_size=input_size, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):

    print(f"Epoch: {epoch}")

    for images, labels in train_loader:
        images = images.to(device=device)
        labels = labels.to(device=device)

        # The linear layers expect one flat row per image: [batch, 12288].
        flat_images = images.reshape(images.shape[0], -1)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss for this mini-batch.
        scores = model(flat_images)
        loss = criterion(scores, labels)

        # Back-propagate, then let Adam update the weights.
        loss.backward()
        optimizer.step()


Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9


# Create and Train CNN

In [14]:
class Net(nn.Module):
    """Small two-convolution CNN producing 10 class scores for 3x64x64 images.

    Spatial size for a 64x64 input (5x5 kernels, no padding, 2x2 pooling):
        conv1: 64 - 5 + 1 = 60   (3 -> 6 channels)
        pool : 60 / 2     = 30
        conv2: 30 - 5 + 1 = 26   (6 -> 16 channels)
        pool : 26 / 2     = 13
    so the flattened feature vector has 16 * 13 * 13 = 2704 entries.
    """

    def __init__(self):
        super().__init__()
        # RGB input -> 6 feature maps, 5x5 kernel.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # 6 feature maps from conv1 -> 16 feature maps, 5x5 kernel.
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head on the 2704 flattened features.
        self.fc1 = nn.Linear(16 * 13 * 13, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw class scores for a batch of images."""
        # Each convolution is followed by ReLU and 2x2 max pooling.
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)

        # Collapse everything except the batch dimension.
        flat_width = self.num_flat_features(x)
        x = x.view(-1, flat_width)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        feature_count = 1
        for extent in tuple(x.size())[1:]:
            feature_count = feature_count * extent
        return feature_count
    


# CNN training: device, model, loss and optimizer.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cnn_model = Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    print(f"Epoch: {epoch}")

    for images, labels in train_loader:
        images = images.to(device=device)
        labels = labels.to(device=device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # The CNN consumes the image batch as-is, so no flattening is needed.
        scores = cnn_model(images)
        loss = criterion(scores, labels)

        loss.backward()
        optimizer.step()


Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9


# Test NN Accuracy

In [15]:
# Test accuracy function for NN
def check_NN_accuracy(loader, model):
    """Print the classification accuracy of a fully connected model.

    Every batch is flattened to ``(batch, features)`` before it is passed to
    ``model``; the index of the highest score in each row is the prediction.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        model: ``nn.Module`` mapping flattened inputs to per-class scores.

    Returns:
        None. Prints ``Got <correct> / <total> with accuracy <pct>``; an empty
        loader is reported as ``0 / 0`` with accuracy ``0.00``.

    Note:
        The model is switched to evaluation mode and left there.
    """
    num_correct = 0
    num_samples = 0
    model.eval()  # disable training-only behaviour (e.g. dropout, batch-norm updates)

    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-global `device`; a parameter-less model leaves them untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    with torch.no_grad():  # no autograd graph is needed for evaluation
        for x, y in loader:
            if model_device is not None:
                x = x.to(device=model_device)
                y = y.to(device=model_device)

            x = x.reshape(x.shape[0], -1)

            scores = model(x)

            # max over dim 1 returns (values, indices); the indices are the classes.
            _, predictions = scores.max(1)

            # .item() keeps the counters as plain Python ints.
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    # Guard the division so an empty loader does not raise ZeroDivisionError.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")

        
# Score the fully connected network on the held-out test split.
print("NN Test accuracy: ")
check_NN_accuracy(loader=test_loader, model=model)

NN Test accuracy: 
Got 324 / 418 with accuracy 77.51


# Test CNN Accuracy

In [16]:
# Test accuracy function for CNN
def check_CNN_accuracy(loader, cnn_model):
    """Print the classification accuracy of a convolutional model.

    Batches are passed to ``cnn_model`` unchanged (no flattening); the index
    of the highest score in each row is the prediction.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        cnn_model: ``nn.Module`` mapping an input batch to per-class scores.

    Returns:
        None. Prints ``Got <correct> / <total> with accuracy <pct>%``; an
        empty loader is reported as ``0 / 0`` with accuracy ``0.00%``.

    Note:
        The model is switched to evaluation mode and left there.
    """
    num_correct = 0
    num_samples = 0
    cnn_model.eval()

    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-global `device`; a parameter-less model leaves them untouched.
    first_param = next(cnn_model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    with torch.no_grad():
        for x, y in loader:
            if model_device is not None:
                x = x.to(device=model_device)
                y = y.to(device=model_device)

            scores = cnn_model(x)
            _, predictions = scores.max(1)

            # .item() keeps the counters as plain Python ints.
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    # Guard the division so an empty loader does not raise ZeroDivisionError.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}%")

# Test the CNN
# Score the CNN on the held-out test split.
print("CNN Test accuracy:")
check_CNN_accuracy(loader=test_loader, cnn_model=cnn_model)

CNN Test accuracy:
Got 366 / 418 with accuracy 87.56%


In [6]:
# Preprocessing used for the ImageNet-pretrained ResNet models.
resnet_transform = transforms.Compose([
    # With an int size, Resize scales the shorter side to 256 px and keeps
    # the aspect ratio (it does not force a 256x256 square).
    transforms.Resize(256),
    # Keep the central 224x224 window.
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel ImageNet mean / standard deviation.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# `shape` is a required argument of SignLanguageDigits, but it has no effect
# here because an explicit transform is supplied.
resnet_train_dataset = SignLanguageDigits(
    'Sign-Language-Digits-Dataset/Dataset',
    (64, 64),
    train=True,
    transform=resnet_transform,
)
resnet_test_dataset = SignLanguageDigits(
    'Sign-Language-Digits-Dataset/Dataset',
    (64, 64),
    train=False,
    transform=resnet_transform,
)

# Shuffle the training batches only.
resnet_train_loader = DataLoader(dataset=resnet_train_dataset, batch_size=64, shuffle=True)
resnet_test_loader = DataLoader(dataset=resnet_test_dataset, batch_size=64, shuffle=False)

# ResNet with all layers frozen except the last

In [18]:
# ImageNet-pretrained ResNet-18 used as a fixed feature extractor.
# NOTE(review): newer torchvision releases reportedly prefer `weights=` over
# `pretrained=` - confirm against the installed version before changing it.
resnet1_model = models.resnet18(pretrained=True)

# Freeze every pretrained parameter in one call.
resnet1_model.requires_grad_(False)

# Swap in a fresh 10-class head; it is created after the freeze, so its
# parameters are trainable.
num_features = resnet1_model.fc.in_features
resnet1_model.fc = nn.Linear(num_features, 10)

# Device, loss and optimizer. Only the new head is handed to Adam.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet1_model = resnet1_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet1_model.fc.parameters(), lr=0.001)

for epoch in range(10):
    resnet1_model.train()
    running_loss = 0.0  # summed batch losses for this epoch (not reported)

    for images, labels in resnet_train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = resnet1_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

    print(f'Epoch {epoch+1}')

# Evaluation
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` on ``loader``.

    Args:
        loader: Iterable yielding ``(images, labels)`` tensor pairs.
        model: ``nn.Module`` mapping an image batch to per-class scores.

    Returns:
        None. Prints ``Got <correct> / <total> with accuracy <pct>%``; an
        empty loader is reported as ``0 / 0`` with accuracy ``0.00%``.

    Note:
        The model is switched to evaluation mode and left there.
    """
    model.eval()
    correct = 0
    total = 0

    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-global `device`; a parameter-less model leaves them untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    with torch.no_grad():
        for images, labels in loader:
            if model_device is not None:
                images, labels = images.to(model_device), labels.to(model_device)
            outputs = model(images)
            # Under no_grad the outputs carry no graph, so `.data` is not needed.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard the division so an empty loader does not raise ZeroDivisionError.
    accuracy = float(correct) / float(total) * 100 if total else 0.0
    print(f"Got {correct} / {total} with accuracy {accuracy:.2f}%")

# Score the frozen-backbone ResNet on the held-out test split.
print("ResNet(all freezed layers) Test Accuracy:")
check_accuracy(loader=resnet_test_loader, model=resnet1_model)



Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
ResNet(all freezed layers) Test Accuracy:
Got 371 / 418 with accuracy 88.76%


# ResNet with the first half of its layers frozen

In [19]:

# ImageNet-pretrained ResNet-18, partially fine-tuned.
resnet2_model = models.resnet18(pretrained=True)

# Direct child modules of the network, in the order children() yields them.
layers = list(resnet2_model.children())
halfway = len(layers) // 2

# Children before `halfway` are frozen; the rest are explicitly trainable.
for position, layer in enumerate(layers):
    trainable = position >= halfway
    for param in layer.parameters():
        param.requires_grad = trainable

# Swap in a fresh 10-class head in place of the original `fc` layer.
num_features = resnet2_model.fc.in_features
resnet2_model.fc = nn.Linear(num_features, 10)

# Device, loss and optimizer. Adam is given every parameter of the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet2_model = resnet2_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet2_model.parameters(), lr=1e-4)

for epoch in range(10):
    resnet2_model.train()
    running_loss = 0.0  # summed batch losses for this epoch (not reported)

    for images, labels in resnet_train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = resnet2_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

    print(f'Epoch {epoch+1}')

# Evaluation
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` on ``loader``.

    Args:
        loader: Iterable yielding ``(images, labels)`` tensor pairs.
        model: ``nn.Module`` mapping an image batch to per-class scores.

    Returns:
        None. Prints ``Got <correct> / <total> with accuracy <pct>%``; an
        empty loader is reported as ``0 / 0`` with accuracy ``0.00%``.

    Note:
        The model is switched to evaluation mode and left there.
    """
    model.eval()
    correct = 0
    total = 0

    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-global `device`; a parameter-less model leaves them untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    with torch.no_grad():
        for images, labels in loader:
            if model_device is not None:
                images, labels = images.to(model_device), labels.to(model_device)
            outputs = model(images)
            # Under no_grad the outputs carry no graph, so `.data` is not needed.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard the division so an empty loader does not raise ZeroDivisionError.
    accuracy = float(correct) / float(total) * 100 if total else 0.0
    print(f"Got {correct} / {total} with accuracy {accuracy:.2f}%")

# Score the partially fine-tuned ResNet on the held-out test split.
print("ResNet 2 Test Accuracy:")
check_accuracy(loader=resnet_test_loader, model=resnet2_model)

Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
ResNet 2 Test Accuracy:
Got 413 / 418 with accuracy 98.80%


# ResNet with all layers unfrozen (full fine-tuning)

In [12]:
# ImageNet-pretrained ResNet-18, fine-tuned end to end.
resnet3_model = models.resnet18(pretrained=True)

# Mark every parameter as trainable in one call (full fine-tuning).
resnet3_model.requires_grad_(True)

# Swap in a fresh 10-class head in place of the original `fc` layer.
num_features = resnet3_model.fc.in_features
resnet3_model.fc = nn.Linear(num_features, 10)

# Device, loss and optimizer. Adam is given every parameter of the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet3_model = resnet3_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet3_model.parameters(), lr=1e-4)

for epoch in range(10):
    resnet3_model.train()
    running_loss = 0.0  # summed batch losses for this epoch (not reported)

    for images, labels in resnet_train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = resnet3_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

    print(f'Epoch {epoch+1}')

# Evaluation
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` on ``loader``.

    Args:
        loader: Iterable yielding ``(images, labels)`` tensor pairs.
        model: ``nn.Module`` mapping an image batch to per-class scores.

    Returns:
        None. Prints ``Got <correct> / <total> with accuracy <pct>%``; an
        empty loader is reported as ``0 / 0`` with accuracy ``0.00%``.

    Note:
        The model is switched to evaluation mode and left there.
    """
    model.eval()
    correct = 0
    total = 0

    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-global `device`; a parameter-less model leaves them untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    with torch.no_grad():
        for images, labels in loader:
            if model_device is not None:
                images, labels = images.to(model_device), labels.to(model_device)
            outputs = model(images)
            # Under no_grad the outputs carry no graph, so `.data` is not needed.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard the division so an empty loader does not raise ZeroDivisionError.
    accuracy = float(correct) / float(total) * 100 if total else 0.0
    print(f"Got {correct} / {total} with accuracy {accuracy:.2f}%")

# Score the fully fine-tuned ResNet on the held-out test split.
print("ResNet 3 Test Accuracy:")
check_accuracy(loader=resnet_test_loader, model=resnet3_model)




Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
ResNet 3 Test Accuracy:
Got 413 / 418 with accuracy 98.80%
