Features: Sign Language 
Label: english words or alphabet
aim: translate sign into english 
scenario: upload sign video and translate into english transcript

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

train_data = pd.read_csv("sign_mnist_train.csv")
print(train_data.head())
print(train_data.shape)
test_data = pd.read_csv("sign_mnist_test.csv")
unique_classes = set(train_data['label'].unique())
num_classes = len(unique_classes)
print(f'Number of unique classes: {num_classes}')

   label  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0      3     107     118     127     134     139     143     146     150   
1      6     155     157     156     156     156     157     156     158   
2      2     187     188     188     187     187     186     187     188   
3      2     211     211     212     212     211     210     211     210   
4     13     164     167     170     172     176     179     180     184   

   pixel9  ...  pixel775  pixel776  pixel777  pixel778  pixel779  pixel780  \
0     153  ...       207       207       207       207       206       206   
1     158  ...        69       149       128        87        94       163   
2     187  ...       202       201       200       199       198       199   
3     210  ...       235       234       233       231       230       226   
4     185  ...        92       105       105       108       133       163   

   pixel781  pixel782  pixel783  pixel784  
0       206       204       20

In [3]:
# Separate labels and features
X_train = train_data.iloc[:, 1:].values
y_train = train_data.iloc[:, 0].values

X_test = test_data.iloc[:, 1:].values
y_test = test_data.iloc[:, 0].values

In [4]:
#[batch_size, num_channels, height, width]

X_train = X_train.reshape(-1, 1, 28, 28).astype('float32')
X_test = X_test.reshape(-1, 1, 28, 28).astype('float32')

In [5]:
class SignLanguageDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label



train_dataset = SignLanguageDataset(X_train, y_train)
test_dataset = SignLanguageDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [6]:
class SignLanguageCNN(nn.Module):
    def __init__(self):
        super(SignLanguageCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 25)
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SignLanguageCNN()


In [7]:
# Assuming y_train and y_test are your target labels
# Check maximum and minimum values of labels
print(f"Min label: {min(y_train)}")
print(f"Max label: {max(y_train)}")

# Ensure all labels are within the range [0, num_classes-1]
assert min(y_train) >= 0 and max(y_train) <= num_classes - 1, "Labels are out of expected range"

# Similarly, check for test labels
assert min(y_test) >= 0 and max(y_test) <= num_classes - 1, "Labels are out of expected range"


Min label: 0
Max label: 24


AssertionError: Labels are out of expected range

In [None]:
##for CNN MODEL
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

# Testing loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total}%')


Epoch 1, Loss: 0.5911601179923606
Epoch 2, Loss: 0.0524175427315742
Epoch 3, Loss: 0.009642490525285192
Epoch 4, Loss: 0.04539610152083722
Epoch 5, Loss: 8.81888339331496e-05
Epoch 6, Loss: 3.580096008168649e-05
Epoch 7, Loss: 2.159077800145992e-05
Epoch 8, Loss: 1.1856537746330823e-05
Epoch 9, Loss: 6.551769831745687e-06
Epoch 10, Loss: 4.132718135655574e-06
Accuracy: 89.36140546569995%


Accuracy: 89.36140546569995%

In [21]:
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F

class SignLanguageResNet50(nn.Module):
    def __init__(self):
        super(SignLanguageResNet50, self).__init__()
        self.resnet50 = models.resnet50(pretrained=True)
        # Modify the first convolutional layer to accept single-channel (grayscale) input
        self.resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Replace the last fully connected layer to match the number of classes
        num_ftrs = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(num_ftrs, 25)

    def forward(self, x):
        x = self.resnet50(x)
        return x

model2 = SignLanguageResNet50()

EOFError: Ran out of input