# Load Dataset

In [8]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline: resize every image to a fixed 128x128 square, then
# convert it to a tensor.
preprocessing_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Build the labelled image dataset from the extracted folder and wrap it in a
# loader that serves shuffled mini-batches of 32 samples.
dataset = datasets.ImageFolder(root="extracted_data", transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

# One output class per sign found in the dataset.
class_names = dataset.classes
num_classes = len(class_names)
print("Classes:", class_names)

Classes: ['afraid', 'agree', 'assistance', 'bad', 'become', 'college', 'doctor', 'from', 'pain', 'pray', 'secondary', 'skin', 'small', 'specific', 'stand', 'today', 'warn', 'which', 'work', 'you']


# CNN Model

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SignCNN(nn.Module):
    """Small two-stage convolutional classifier for sign images.

    Architecture: two (3x3 conv -> ReLU -> 2x2 max-pool) stages, a 128-unit
    hidden linear layer with ReLU, and a linear output layer that returns one
    raw logit per class (no softmax is applied here).

    Args:
        num_classes: Number of output classes (width of the final layer).
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 128,
            which gives the original ``64 * 30 * 30`` flattened feature size.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution/pooling stages.
    """

    def __init__(self, num_classes, input_size=128):
        super(SignCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)

        # Derive the flattened feature size from the input size instead of
        # hard-coding it, so fc1 always matches what forward() produces.
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        feat_h = self._feature_extent(height)
        feat_w = self._feature_extent(width)
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for two conv/pool stages"
            )

        self.fc1 = nn.Linear(64 * feat_h * feat_w, 128)
        self.fc2 = nn.Linear(128, num_classes)

    @staticmethod
    def _feature_extent(extent):
        """Return the length of one spatial axis after both conv/pool stages."""
        for _ in range(2):
            # An unpadded 3x3 convolution trims 2 pixels; 2x2 pooling with
            # stride 2 then halves the result, rounding down.
            extent = (extent - 2) // 2
        return extent

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Instantiate the network with one output logit per dataset class.
model = SignCNN(num_classes)

In [10]:
# Show the module's layer summary as the cell output.
model

SignCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=57600, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=20, bias=True)
)

# Training Model

In [11]:
import torch.optim as optim

# Cross-entropy on the raw logits returned by the model, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10

# Explicitly switch to training mode. The webcam inference cell calls
# model.eval(), so re-running this cell afterwards would otherwise keep the
# model in eval mode. That is harmless for the current layers, but wrong as
# soon as dropout or batch-norm is added.
model.train()

for epoch in range(epochs):
    # Accumulates the per-batch loss values over the epoch (a sum, not an
    # average, so its scale depends on the number of batches).
    running_loss = 0.0
    for inputs, labels in dataloader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss:.4f}")

# Persist only the learned parameters (state_dict), not the module object.
torch.save(model.state_dict(), "sign_cnn.pth")
print("Training complete! Model saved as sign_cnn.pth")

Epoch 1/10, Loss: 553.2867
Epoch 2/10, Loss: 30.5949
Epoch 3/10, Loss: 9.3310
Epoch 4/10, Loss: 20.8603
Epoch 5/10, Loss: 11.6404
Epoch 6/10, Loss: 8.9879
Epoch 7/10, Loss: 10.1752
Epoch 8/10, Loss: 5.2609
Epoch 9/10, Loss: 0.1043
Epoch 10/10, Loss: 0.0167
Training complete! Model saved as sign_cnn.pth


In [None]:
import cv2
from torchvision import transforms
from PIL import Image
import torch

# Put the model in evaluation mode for inference.
model.eval()

# Same preprocessing as the training cell: resize to 128x128, then to tensor.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

cap = cv2.VideoCapture(0)  # Open webcam (device index 0)

# try/finally guarantees the camera and the preview window are released even
# when the loop is aborted by an exception or a kernel interrupt; otherwise
# the device can stay locked until the kernel is restarted.
try:
    if not cap.isOpened():
        print("Could not open webcam (device 0).")

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing, unplugged or stream ended).
            break

        # Preprocess the frame: OpenCV delivers BGR, the model pipeline expects
        # an RGB PIL image.
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_tensor = transform(img).unsqueeze(0)  # Add batch dimension

        with torch.no_grad():  # no gradients needed for prediction
            output = model(img_tensor)
            pred = torch.argmax(output, 1).item()
            label = dataset.classes[pred]

        # Show prediction on frame
        cv2.putText(frame, f"Prediction: {label}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("Real-time Sign Detection", frame)

        # Press "q" in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()