In [45]:
import torch
import numpy as np
from torch.utils.data import Dataset, Subset, DataLoader
from torchvision import transforms, datasets
from torch.utils.data import SubsetRandomSampler
from sklearn.preprocessing import LabelEncoder
#import torch_xla.distributed.parallel_loader as pl
#import torch_xla.core.xla_model as xm

#device = xm.xla_device()

class SignLanguageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.data = datasets.ImageFolder(root_dir, transform=transform)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
    
class NpyDataset(Dataset):
    def __init__(self, npy_file_path):
        # Load the dataset
        dataset_inputs = np.load(npy_file_path + '/X.npy', allow_pickle=True)
        dataset_labels = np.load(npy_file_path + '/Y.npy', allow_pickle=True)
        # Assuming your dataset has 'inputs' and 'labels' keys
         # Convert labels to numerical representation using LabelEncoder
        label_encoder = LabelEncoder()
        dataset_labels_int = label_encoder.fit_transform(np.ravel(dataset_labels))
        self.inputs = torch.tensor(dataset_inputs, dtype=torch.float32)
        self.labels = torch.tensor(dataset_labels_int, dtype=torch.long)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, index):
        input_sample = self.inputs[index]
        label = self.labels[index]
        
        #input_sample = torch.tensor(input_sample).permute(2, 0, 1)
        return input_sample, label



In [46]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataset = NpyDataset('/kaggle/input/27-class-sign-language-dataset')

dataset.inputs = torch.tensor(dataset.inputs).permute(0, 3, 1, 2)

#dataset = SignLanguageDataset(root_dir='/kaggle/input/aslamerican-sign-language-aplhabet-dataset', transform=transform)

#print(len(dataset))
#reduce_ratio = 0.1
#smallset_indices = torch.randperm(len(dataset))[:int(len(dataset)* reduce_ratio)]
#dataset = Subset(dataset, smallset_indices)

print(len(dataset))

train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

print("Length:", train_size, val_size, test_size)

train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size, test_size]
)

#device = xm.xla_device()
#train_loader = pl.MpDeviceLoader(DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4), device)
#val_loader = pl.MpDeviceLoader(DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4), device)
#test_loader = pl.MpDeviceLoader(DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4), device)


train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, pin_memory=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, pin_memory=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=True, num_workers=4)


  dataset.inputs = torch.tensor(dataset.inputs).permute(0, 3, 1, 2)


22801
Length: 18240 2280 2281


In [47]:
import torch.nn as nn
from torchvision import models

class SignLanguageResNet(nn.Module):
    def __init__(self, num_classes):
        super(SignLanguageResNet, self).__init__()
        resnet18 = models.resnet18(weights=True)
        in_features = resnet18.fc.in_features
        resnet18.fc = nn.Linear(in_features, num_classes)
        self.resnet18 = resnet18

    def forward(self, x):
        return self.resnet18(x)


In [48]:
#model = SignLanguageResNet(num_classes=29)

model = SignLanguageResNet(num_classes=27).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#optimizer = xm.optimizer.Optimizer(optimizer)




In [49]:
num_epochs = 10
count = 0

for epoch in range(num_epochs):
    # Training
    model.train()
    #para_loader = pl.ParallelLoader(train_loader, [device])
    
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs.cuda())
        loss = criterion(outputs, labels.cuda())
        loss.backward()
        optimizer.step()
        #xm.optimizer_step(optimizer, barrier=True)
    
    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs.cuda())
            loss = criterion(outputs, labels.cuda())
            val_loss += loss.item()

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels.cuda()).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = 100.0 * correct / total

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Validation Loss: {avg_val_loss}, Validation Accuracy: {val_accuracy}%')

torch.save(model.state_dict(), 'sign_language_resnet_model.pth')

Epoch 1/10, Loss: 0.44647058844566345, Validation Loss: 0.3206897309670846, Validation Accuracy: 90.26315789473684%
Epoch 2/10, Loss: 0.1408340036869049, Validation Loss: 0.15844487440254953, Validation Accuracy: 95.6140350877193%
Epoch 3/10, Loss: 0.03273290768265724, Validation Loss: 0.10534250543504539, Validation Accuracy: 97.28070175438596%
Epoch 4/10, Loss: 0.4026080071926117, Validation Loss: 0.5145644024014473, Validation Accuracy: 87.54385964912281%
Epoch 5/10, Loss: 0.05722338706254959, Validation Loss: 0.09538260785241921, Validation Accuracy: 96.97368421052632%
Epoch 6/10, Loss: 0.1284734606742859, Validation Loss: 0.11185922760826846, Validation Accuracy: 97.14912280701755%
Epoch 7/10, Loss: 0.013536369428038597, Validation Loss: 0.07345504533602959, Validation Accuracy: 98.15789473684211%
Epoch 8/10, Loss: 0.12999612092971802, Validation Loss: 0.1194199787827933, Validation Accuracy: 97.5%
Epoch 9/10, Loss: 0.02975156344473362, Validation Loss: 0.0952602108526561, Validat

In [50]:
# Testing Loop
model.eval()
test_correct = 0
test_total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs.cuda())
        _, predicted = outputs.max(1)
        test_total += labels.size(0)
        test_correct += predicted.eq(labels.cuda()).sum().item()

test_accuracy = 100.0 * test_correct / test_total
print(f'Test Accuracy: {test_accuracy}%')

Test Accuracy: 97.85181937746603%


In [55]:
import torch
from PIL import Image
import torchvision.transforms as transforms

# Assuming 'model' is your trained SignLanguageResNet model
# Assuming the model is already on the GPU (model.cuda() was called)

# Load the image
image_path = '/kaggle/input/imagem/test2.jpg'
image = Image.open(image_path)

# Apply transformation

input_tensor = transform(image)
input_batch = input_tensor.unsqueeze(0)  # Add batch dimension

# Move input to GPU
input_batch = input_batch.cuda()

# Perform inference
#model.eval()
with torch.no_grad():
    output = model(input_batch)

# Interpret the output (assuming it's a classification task)
predicted_class = torch.argmax(output).item()
print("Predicted Class:", predicted_class)


Predicted Class: 10
