In [1]:
#imports
import os
import random
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import os
import random
import shutil

In [10]:
#The Kaggle dataset we use is too big, so I just take 100 images of each item

# Define the path to the original directory and the new directory
original_dir = "asl_alphabet"
new_dir = "asl_alphabet_1000"

# Create the new directory if it doesn't already exist
if not os.path.exists(new_dir):
    os.makedirs(new_dir)

# Loop through each subdirectory in the original directory
for subdir in os.listdir(original_dir):
    subdir_path = os.path.join(original_dir, subdir)
    if os.path.isdir(subdir_path):
        # Create a new subdirectory in the new directory
        new_subdir_path = os.path.join(new_dir, subdir)
        if not os.path.exists(new_subdir_path):
            os.makedirs(new_subdir_path)
        # Get a list of all the images in the subdirectory
        images = os.listdir(subdir_path)
        # Shuffle the list of images and select the first 50
        random.shuffle(images)
        selected_images = images[:1000]
        # Move the selected images to the new subdirectory
        for image in selected_images:
            src = os.path.join(subdir_path, image)
            dst = os.path.join(new_subdir_path, image)
            shutil.copyfile(src, dst)


In [11]:
#this performs the nerual network 
data_dir = "asl_alphabet_1000"

# Define the transformations to be applied to the data
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the data
dataset = torchvision.datasets.ImageFolder(root=data_dir, transform=transform)

# Split the data into training and testing sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Define the DataLoader for the training and testing sets
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Define the neural network architecture
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(32 * 16 * 16, 256)
        self.fc2 = torch.nn.Linear(256, 32)

    def forward(self, x):
        x = self.conv1(x)
        x = torch.nn.functional.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = torch.nn.functional.relu(x)
        x = self.pool(x)
        x = x.view(-1, 32 * 16 * 16)
        x = self.fc1(x)
        x = torch.nn.functional.relu(x)
        x = self.fc2(x)
        return x

In [12]:
# Define the loss function and optimizer
net = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the neural network
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        #print(labels)
        optimizer.zero_grad()
        outputs = net(inputs)
        # print("print")
        # print(outputs.shape,labels.shape)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

[1,   100] loss: 3.433
[1,   200] loss: 3.362
[1,   300] loss: 3.304
[1,   400] loss: 3.241
[1,   500] loss: 3.131
[1,   600] loss: 3.078
[1,   700] loss: 2.998
[1,   800] loss: 2.859
[1,   900] loss: 2.769
[2,   100] loss: 2.675
[2,   200] loss: 2.520
[2,   300] loss: 2.395
[2,   400] loss: 2.336
[2,   500] loss: 2.218
[2,   600] loss: 2.142
[2,   700] loss: 2.072
[2,   800] loss: 1.986
[2,   900] loss: 1.934
[3,   100] loss: 1.811
[3,   200] loss: 1.736
[3,   300] loss: 1.681
[3,   400] loss: 1.607
[3,   500] loss: 1.551
[3,   600] loss: 1.499
[3,   700] loss: 1.456
[3,   800] loss: 1.470
[3,   900] loss: 1.440
[4,   100] loss: 1.367
[4,   200] loss: 1.289
[4,   300] loss: 1.264
[4,   400] loss: 1.280
[4,   500] loss: 1.186
[4,   600] loss: 1.185
[4,   700] loss: 1.179
[4,   800] loss: 1.106
[4,   900] loss: 1.127
[5,   100] loss: 1.084
[5,   200] loss: 1.061
[5,   300] loss: 1.034
[5,   400] loss: 1.031
[5,   500] loss: 0.955
[5,   600] loss: 0.968
[5,   700] loss: 0.957
[5,   800] 

In [13]:
# Test the neural network
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

Accuracy of the network on the test images: 83 %


In [14]:
#save the neural network 
torch.save(net.state_dict(), 'translater.pth')

In [5]:
import torch
import torchvision.transforms as transforms
from PIL import Image

items = ['A', 'B', 'C', 'D', "del", 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', "nothing", 'O', 'P', 'Q', 'R', 'S', "space", 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
# Load the PyTorch model
model = Net()
model.load_state_dict(torch.load('translater.pth'))
model.eval()


# Load and preprocess the image
image_path = 'export1.JPG'
image = Image.open(image_path)
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
image = transform(image)

# Get the prediction
model.eval()
with torch.no_grad():
    output = model(image)
predicted_class = torch.argmax(output).item()

print(f"The predicted class is {items[predicted_class]}")

The predicted class is L
