In [1]:
#imports
import os
import random
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import os
import random
import shutil

In [2]:
#The Kaggle dataset we use is too big, so I just take 100 images of each item

# Define the path to the original directory and the new directory
original_dir = "asl_alphabet"
new_dir = "asl_alphabet_100"

# Create the new directory if it doesn't already exist
if not os.path.exists(new_dir):
    os.makedirs(new_dir)

# Loop through each subdirectory in the original directory
for subdir in os.listdir(original_dir):
    subdir_path = os.path.join(original_dir, subdir)
    if os.path.isdir(subdir_path):
        # Create a new subdirectory in the new directory
        new_subdir_path = os.path.join(new_dir, subdir)
        if not os.path.exists(new_subdir_path):
            os.makedirs(new_subdir_path)
        # Get a list of all the images in the subdirectory
        images = os.listdir(subdir_path)
        # Shuffle the list of images and select the first 50
        random.shuffle(images)
        selected_images = images[:100]
        # Move the selected images to the new subdirectory
        for image in selected_images:
            src = os.path.join(subdir_path, image)
            dst = os.path.join(new_subdir_path, image)
            shutil.copyfile(src, dst)


In [3]:
#this performs the nerual network 
data_dir = "asl_alphabet_100"

# Define the transformations to be applied to the data
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the data
dataset = torchvision.datasets.ImageFolder(root=data_dir, transform=transform)

# Split the data into training and testing sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Define the DataLoader for the training and testing sets
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Define the neural network architecture
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(32 * 16 * 16, 256)
        self.fc2 = torch.nn.Linear(256, 32)

    def forward(self, x):
        x = self.conv1(x)
        x = torch.nn.functional.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = torch.nn.functional.relu(x)
        x = self.pool(x)
        x = x.view(-1, 32 * 16 * 16)
        x = self.fc1(x)
        x = torch.nn.functional.relu(x)
        x = self.fc2(x)
        return x

# Define the loss function and optimizer
net = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the neural network
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        # print("print")
        # print(outputs.shape,labels.shape)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

[1,   100] loss: 3.453
[1,   200] loss: 3.416
[1,   300] loss: 3.372
[1,   400] loss: 3.321
[1,   500] loss: 3.233
[1,   600] loss: 3.135
[2,   100] loss: 2.924
[2,   200] loss: 2.849
[2,   300] loss: 2.710
[2,   400] loss: 2.581
[2,   500] loss: 2.500
[2,   600] loss: 2.392
[3,   100] loss: 2.167
[3,   200] loss: 2.146
[3,   300] loss: 2.014
[3,   400] loss: 1.955
[3,   500] loss: 1.867
[3,   600] loss: 1.804
[4,   100] loss: 1.660
[4,   200] loss: 1.599
[4,   300] loss: 1.558
[4,   400] loss: 1.546
[4,   500] loss: 1.509
[4,   600] loss: 1.473
[5,   100] loss: 1.344
[5,   200] loss: 1.334
[5,   300] loss: 1.275
[5,   400] loss: 1.260
[5,   500] loss: 1.231
[5,   600] loss: 1.228
[6,   100] loss: 1.163
[6,   200] loss: 1.101
[6,   300] loss: 1.115
[6,   400] loss: 1.068
[6,   500] loss: 1.103
[6,   600] loss: 1.050
[7,   100] loss: 0.988
[7,   200] loss: 0.975
[7,   300] loss: 0.988
[7,   400] loss: 0.933
[7,   500] loss: 0.924
[7,   600] loss: 0.903
[8,   100] loss: 0.890
[8,   200] 

In [None]:
# Test the neural network
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

In [5]:
#save the neural network 
torch.save(net.state_dict(), 'translater.pth')

In [None]:
##YOLO hand detection

In [3]:
import torch
import cv2
import numpy as np

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /Users/jakesilver/.cache/torch/hub/master.zip
YOLOv5 🚀 2023-4-29 Python-3.9.12 torch-2.0.0 CPU

Fusing layers... 


[31m[1mrequirements:[0m /Users/jakesilver/.cache/torch/hub/requirements.txt not found, check failed.


YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [4]:
# Load input image
img = cv2.imread('IMG-4794.JPG')

# Detect objects in the input image using YOLOv5
results = model(img)
print(results)
# Filter out all the detected objects except for the hand
hand_results = results.pred[results.pred[:, 5] == 0] # Assumes hand class has index 0

# Extract the bounding box coordinates for the hand
x1, y1, x2, y2 = hand_results[0][:4].int().tolist()

# Extract the region of the input image containing the hand
hand_region = img[y1:y2, x1:x2]

# Save the extracted hand region as a new image
cv2.imwrite('hand_image.jpg', hand_region)

image 1/1: 3088x2320 (no detections)
Speed: 5.6ms pre-process, 275.9ms inference, 1.8ms NMS per image at shape (1, 3, 640, 480)


TypeError: list indices must be integers or slices, not tuple

In [14]:
!pip install -r /Users/jakesilver/.cache/torch/hub/requirements.txt

[0m[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: '/Users/jakesilver/.cache/torch/hub/requirements.txt'[0m[31m
[0m

In [18]:
!pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt


[0m