In [1]:
import torch

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device  # echo the selected device as the cell output

device(type='cuda')

In [2]:
import zipfile
import requests
from pathlib import Path

data_path = Path('data/')
image_path = data_path / 'asl_alphabet'
archive_path = data_path / 'archive.zip'
archive_url = 'https://github.com/Loscano/ASL_alphabet/raw/refs/heads/main/archive.zip'

# Only download and extract when the dataset folder is missing or empty, so
# re-running this cell does not fetch the archive again.
if image_path.is_dir() and any(image_path.iterdir()):
  print('Folder already exists')
else:
  print('making dir')
  image_path.mkdir(parents=True, exist_ok=True)

  # Fetch first and fail loudly on an HTTP error, so an error page is never
  # written to disk as if it were the archive.
  request = requests.get(archive_url, timeout=60)
  request.raise_for_status()
  archive_path.write_bytes(request.content)

  with zipfile.ZipFile(archive_path, 'r') as zip_ref:
    zip_ref.extractall(image_path)

making dir


In [3]:
# Download helper functions from Learn PyTorch repo (if not already downloaded)
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download")
else:
  print("Downloading helper_functions.py")
  # Note: you need the "raw" GitHub URL for this to work
  request = requests.get(
      "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
      timeout=30,
  )
  # Stop on an HTTP error: otherwise the error body would be saved as
  # helper_functions.py and the import that follows would fail confusingly.
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

# Import accuracy metric
from helper_functions import accuracy_fn

Downloading helper_functions.py


In [4]:
# Locations of the extracted dataset. Each split sits in a folder nested
# inside another folder of the same name.
data_path = Path('data/')
image_path = data_path.joinpath('asl_alphabet')
train_path = image_path.joinpath('asl_alphabet_train', 'asl_alphabet_train')
test_path = image_path.joinpath('asl_alphabet_test', 'asl_alphabet_test')
print(f'{train_path}, {test_path}')

data/asl_alphabet/asl_alphabet_train/asl_alphabet_train, data/asl_alphabet/asl_alphabet_test/asl_alphabet_test


In [5]:
import os
# Report every directory under the test split together with what it contains.
for current_dir, subdir_names, file_names in os.walk(test_path):
    print(f"Root: {current_dir}\nDirs: {subdir_names}\nFiles: {file_names}")

Root: data/asl_alphabet/asl_alphabet_test/asl_alphabet_test
Dirs: []
Files: ['U_test.jpg', 'V_test.jpg', 'B_test.jpg', 'N_test.jpg', 'Y_test.jpg', 'O_test.jpg', 'M_test.jpg', 'J_test.jpg', 'L_test.jpg', 'Z_test.jpg', 'G_test.jpg', 'K_test.jpg', 'H_test.jpg', 'X_test.jpg', 'nothing_test.jpg', 'T_test.jpg', 'D_test.jpg', 'S_test.jpg', 'E_test.jpg', 'F_test.jpg', 'C_test.jpg', 'A_test.jpg', 'W_test.jpg', 'Q_test.jpg', 'space_test.jpg', 'R_test.jpg', 'P_test.jpg', 'I_test.jpg']


In [6]:
from torchvision import datasets, transforms

# Preprocessing for images read from the training folder: resize to 255x255,
# mirror horizontally with probability 0.5, then convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(255, 255)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
    ]
)

In [7]:
from random import shuffle

# Labelled image dataset rooted at the training folder; every sample goes
# through `transform`, labels are left untouched.
full_data = datasets.ImageFolder(root=train_path, transform=transform, target_transform=None)

In [8]:
# Hold out 20% of the samples for evaluation; the remainder is used for training.
test_len = int(len(full_data) * .2)
train_len = len(full_data) - test_len

# Seed the split explicitly: no global seed has been set at this point, so
# without a generator each run would put different images in each subset.
split_generator = torch.Generator().manual_seed(42)
# NOTE(review): both subsets wrap `full_data`, so they share its transform
# (including any random augmentation it applies).
train_set, test_set = torch.utils.data.random_split(
    full_data, [train_len, test_len], generator=split_generator
)

In [9]:
from torch.utils.data import DataLoader
BATCH_SIZE = 32
# os.cpu_count() returns None when the count cannot be determined; fall back
# to 0 workers (load in the main process) instead of passing None through.
NUM_WORKERS = os.cpu_count() or 0

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(
    dataset=train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS
)
# Evaluation batches keep a fixed order.
test_dataloader = DataLoader(
    dataset=test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS
)

In [10]:
class model(torch.nn.Module):
  """Small CNN: two conv blocks followed by a linear classifier.

  Each block is Conv-ReLU-Conv-ReLU-MaxPool. The 3x3 convolutions use
  padding 1 / stride 1 and keep the spatial size; each 2x2 max-pool halves
  it (rounding down).

  Args:
    input_shape: Number of channels in the input images.
    hidden_units: Number of channels produced by every convolution.
    output_shape: Number of output classes (size of the logits vector).
    image_size: Spatial size of the inputs, either a single int for square
      images or a ``(height, width)`` pair. Defaults to 255, which gives the
      63x63 feature map the classifier was originally hard-coded for.

  Raises:
    ValueError: If ``image_size`` is too small to survive both pooling layers.
  """

  def __init__(self, input_shape, hidden_units, output_shape, image_size=255):
    super().__init__()
    if isinstance(image_size, int):
      height = width = image_size
    else:
      height, width = image_size

    # Two MaxPool2d(kernel_size=2, stride=2) layers: floor-halve twice.
    pooled_height = height // 2 // 2
    pooled_width = width // 2 // 2
    if pooled_height < 1 or pooled_width < 1:
      raise ValueError(f"image_size {image_size!r} is too small for two 2x2 pooling layers")

    self.block_1 = self._conv_block(input_shape, hidden_units)
    self.block_2 = self._conv_block(hidden_units, hidden_units)
    self.classifier = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(in_features=hidden_units * pooled_height * pooled_width,
                        out_features=output_shape)
    )

  @staticmethod
  def _conv_block(in_channels, out_channels):
    """Build one Conv-ReLU-Conv-ReLU-MaxPool block."""
    return torch.nn.Sequential(
        torch.nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1, stride=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1, stride=1),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(kernel_size=2, stride=2)
    )

  def forward(self, x):
    """Return class logits for a batch of images."""
    x = self.block_1(x)
    x = self.block_2(x)
    x = self.classifier(x)
    return x

# 3 input channels, 10 channels per convolution, 29 output classes; the
# network is moved to the selected device right after construction.
asl_model = model(input_shape=3, hidden_units=10, output_shape=29).to(device)

In [12]:
from torchvision import datasets, transforms
# Deterministic preprocessing: resize to 255x255 and convert to a tensor.
# Note that this rebinds the module-level name `transform`.
transform = transforms.Compose(
    [
        transforms.Resize(size=[255, 255]),
        transforms.ToTensor(),
    ]
)

In [13]:
from PIL import Image
from io import BytesIO
import numpy as np

def read_file_as_image(data):
    """Load an image and return it as a single-item batch tensor.

    Args:
        data: Encoded image content as ``bytes``/``bytearray``, an open
            ``BytesIO`` stream, or a filesystem path (``str`` or path-like).

    Returns:
        The output of the module-level ``transform`` for the image, with a
        leading batch dimension added by ``unsqueeze(dim=0)``.

    Raises:
        ValueError: If ``data`` is none of the supported input types.
    """
    if isinstance(data, (bytes, bytearray)):
        # Raw bytes need a file-like wrapper before PIL can read them.
        source = BytesIO(data)
    elif isinstance(data, (BytesIO, str, os.PathLike)):
        # Streams and paths can be handed to PIL as they are; wrapping an
        # existing BytesIO in another BytesIO would raise TypeError.
        source = data
    else:
        raise ValueError("Input must be a byte stream or a valid file path.")

    # Force three channels so grayscale / RGBA files do not reach the network
    # with the wrong channel count (torchvision's default ImageFolder loader
    # converts to RGB as well).
    image = Image.open(source).convert("RGB")

    # Add the batch dimension expected by the model.
    return transform(image).unsqueeze(dim=0)



def predict(image_data, model=None):
    """Classify a single image.

    Args:
        image_data: Anything accepted by ``read_file_as_image``.
        model: Network to run. Defaults to the notebook's ``asl_model``
            (the previously referenced ``modelASL`` is not defined anywhere
            in this notebook).

    Returns:
        A ``(predicted_class_list, probabilities_list)`` pair: the arg-max
        class index per batch item, and the per-class softmax probabilities
        rounded to 4 decimals, both as plain Python lists.
    """
    if model is None:
        model = asl_model

    input_tensor = read_file_as_image(image_data)
    input_tensor = input_tensor.to(device)  # Move to the correct device

    # Switch to evaluation mode for inference.
    model.eval()
    with torch.inference_mode():
        output = model(input_tensor)  # Forward pass through the model

        # Softmax over the class dimension turns logits into probabilities
        probabilities = torch.nn.functional.softmax(output, dim=1)

        # Predicted class = index of the largest logit
        predicted_class = torch.argmax(output, dim=1)

    # Move to the CPU and convert to NumPy arrays
    probabilities = probabilities.cpu().numpy()
    predicted_class = predicted_class.cpu().numpy()

    # Round the probabilities for readability
    probabilities = np.round(probabilities, 4)

    # Convert to lists for easier use
    return predicted_class.tolist(), probabilities.tolist()

In [14]:
# Cross-entropy loss on the logits, and Adam (learning rate 1e-3) over every
# parameter of the network.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(asl_model.parameters(), lr=1e-3)

In [15]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer):
  """Run one training epoch over ``data_loader``.

  Args:
    model: Network to optimise; moved to the module-level ``device``.
    data_loader: Iterable of ``(X, y)`` batches that supports ``len()``.
    loss_fn: Callable computing the loss from ``(predictions, targets)``.
    optimizer: Optimizer that updates ``model``'s parameters.

  Returns:
    ``(train_loss, train_acc)``: the per-batch loss and the per-batch
    ``accuracy_fn`` value, each averaged over the number of batches. The
    loss is a tensor detached from the autograd graph.
  """
  # Moving the model does not depend on the batch, so do it once up front.
  model = model.to(device)
  model.train()

  train_loss, train_acc = 0, 0
  for X, y in data_loader:
    X, y = X.to(device), y.to(device)

    y_pred = model(X)
    loss = loss_fn(y_pred, y)

    # Accumulate a detached value: adding `loss` itself would make the running
    # total part of the autograd graph of every batch in the epoch.
    train_loss += loss.detach()
    train_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  train_loss /= len(data_loader)
  train_acc /= len(data_loader)
  print(f'Train loss: {train_loss}, Train acc: {train_acc}')
  return train_loss, train_acc

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module):
  model.eval()

  test_loss, test_acc = 0,0
  with torch.inference_mode():
    for batch, (X,y) in enumerate(data_loader):
      X, y = X.to(device), y.to(device)

      model = model.to(device)

      y_pred = model(X)

      loss = loss_fn(y_pred, y)
      test_loss += loss
      test_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

    test_loss /= len(data_loader)
    test_acc /= len(data_loader)
    print(f'Test loss: {test_loss}, Test acc: {test_acc}')
    return test_loss, test_acc


In [16]:
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module = torch.nn.CrossEntropyLoss(),
          epochs: int = 5):
    """Alternate ``train_step`` and ``test_step`` for ``epochs`` epochs.

    Returns:
        A dict with the keys ``train_loss``, ``train_acc``, ``test_loss`` and
        ``test_acc``; each maps to a list holding one value per epoch. Tensor
        metrics are converted with ``.item()``, other values are stored as-is.
    """
    def as_number(value):
        # Unwrap tensors into plain Python numbers; leave anything else alone.
        if isinstance(value, torch.Tensor):
            return value.item()
        return value

    history = {key: [] for key in ("train_loss", "train_acc", "test_loss", "test_acc")}

    for _ in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           data_loader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer)
        test_loss, test_acc = test_step(model=model,
                                        data_loader=test_dataloader,
                                        loss_fn=loss_fn)

        # `history` keeps its insertion order, which matches this tuple.
        epoch_values = (train_loss, train_acc, test_loss, test_acc)
        for key, value in zip(history, epoch_values):
            history[key].append(as_number(value))

    return history

In [17]:
from timeit import default_timer as timer
from tqdm.auto import tqdm

# Seed PyTorch's global RNG before the training run.
torch.manual_seed(42)
# NOTE(review): `epoch` is not read by the call below; NUM_EPOCHS sets the
# number of epochs.
epoch = 5
NUM_EPOCHS = 5

# Time the whole training run.
start_time = timer()

# Train model_0
model_0_results = train(
    asl_model,
    train_dataloader,
    test_dataloader,
    optimizer,
    loss_fn,
    NUM_EPOCHS,
)

# Stop the timer and report the elapsed wall-clock time.
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/5 [00:00<?, ?it/s]

Train loss: 1.3592159748077393, Train acc: 58.57902298850575
Test loss: 0.4751603901386261, Test acc: 83.92501531862746
Train loss: 0.30888399481773376, Train acc: 89.84626436781609
Test loss: 0.29589512944221497, Test acc: 89.86098345588235
Train loss: 0.17757301032543182, Train acc: 94.16091954022988
Test loss: 0.3368496596813202, Test acc: 89.36504289215686
Train loss: 0.12820425629615784, Train acc: 95.78448275862068
Test loss: 0.24202346801757812, Test acc: 92.29090073529412
Train loss: 0.09717188030481339, Train acc: 96.8146551724138
Test loss: 0.1601981669664383, Test acc: 94.5101868872549
Total training time: 1230.765 seconds


In [None]:
# Write the network's state_dict (not the module object) to model.pth.
torch.save(obj=asl_model.state_dict(), f="model.pth")
print("Model saved successfully.")

Model saved successfully.
