In [1]:
%%capture
!pip install kaggle

In [2]:
%%capture
!kaggle

In [3]:
%%capture
!cp -f ./kaggle.json ../root/.kaggle/kaggle.json 
!chmod 600 /root/.kaggle/kaggle.json

In [4]:
%%capture
!kaggle datasets download -d grassknoted/asl-alphabet
!unzip asl-alphabet.zip

In [5]:
import torch
import torchvision
import torchvision.datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import PIL
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True



import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

In [6]:

test_image_transforms = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor()
])

In [7]:
data = torchvision.datasets.ImageFolder('/content/asl_alphabet_train/asl_alphabet_train/', transform = test_image_transforms)


In [8]:
n = len(data)  # total number of examples
n_test = int(0.1 * n)  # take ~10% for test
test_set = torch.utils.data.Subset(data, range(n_test))  # take first 10%
train_set = torch.utils.data.Subset(data, range(n_test, n))  # take the rest   


In [9]:
train_data_loader = DataLoader(train_set, batch_size = 128, shuffle = True)
test_data_loader = DataLoader(test_set, batch_size = 128)

In [10]:
# (32, 3, 224, 224)
# (Batch, Channels, Height, Width)
class ConvBlock(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size):
    super().__init__()
    self.model = nn.Sequential(
        *[
          nn.Conv2d(in_channels, out_channels, kernel_size),
          nn.ReLU(),
          nn.BatchNorm2d(out_channels)
        ]
    ) 
  def forward(self, x):
    return self.model(x)
NUM_LETTERS = 29
class SignLanguageModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.model = nn.Sequential(*[
                                 ConvBlock(3, 16, 3),
                                 ConvBlock(16, 32, 3),
                                 ConvBlock(32, 64, 1),
                                 nn.AdaptiveAvgPool2d(1) # (B, c, 1, 1)
    ])
    self.Linear = nn.Linear(64, NUM_LETTERS)
  def forward(self, x):
    pred = self.model(x) # (B, C, 1, 1) 
    B = pred.shape[0]
    C = pred.shape[1] 
    pred = pred.view(B, C) # (B, C) 
    return self.Linear(pred) # (B, num_classes0) -inf -> inf(0 -> 1)

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class Trainer:
  '''
  Trainer class to Train a binary classifier , just copied, not yet edited
  '''
  def __init__(self, model):
    self.device = device
    self.model = model.to(self.device)


    self.optimizer = optim.Adam(self.model.parameters(), lr = 5e-4) # 1e-4 -> 1e-3

    self.loss_function = nn.CrossEntropyLoss() # -inf -> inf.
  
  def save(self):
    # saves a model with the trainers
    torch.save(self.model.state_dict(), './model.pth')


  def training_step(self, x, y):
    self.model.train()
    self.optimizer.zero_grad()
    outputs = self.model(x) 

    loss = self.loss_function(outputs, y)
    
    loss.backward()
    self.optimizer.step()
  def evaluation_step(self, x, y):
    self.model.eval()
    with torch.no_grad():
      output = self.model(x)
    loss = self.loss_function(output, y) 
  
    output = F.softmax(output, dim = -1) # -inf -> inf 
    # 0-> 1. (1 / 1 + e ^ -x) 
    output = torch.argmax(output, dim = -1) # (B, ) 
    
    accuracy = output == y
    tp = torch.sum(accuracy)
    all = y.shape[0]
    return loss, tp / all
  def train_model(self, train_dataloader, num_steps):
    count = 0
    for x, y in tqdm(train_dataloader):
      x = x.float().to(self.device)
      y = y.long().to(self.device)
      self.training_step(x, y)
      count += 1
      if count >= num_steps:
        break
    
      


  def evaluate_model(self, eval_dataloader, num_steps):
    
    sum_loss = 0
    sum_accuracy = 0
    count = 0
    for x, y in tqdm(eval_dataloader):
      x = x.float().to(self.device)
      y = y.long().to(self.device)
      loss, acc = self.evaluation_step(x, y) 
      sum_loss = sum_loss + loss
      sum_accuracy = sum_accuracy + acc
      count += 1
      if count >= num_steps:
        break

    sum_loss = sum_loss / count
    sum_accuracy = sum_accuracy / count
    print(sum_loss, sum_accuracy) 
  def train_whole_model(self, num_epochs, train_dataloader, eval_dataloader):
    for epoch in range(num_epochs):
      print("--------TRAINING---------")
      self.train_model(train_dataloader, 100)
      print("--------EVALUATION-------")
      self.evaluate_model(eval_dataloader, 10)

In [12]:
model = SignLanguageModel()
trainer = Trainer(model)

In [13]:
from tqdm.notebook import tqdm

In [16]:
trainer.train_whole_model(10, train_data_loader, train_data_loader)

--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(1.3394, device='cuda:0') tensor(0.6414, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(1.5293, device='cuda:0') tensor(0.5211, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(1.1943, device='cuda:0') tensor(0.6797, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(1.1248, device='cuda:0') tensor(0.6641, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(1.1488, device='cuda:0') tensor(0.6562, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(1.1069, device='cuda:0') tensor(0.6805, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(1.0660, device='cuda:0') tensor(0.6914, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(0.9751, device='cuda:0') tensor(0.7125, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(0.9918, device='cuda:0') tensor(0.7070, device='cuda:0')
--------TRAINING---------


  0%|          | 0/612 [00:00<?, ?it/s]

--------EVALUATION-------


  0%|          | 0/612 [00:00<?, ?it/s]

tensor(0.8126, device='cuda:0') tensor(0.7844, device='cuda:0')


In [17]:
trainer.save()