In [1]:
# Mount Google Drive inside the Colab runtime so the dataset and checkpoint
# stored under /content/drive/ are reachable from the cells below.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [3]:
# Project root on the mounted Drive; the dataset folders and the saved
# checkpoint ('resnet.pt') are resolved relative to this directory.
path = "/content/drive/MyDrive/EE541_project/"

In [4]:
import os
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
from torch.utils.data import random_split
from functools import partial
from dataclasses import dataclass
from collections import OrderedDict
import torchvision
import torch.optim as optim

In [5]:
# Seed every random number generator used in this notebook for reproducibility.
random_seed = 123
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
# Ask cuDNN for deterministic behaviour and turn off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [6]:
# Folders holding the ASL training and test images, relative to the project root.
train_root, test_root = (
    os.path.join(path, f"data/asl/{split_name}/") for split_name in ("train", "test")
)

In [7]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device {device}.")

Training on device cuda.


## Preprocessing normalization
### Find the statistics of the original training data (split under the random seed set above): the per-channel means and standard deviations of the 3 channels. Images are then resized to 3x224x224 for the ResNet model.

In [8]:
# The statistics printed below were computed once with the following code.
# It takes a long time to run, so it is commented out and the results are
# hard-coded instead.
# dataset = datasets.ImageFolder(root = train_root, transform=transforms.ToTensor())
# train_split = 0.8 # split train and validation as 8:2
# train_size = int(train_split * len(dataset))
# val_size = len(dataset) - train_size
# train_dataset, val_dataset = random_split(dataset, [train_size, val_size],
#                                       generator=torch.manual_seed(random_seed)) 
# imgs = torch.stack([img_t for img_t, _ in train_dataset], dim=3)
# mean = imgs.view(3, -1).mean(dim=1)
# std = imgs.view(3, -1).std(dim=1)
# The second list comes from `.std(...)`: these are per-channel standard
# deviations (the values given to transforms.Normalize), not variances.
print("means are [0.5190, 0.4991, 0.5139], stds are [0.2283, 0.2557, 0.2639]")

means is [0.5190, 0.4991, 0.5139], variances is [0.2283, 0.2557, 0.2639]


In [9]:
# Load every training image, convert it to a tensor, resize it to the 224-pixel
# input expected by the network and standardise each channel with the
# per-channel mean / standard deviation reported in the statistics cell above.
dataset = datasets.ImageFolder(
    root=train_root,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(224),
        transforms.Normalize((0.5190, 0.4991, 0.5139),
                             (0.2283, 0.2557, 0.2639)),
    ]),
)
train_split = 0.8  # split train and validation as 8:2

train_size = int(train_split * len(dataset))
val_size = len(dataset) - train_size
# Use a dedicated, explicitly seeded generator so the split is the same every
# time this cell runs, regardless of how much of the global RNG stream other
# cells have already consumed.
split_generator = torch.Generator().manual_seed(random_seed)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size],
                                          generator=split_generator)
print(f"All data size is {len(dataset)}")
print(f"train size is {train_size}")
print(f"validation size is {val_size}")

All data size is 87000
train size is 69600
validation size is 17400


In [10]:
# Number of classes found by ImageFolder, followed by the class-name -> label-index mapping.
print(len(dataset.classes))
print(dataset.class_to_idx)

29
{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25, 'del': 26, 'nothing': 27, 'space': 28}


In [11]:
batch_size = 128
# One output unit per class folder found by ImageFolder (29 for this dataset).
num_classes = len(dataset.classes)

path_store_net = os.path.join(path, 'resnet.pt')
has_checkpoint = os.path.exists(path_store_net)

# The ImageNet weights are only a starting point for fine-tuning; when a
# fine-tuned checkpoint already exists it overwrites every weight, so the
# pretrained download is skipped in that case.
model = torchvision.models.resnet18(pretrained=not has_checkpoint)
input_num_fc = model.fc.in_features
# Replace the ImageNet classification head with one sized for this dataset.
model.fc = nn.Linear(input_num_fc, num_classes, bias=True)
if has_checkpoint:
  print("resnet.pt exists")
  # map_location allows a checkpoint saved on a GPU to be restored on a
  # CPU-only runtime (and vice versa).
  model.load_state_dict(torch.load(path_store_net, map_location=device))
model = model.to(device=device)

optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum=0.9)

loss_fn = nn.CrossEntropyLoss()

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
# Validation metrics do not depend on sample order, so no shuffling is needed.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [12]:
import time

# Fine-tune the network for up to `n_epochs` epochs. After every epoch the model
# is evaluated on the validation set; the weights are saved whenever the
# validation accuracy improves, and training stops early once `patience`
# consecutive epochs bring no improvement.
n_epochs = 5
patience = 2

loss_train_list = []
acc_train_list = []

loss_val_list = []
acc_val_list = []
acc_val_max = 0

n_stop = 0  # consecutive epochs without a validation-accuracy improvement

for epoch in range(1, n_epochs+1):
  # Switch to training mode at the start of every epoch: validation below (or
  # an earlier call to predict()) leaves the model in eval mode.
  model.train()
  loss_train = 0.0
  correct = 0
  total = 0
  start_time = time.time()
  print(f"Epoch {epoch} Training start:")
  for batch_i, (imgs, labels) in enumerate(train_loader):
    imgs, labels = imgs.to(device), labels.to(device)
    outputs = model(imgs)

    loss = loss_fn(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    _, predicted = torch.max(outputs, dim=1)
    total += labels.shape[0]
    correct += int((predicted == labels).sum())
    loss_train += loss.item()
    if batch_i % 20 == 0:
      print('Epoch {}, Step {}, Loss: {:.4f}'
        .format(epoch, batch_i, loss.item()))

  loss_train_list.append(loss_train/len(train_loader))
  acc_train_list.append(100 * correct/total)
  end_time = time.time()
  print('Duration: {} minutes, Epoch {}, Training loss {:.4f}, Training accuracy {:2.3f}%'
    .format((end_time - start_time)/60 ,epoch,
      loss_train/len(train_loader), 100*correct/total))

  loss_val = 0.0
  correct = 0
  total = 0
  model.eval()
  with torch.no_grad():
    for imgs, labels in val_loader:
      imgs, labels = imgs.to(device), labels.to(device)
      outputs = model(imgs)
      _, predicted = torch.max(outputs, dim=1)
      total += labels.shape[0]
      correct += int((predicted == labels).sum())
      # Compute the loss on this validation batch; reusing the `loss` variable
      # from the training loop would only repeat the last training-batch loss.
      loss_val += loss_fn(outputs, labels).item()

  acc_val = 100*correct/total
  loss_val_list.append(loss_val/len(val_loader))#loss per batch
  acc_val_list.append(acc_val)
  print('Epoch {}, Validation loss {:.4f}, Validation accuracy {:2.3f}%'
    .format(epoch, loss_val/len(val_loader), acc_val))

  if acc_val > acc_val_max:
    acc_val_max = acc_val
    n_stop = 0
    # Save to the checkpoint path defined with the model so the file that is
    # written is the same one that is loaded on the next run.
    torch.save(model.state_dict(), path_store_net)
    print('Detect Improvement, Save model')
  else:
    n_stop += 1

  if n_stop >= patience:
    break



Epoch 1 Training start:
Epoch 1, Step 0, Loss: 3.5166
Epoch 1, Step 20, Loss: 3.0404
Epoch 1, Step 40, Loss: 2.5532
Epoch 1, Step 60, Loss: 2.0278
Epoch 1, Step 80, Loss: 1.5272
Epoch 1, Step 100, Loss: 1.0668
Epoch 1, Step 120, Loss: 0.7144
Epoch 1, Step 140, Loss: 0.6011
Epoch 1, Step 160, Loss: 0.4705
Epoch 1, Step 180, Loss: 0.3434
Epoch 1, Step 200, Loss: 0.3067
Epoch 1, Step 220, Loss: 0.2595
Epoch 1, Step 240, Loss: 0.2292
Epoch 1, Step 260, Loss: 0.1959
Epoch 1, Step 280, Loss: 0.1342
Epoch 1, Step 300, Loss: 0.1255
Epoch 1, Step 320, Loss: 0.1329
Epoch 1, Step 340, Loss: 0.1281
Epoch 1, Step 360, Loss: 0.1109
Epoch 1, Step 380, Loss: 0.0930
Epoch 1, Step 400, Loss: 0.0773
Epoch 1, Step 420, Loss: 0.0606
Epoch 1, Step 440, Loss: 0.0573
Epoch 1, Step 460, Loss: 0.0574
Epoch 1, Step 480, Loss: 0.0577
Epoch 1, Step 500, Loss: 0.0706
Epoch 1, Step 520, Loss: 0.0590
Epoch 1, Step 540, Loss: 0.0499
Duration: 377.1287308533986 minutes, Epoch 1, Training loss 0.5954, Training accuracy 

In [13]:
import matplotlib.pyplot as plt
from PIL import Image
def predict(img_path, transform):
    """Classify one image file, display it, and return the predicted class name.

    Args:
        img_path: Path of the image file to classify.
        transform: Callable applied to the PIL image to produce the input
            tensor (expected to match the preprocessing used for training).

    Returns:
        The entry of ``dataset.classes`` with the highest model score.
    """
    # Force three channels so grayscale / RGBA files still match the
    # 3-channel normalisation, and close the file handle once decoded.
    with Image.open(img_path) as img_file:
        img = img_file.convert('RGB')
    img_tensor = transform(img).to(device).unsqueeze(0)  # add the batch dimension
    model.eval()
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(img_tensor)
    _, index = torch.max(output, 1)
    plt.imshow(img)
    plt.show()
    return dataset.classes[index.item()]

In [17]:
# Same preprocessing as the training dataset: tensor conversion, resize to 224
# and per-channel standardisation.
transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Resize(224),
                                    transforms.Normalize((0.5190, 0.4991, 0.5139),
                                                         (0.2283, 0.2557, 0.2639))])
# Classify every file in the test folder. The true label is the part of the
# file name before the first underscore (e.g. "A_test.jpg" -> "A").
# The loop variable is deliberately not called `path`: that name holds the
# project root used when saving the checkpoint and must not be overwritten.
for i, file in enumerate(sorted(os.listdir(test_root)), start=1):
    test_img_path = os.path.join(test_root, file)
    print("Image "+str(i)+" start")
    prediction = predict(test_img_path, transform)
    true_label = file.split('_')[0]
    if prediction == true_label:
        print(f'That is correct - True Label = {true_label} and Prediction : {prediction}')
    else:
        print(f'That is Wrong - True Label = {true_label} and Prediction : {prediction}. However, {prediction} looks like : ')
        # Show the test image of the (wrongly) predicted class for comparison,
        # if the test folder contains one for that class.
        ref_img_path = os.path.join(test_root, prediction+'_test.jpg')
        if os.path.exists(ref_img_path):
            with Image.open(ref_img_path) as ref_img:
                plt.imshow(ref_img)
                plt.show()
        else:
            print(f'(no reference image found for {prediction})')
    print('end')

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Sanity check: push one random 3x224x224 input through the network and show
# the shape of the output.
sample = torch.randn(1, 3, 224, 224).to(device=device)
# Run in eval mode without gradients so the random input neither updates the
# BatchNorm running statistics nor builds an autograd graph; the previous
# train/eval mode is restored afterwards.
was_training = model.training
model.eval()
with torch.no_grad():
    sample_output = model(sample)
model.train(was_training)
sample_output.size()

In [None]:
import matplotlib.pyplot as plt

def show_batch(imgs_batched,
               mean=(0.5190, 0.4991, 0.5139),
               std=(0.2283, 0.2557, 0.2639)):
    """Show a batch of normalized image tensors as a single image grid.

    Args:
        imgs_batched: Batch of image tensors as yielded by the data loader.
        mean: Per-channel mean used by the Normalize transform; the default is
            the value used by this notebook's dataset transform.
        std: Per-channel standard deviation used by the Normalize transform;
            the default is the value used by this notebook's dataset transform.
    """
    # Undo the normalisation first: imshow expects float data in [0, 1] and
    # would otherwise clip the standardised values.
    mean_t = torch.tensor(mean).view(-1, 1, 1)
    std_t = torch.tensor(std).view(-1, 1, 1)
    imgs = (imgs_batched.detach().cpu() * std_t + mean_t).clamp(0.0, 1.0)

    grid = utils.make_grid(imgs)
    # make_grid returns channels-first data; matplotlib wants channels-last.
    plt.imshow(grid.permute(1, 2, 0).numpy())
    plt.title('Batch from dataloader')

# Print the tensor sizes of the first batches and display the 4th one.
for i_batch, (img, label) in enumerate(train_loader):
    print(i_batch, img.size(), label.size())

    # observe 4th batch and stop.
    if i_batch == 3:
        plt.figure()
        show_batch(img)
        plt.axis('off')
        plt.ioff()
        plt.show()
        break