In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm # Displays a progress bar
from math import sqrt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchsummary import summary
from torchvision import datasets, transforms
from torch.utils.data import Dataset, Subset, DataLoader, random_split
import pandas as pd
from PIL import Image
import os
# import data as dataset


In [2]:
if torch.cuda.is_available():
    print("Using the GPU. You are good to go!")
    device = 'cuda'
else:
    print("Using the CPU. Overall speed may be slowed down")
    device = 'cpu'

Using the CPU. Overall speed may be slowed down


In [197]:
class HandSignDataset(Dataset):
    def __init__(self, csv_file, root_dir, partition, transform=None):
        self.df = pd.read_csv(csv_file, delimiter=';')
        self.df = self.df[self.df['partition'] == partition]
        self.root_dir = root_dir
        self.transform = transform
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        filename = self.df.iloc[idx, self.df.columns.get_loc('filename')]
        num_frames = self.df.iloc[idx, self.df.columns.get_loc('number_of_frames')]
        label = self.df.iloc[idx, self.df.columns.get_loc('Label')]
        start_l = self.df.iloc[idx, self.df.columns.get_loc('start_l')]
        end_l = self.df.iloc[idx, self.df.columns.get_loc('end_l')]
        
        images = []
        for j in range(start_l, end_l):
            image_path = os.path.join(self.root_dir, filename, f"{j+1:04d}.jpg")
            bbox_path = os.path.join("BBox", filename, f"{j+1:04d}.txt")
            try:
                with open(bbox_path) as f:
                    bbox_info = f.readline().split(',')
                    print(bbox_info)
                x0, y0, x1, y1, _ = bbox_info
                x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

                image = Image.open(image_path).convert('RGB')
                print("HERE")
                image = image.crop((x0, y0, x1, y1))
            except FileNotFoundError:
                image = Image.open(image_path).convert('RGB')

            if self.transform:
                image = self.transform(image)
            print(image.shape)
            images.append(image)
            
 
        return images, label


In [198]:
# Define the transformation(s) to be applied to the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                          std=[0.229, 0.224, 0.225])
])

# Call the create_dataset function to create a PyTorch dataset
test_dataset = HandSignDataset(csv_file='output.csv', root_dir='ChicagoFSWild-Frames', partition='test',transform=transform)
train_dataset = HandSignDataset(csv_file='output.csv', root_dir='ChicagoFSWild-Frames', partition='train',transform=transform)
val_dataset = HandSignDataset(csv_file='output.csv', root_dir='ChicagoFSWild-Frames', partition='dev',transform=transform)


In [199]:
# print(train_dataset[0])

In [200]:
batch_size=32
# Create a data loader for the dataset

# trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, collate_fn=custom_collate)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)



In [201]:
for batch in testloader:
    print(batch.shape)

torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
['314', '88', '394', '188', '1\n']
HERE
torch.Size([3, 224, 224])
['319', '90', '399', '190', '1\n']
HERE
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
['214', '101', '279', '161', '1\n']
HERE
torch.Size([3, 224, 2

RuntimeError: each element in list of batch should be of equal size

In [84]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        ##############################################################################
        # TODO: Design your own network, define layers here.                          #
        # Here We provide a sample of two-layer fc network from HW4 Part3.           #
        # Your solution, however, should contain convolutional layers.               #
        # Refer to PyTorch documentations of torch.nn to pick your layers.           #
        # (https://pytorch.org/docs/stable/nn.html)                                  #
        # Some common choices: Linear, Conv2d, ReLU, MaxPool2d, AvgPool2d, Dropout   #
        # If you have many layers, use nn.Sequential() to simplify your code         #
        ##############################################################################
        # from 28x28 input image to hidden layer of size 256
#         self.fc1 = nn.Linear(28*28, 8) 
        self.conv1 =nn.Conv2d(in_channels = 1,out_channels = 16,padding = 2, kernel_size = (5,5),stride = (2,2))
        self.pool =nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 =nn.Conv2d(in_channels = 16,out_channels = 64, padding = 2,kernel_size = (5,5),stride = (2,2))
        self.conv3 =nn.Conv2d(in_channels = 64,out_channels = 8, padding = 2,kernel_size = (5,5),stride = (2,2))
        self.fc1 = nn.Linear(in_features = 8, out_features = 10) 

        self.init_weights()
        ##############################################################################
        #                             END OF YOUR CODE                               #
        ##############################################################################
    def init_weights(self):
        """Initialize all model parameters (weights and biases) in all layers to desired distributions"""

        torch.manual_seed(42)
        for conv in [self.conv1, self.conv2, self.conv3]:
            C_in = conv.weight.size(1)
            nn.init.normal_(conv.weight, 0.0, 1 / sqrt(5 * 5 * C_in))
            nn.init.constant_(conv.bias, 0.0)

        ## TODO: initialize the parameters for [self.fc1]
        nn.init.normal_(self.fc1.weight, 0.0, sqrt(1/self.fc1.weight.size(1)))
        nn.init.constant_(self.fc1.bias, 0.0)
        ##
        
    def forward(self, x):
        ##############################################################################
        # TODO: Design your own network, implement forward pass here                 # 
        ##############################################################################
        
        x = x.to(device)
        # Flatten each image in the batch
        z= self.pool(F.relu(self.conv1(x)))
        z= self.pool(F.relu(self.conv2(z)))
        z = F.relu(self.conv3(z))

        # z=z.permute(*torch.arange(z.ndim - 1, -1, -1))

        z=torch.flatten(z, start_dim=1)

        # print("after resize: ",z.shape)
        z=self.fc1(z)
  
  
        ##
        

        # The loss layer will be applied outside Network class
        return z
        ##############################################################################
        #                             END OF YOUR CODE                               #
        ##############################################################################

model = Network().to(device)
criterion = nn.CrossEntropyLoss() # Specify the loss layer
print('Your network:')
print(summary(model, (1,28,28), device=device)) # visualize your model

##############################################################################
# TODO: Modify the lines below to experiment with different optimizers,      #
# parameters (such as learning rate) and number of epochs.                   #
##############################################################################
# Set up optimization hyperparameters
learning_rate = 1e-3
weight_decay = 1e-5
num_epoch = 20  # TODO: Choose an appropriate number of training epochs
optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                       weight_decay=weight_decay) # Try different optimizers
##############################################################################
#                             END OF YOUR CODE                               #
##############################################################################

Your network:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 14, 14]             416
         MaxPool2d-2             [-1, 16, 7, 7]               0
            Conv2d-3             [-1, 64, 4, 4]          25,664
         MaxPool2d-4             [-1, 64, 2, 2]               0
            Conv2d-5              [-1, 8, 1, 1]          12,808
            Linear-6                   [-1, 10]              90
Total params: 38,978
Trainable params: 38,978
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.04
Params size (MB): 0.15
Estimated Total Size (MB): 0.19
----------------------------------------------------------------
None


In [85]:
%%time
def train(model, trainloader, valloader, num_epoch=10):  # Train the model
    print("Start training...")
    trn_loss_hist = []
    trn_acc_hist = []
    val_acc_hist = []
    model.train()  # Set the model to training mode
    for i in range(num_epoch):
        running_loss = []
        print('-----------------Epoch = %d-----------------' % (i+1))
        for batch, label in tqdm(trainloader):
            batch = batch.to(device)
            label = label.to(device)
            optimizer.zero_grad()  # Clear gradients from the previous iteration
            # This will call Network.forward() that you implement
            pred = model(batch)
            loss = criterion(pred, label)  # Calculate the loss
            running_loss.append(loss.item())
            loss.backward()  # Backprop gradients to all tensors in the network
            optimizer.step()  # Update trainable weights
        print("\n Epoch {} loss:{}".format(i+1, np.mean(running_loss)))

        # Keep track of training loss, accuracy, and validation loss
        trn_loss_hist.append(np.mean(running_loss))
        trn_acc_hist.append(evaluate(model, trainloader))
        print("\n Evaluate on validation set...")
        val_acc_hist.append(evaluate(model, valloader))
    print("Done!")
    return trn_loss_hist, trn_acc_hist, val_acc_hist


def evaluate(model, loader):  # Evaluate accuracy on validation / test set
    model.eval()  # Set the model to evaluation mode
    correct = 0
    with torch.no_grad():  # Do not calculate grident to speed up computation
        for batch, label in tqdm(loader):
            batch = batch.to(device)
            label = label.to(device)
            pred = model(batch)
            correct += (torch.argmax(pred, dim=1) == label).sum().item()
        acc = correct/len(loader.dataset)
        print("\n Evaluation accuracy: {}".format(acc))
        return acc


trn_loss_hist, trn_acc_hist, val_acc_hist = train(model, trainloader,
                                                  valloader, num_epoch)

##############################################################################
# TODO: Note down the evaluation accuracy on test set                        #
##############################################################################
print("\n Evaluate on test set")
evaluate(model, testloader)

Start training...
-----------------Epoch = 1-----------------


  0%|                                                    | 0/10 [00:00<?, ?it/s]


RuntimeError: stack expects each tensor to be equal size, but got [720, 1280, 3] at entry 0 and [360, 640, 3] at entry 1