In [13]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm # Displays a progress bar
from math import sqrt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchsummary import summary
from torchvision import datasets, io, models, ops, transforms, utils
from torch.utils.data import Dataset, Subset, DataLoader, random_split
import pandas as pd
from PIL import Image
# from torchvision import datasets, io, models, ops, transforms, utils
import os
# import data as dataset


In [14]:
if torch.cuda.is_available():
    print("Using the GPU. You are good to go!")
    device = 'cuda'
else:
    print("Using the CPU. Overall speed may be slowed down")
    device = 'cpu'
device = 'cpu'

Using the GPU. You are good to go!


In [15]:
df = pd.read_csv('output.csv', delimiter=';')

In [16]:
# df[df['partition']=='dev']
df[df['filename']=='deafvideo_4/mattref2005_5169']

Unnamed: 0.1,Unnamed: 0,filename,url,start_time,number_of_frames,width,height,label_proc,label_raw,label_notes,partition,signer,Label,start_l,end_l
23,1800,deafvideo_4/mattref2005_5169,http://www.deafvideo.tv/805784,0:03:20.420000,6,640,360,iat,iat,iat?,train,37,a,2,3


In [17]:
def file_exists(filename):
    folders = ['avg_dev', 'avg_test', 'avg_train']
    filename = filename.replace('/','_')+".png"
    # print(filename)
    for folder in folders:
        if os.path.exists(os.path.join(folder, filename)):
            return True
    return False

class HandSignDataset(Dataset):
    def __init__(self, csv_file, root_dir, partition, transform=None):
        self.df = pd.read_csv(csv_file, delimiter=';')
        self.df = self.df[self.df['partition'] == partition]
        self.df = self.df[self.df['filename'].apply(file_exists)]

    # define a function to check if a file exists in any of the folders

        self.root_dir = root_dir
        self.transform = transform
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        filename = self.df.iloc[idx, self.df.columns.get_loc('filename')]
        filename_img = self.df.iloc[idx, self.df.columns.get_loc('filename')].replace('/','_')
        label = self.df.iloc[idx, self.df.columns.get_loc('Label')]
        label = ord(label) - 97
        label = torch.tensor(label).long()
        # if label != 'a' and label != 'b' and label != 'c' and label != 'd':
        #     print(label)
        image_path = os.path.join(self.root_dir, filename_img+".png")
        bbox_path = os.path.join("BBox", filename, "0000.txt")
        
        try:
            with open(bbox_path) as f:
                bbox_info = f.readline().split(',')
                print("bbox_info",bbox_info)
            x0, y0, x1, y1, _ = bbox_info
            x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

            image = Image.open(image_path).convert('RGB')
            image = image.crop((x0, y0, x1, y1))
        except FileNotFoundError:
            image = Image.open(image_path).convert('RGB')
        if(np.sum(image)==0):
            print('ALL ZERO')

        if self.transform:
            image = self.transform(image)
        # print('image name: ',filename_img," | shape:",image.shape," | label: ",label)
#         utils.save_image(img, f"/ImageOutput/{filename_img}_T.png")
        # img1 = img1.numpy() # TypeError: tensor or list of tensors expected, got <class 'numpy.ndarray'>
#         save_image(img, filename +'_T.png')
        return image, label

In [18]:
# Define the transformation(s) to be applied to the images
transform = transforms.Compose([
    transforms.Resize((360, 640)),
    transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                          std=[0.229, 0.224, 0.225])
])

# Call the create_dataset function to create a PyTorch dataset
test_dataset = HandSignDataset(csv_file='output.csv', root_dir='avg_test', partition='test',transform=transform)
train_dataset = HandSignDataset(csv_file='output.csv', root_dir='avg_train', partition='train',transform=transform)
val_dataset = HandSignDataset(csv_file='output.csv', root_dir='avg_dev', partition='dev',transform=transform)


In [19]:
batch_size = 32
# Create a data loader for the dataset

# trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, collate_fn=custom_collate)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

In [20]:
for batch in trainloader:
    img = batch[0]
    label = batch[1]

In [21]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        ##############################################################################
        # TODO: Design your own network, define layers here.                          #
        # Here We provide a sample of two-layer fc network from HW4 Part3.           #
        # Your solution, however, should contain convolutional layers.               #
        # Refer to PyTorch documentations of torch.nn to pick your layers.           #
        # (https://pytorch.org/docs/stable/nn.html)                                  #
        # Some common choices: Linear, Conv2d, ReLU, MaxPool2d, AvgPool2d, Dropout   #
        # If you have many layers, use nn.Sequential() to simplify your code         #
        ##############################################################################
        # from 28x28 input image to hidden layer of size 256
        # self.fc1 = nn.Linear(28*28, 8) 
        self.conv1 = nn.Conv2d(in_channels = 3,out_channels = 16,padding = 2, kernel_size = (5,5),stride = (2,2))
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(in_channels = 16,out_channels = 64, padding = 2,kernel_size = (5,5),stride = (2,2))
        self.conv3 = nn.Conv2d(in_channels = 64,out_channels = 8, padding = 2,kernel_size = (5,5),stride = (2,2))
        self.fc_1 = nn.Linear(in_features = 1760, out_features =800) 
        self.fc_2 = nn.Linear(in_features=800, out_features=200)
        self.fc_3 = nn.Linear(in_features=200, out_features=4)
        
        self.init_weights()
        ##############################################################################
        #                             END OF YOUR CODE                               #
        ##############################################################################
    def init_weights(self):
        """Initialize all model parameters (weights and biases) in all layers to desired distributions"""

        torch.manual_seed(42)

        for conv in [self.conv1, self.conv2, self.conv3]:
            C_in = conv.weight.size(1)
            nn.init.normal_(conv.weight, 0.0, 1 / sqrt(5 * 5 * C_in))
            nn.init.constant_(conv.bias, 0.0)

        ## TODO: initialize the parameters for [self.fc_1]

        nn.init.normal_(self.fc_1.weight, 0.0, sqrt(1/self.fc_1.weight.size(1)))
        nn.init.constant_(self.fc_1.bias, 0.0)
        ##

    def forward(self, x):
        ##############################################################################
        # TODO: Design your own network, implement forward pass here                 # 
        ##############################################################################
        
        N, C, H, W = x.shape

        ## TODO: forward pass
        z= self.pool(F.relu(self.conv1(x)))
        z= self.pool(F.relu(self.conv2(z)))
        z = F.relu(self.conv3(z))

        # z=z.permute(*torch.arange(z.ndim - 1, -1, -1))
        # print("before flatten: ", z.shape)
        z=torch.flatten(z, start_dim=1)

        # print("after resize: ",z.shape) # 32 x 1760
        z = self.fc_1(z)
        z = self.fc_2(z)
        z = self.fc_3(z)
        # print("final shape: ", z.shape)

        return z


model = Network().to(device)
criterion = nn.CrossEntropyLoss() # Specify the loss layer
print('Your network:')
# print(summary(model, (3,28,28), device=device)) # visualize your model

##############################################################################
# TODO: Modify the lines below to experiment with different optimizers,      #
# parameters (such as learning rate) and number of epochs.                   #
##############################################################################
# Set up optimization hyperparameters
learning_rate = 1e-3
weight_decay = 1e-3
num_epoch = 20  # TODO: Choose an appropriate number of training epochs
optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                       weight_decay=weight_decay) # Try different optimizers
##############################################################################
#                             END OF YOUR CODE                               #
##############################################################################

RuntimeError: false INTERNAL ASSERT FAILED at "../c10/cuda/CUDAGraphsC10Utils.h":73, please report a bug to PyTorch. Unknown CUDA graph CaptureStatus32765

In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image

# Load the ResNet-18 model pretrained on ImageNet
resnet18 = models.resnet18(pretrained=True)



In [None]:
transform = transforms.Compose([
    transforms.Resize((360, 640)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Call the create_dataset function to create a PyTorch dataset
test_dataset = HandSignDataset(csv_file='output.csv', root_dir='avg_test', partition='test',transform=transform)
train_dataset = HandSignDataset(csv_file='output.csv', root_dir='avg_train', partition='train',transform=transform)
val_dataset = HandSignDataset(csv_file='output.csv', root_dir='avg_dev', partition='dev',transform=transform)

batch_size = 32
# Create a data loader for the dataset

# trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, collate_fn=custom_collate)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

learning_rate = 1e-3
weight_decay = 1e-3
num_epoch = 10  # TODO: Choose an appropriate number of training epochs
optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                       weight_decay=weight_decay) # Try different optimizers

In [None]:
%%time
def train(model, trainloader, valloader, num_epoch=1):  # Train the model
    print("Start training...")
    trn_loss_hist = []
    trn_acc_hist = []
    val_acc_hist = []
    model.train()  # Set the model to training mode
    for i in range(num_epoch):
        running_loss = []
        print('-----------------Epoch = %d-----------------' % (i+1))
        for batch, label in trainloader:
            # print('batch: ', len(batch))
            # print("type: ", type(batch))
            batch = batch.to(device)
            # print(batch.shape)
            # print('batch:' , lenbatch)
            # print("label: ", label)
            label = label.to(device)
            # print('3')
            optimizer.zero_grad()  # Clear gradients from the previous iteration
            # This will call Network.forward() that you implement
            pred = model(batch)
            loss = criterion(pred, label)  # Calculate the loss
            running_loss.append(loss.item())
            loss.backward()  # Backprop gradients to all tensors in the network
            optimizer.step()  # Update trainable weights
        print("\n Epoch {} loss:{}".format(i+1, np.mean(running_loss)))

        # Keep track of training loss, accuracy, and validation loss
        trn_loss_hist.append(np.mean(running_loss))
        trn_acc_hist.append(evaluate(model, trainloader))
        print("\n Evaluate on validation set...")
        val_acc_hist.append(evaluate(model, valloader))
    print("Done!")
    return trn_loss_hist, trn_acc_hist, val_acc_hist


def evaluate(model, loader):  # Evaluate accuracy on validation / test set
    model.eval()  # Set the model to evaluation mode
    correct = 0
    with torch.no_grad():  # Do not calculate grident to speed up computation
        for batch, label in loader:
            batch = batch.to(device)
            # print(batch)
            label = label.to(device)
            pred = model(batch)
            correct += (torch.argmax(pred, dim=1) == label).sum().item()
        acc = correct/len(loader.dataset)
        print("\n Evaluation accuracy: {}".format(acc))
        return acc


trn_loss_hist, trn_acc_hist, val_acc_hist = train(model, trainloader,
                                                  valloader, num_epoch)

##############################################################################
# TODO: Note down the evaluation accuracy on test set                        #
##############################################################################
print("\n Evaluate on test set")
evaluate(model, testloader)

Start training...
-----------------Epoch = 1-----------------


../aten/src/ATen/native/cuda/Loss.cu:242: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [14,0,0] Assertion `t >= 0 && t < n_classes` failed.


RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`

In [None]:
ord('b') - 97  

1