In [None]:
# Mount Google Drive at /content/drive; the following cell changes the working
# directory to a project folder located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd "/content/drive/My Drive/CS3244-ML"

/content/drive/My Drive/CS3244-ML


In [None]:
import torch
import torchvision.transforms as transforms
import torch.utils.data as data
from PIL import Image
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Input locations, both relative to the current working directory.
# CSV table that MyCustomDataset reads its image names and labels from.
pathToCSV = './CNNProcessedData.csv'
# Folder holding the poster image files. The trailing slash is kept because the
# path may be joined to file names by plain string concatenation.
pathToSamplePosters = './SamplePosters/'

# Store all images into a dict for faster access

In [None]:
import os
import pickle

# Pickle file caching every poster as {image file name: RGB PIL image}, so the
# images are decoded once instead of on every run.
imageCachePath = 'images.dat'

if os.path.exists(imageCachePath):
    # SECURITY: pickle.load can execute arbitrary code stored in the file.
    # Only load a cache file that this notebook wrote itself.
    with open(imageCachePath, 'rb') as handle:
        imageDict = pickle.load(handle)
else:
    # No cache yet (fresh checkout or deleted file): build it from the poster
    # folder and persist it, so "Restart & Run All" works without manual steps.
    imageDict = {}
    for i, imageFilename in enumerate(os.listdir(pathToSamplePosters)):
        if i % 10000 == 0:
            print(f'Processed {i} images')
        # The context manager releases the file handle; convert() returns an
        # independent image, so nothing refers to the closed file afterwards.
        with Image.open(os.path.join(pathToSamplePosters, imageFilename)) as poster:
            imageDict[imageFilename] = poster.convert('RGB')

    with open(imageCachePath, 'wb') as outfile:
        pickle.dump(imageDict, outfile, protocol=4)

# Custom data loader

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

class MyCustomDataset(torch.utils.data.Dataset):
    """Poster dataset yielding ``(image name, image tensor, label vector)``.

    The CSV at ``csv_path`` supplies the image name (column 0) and the label
    values (column 2 onward, stored as float32). Column 1 is not used.
    Images are looked up by name in an in-memory mapping, resized to 224x224,
    converted to a tensor and normalised per channel.

    Args:
        csv_path: path of the CSV file passed to ``pandas.read_csv``.
        images: optional mapping from image name to an image accepted by the
            resize transform. When omitted, the module-level ``imageDict`` is
            used at lookup time, which is the original behaviour.
    """

    def __init__(self, csv_path, images=None):
        # Transforms, applied in this order by __getitem__.
        self.resize = transforms.Resize((224, 224))
        self.to_tensor = transforms.ToTensor()
        # Per-channel mean / std; presumably the statistics expected by the
        # pretrained torchvision backbone this notebook fine-tunes.
        self.normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        self.data_info = pd.read_csv(csv_path)
        # Column 0: image names, used as keys into the image mapping.
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Columns 2..N: label values (column 1 is skipped).
        self.label_arr = np.asarray(self.data_info.iloc[:, 2:], dtype=np.float32)
        self.data_len = len(self.data_info.index)
        # None means "resolve the global imageDict when an item is requested".
        self.images = images

    def __getitem__(self, index):
        """Return ``(name, normalised image tensor, label array)`` for one row.

        Raises:
            KeyError: if no image is stored under the row's image name.
        """
        image_source = imageDict if self.images is None else self.images
        single_image_name = self.image_arr[index]

        try:
            img_as_img = image_source[single_image_name]
        except KeyError:
            raise KeyError(
                f'No image loaded for {single_image_name!r} (CSV row {index})'
            ) from None

        img_resized = self.resize(img_as_img)
        img_as_tensor = self.to_tensor(img_resized)
        img_as_tensor_normalized = self.normalize(img_as_tensor)

        single_image_label = self.label_arr[index]
        return (single_image_name, img_as_tensor_normalized, single_image_label)

    def __len__(self):
        """Number of rows read from the CSV."""
        return self.data_len

dataset = MyCustomDataset(pathToCSV)

# Hold out 5% of the rows for validation; the rest is used for training.
validation_split = .05
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

# Seed both RNGs that drive the data pipeline. NumPy decides the train /
# validation split (unchanged from before); torch drives SubsetRandomSampler
# and the DataLoader workers, which were previously unseeded, so batch order
# and the sampled validation batch differed on every run.
np.random.seed(42)
torch.manual_seed(42)
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

print(f'Training dataset set size: {len(train_sampler)}, Test dataset set size: {len(valid_sampler)}')

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Both loaders wrap the same dataset; the samplers restrict each one to its
# own index subset.
trainloader = torch.utils.data.DataLoader(dataset, batch_size=64,
                                          num_workers=8, sampler=train_sampler)
testloader = torch.utils.data.DataLoader(dataset, batch_size=500,
                                          num_workers=8, sampler=valid_sampler)


Training dataset set size: 24414, Test dataset set size: 1284
cuda:0


# Define CNN model

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.autograd import Variable
from matplotlib import pyplot as plt
import torch.nn.functional as F

# Genre names. The position of a name is the index of the matching label
# column / network output.
classes = [
    "Adventure", "Comedy", "Action", "Romance", "Drama",
    "Crime", "Thriller", "Horror", "Mystery", "Documentary",
]
# One count per genre, in the same order as `classes`.
# NOTE(review): presumably the number of posters tagged with each genre;
# confirm against the CSV.
classCounts = [2342, 8663, 3847, 3965, 12986, 3319, 3643, 2745, 1520, 3049]
# NOTE(review): despite the name this is not sum(classCounts) (that is 46079).
# 25698 equals the training plus validation set sizes printed when the data
# was split, i.e. it looks like the total number of samples. TODO confirm.
sumClassCounts = 25698
# Per-genre weight (sumClassCounts - count) / count, later handed to the loss
# as `pos_weight`.
weights = torch.FloatTensor(
    [(sumClassCounts - count) / count for count in classCounts]
).to(device)


import torchvision
## Build the classifier on top of a pretrained ResNet-18
# Checkpoint file used both for saving during training and for reloading.
modelSaveLoadPath = '/content/drive/My Drive/CS3244-ML/model_gpu_top8dropoutV2.5.1'
net = torchvision.models.resnet18(pretrained=True)

# Variants that were tried and are currently disabled:
#   * ResNet-152 backbone (torchvision.models.resnet152(pretrained=True)),
#     saved under '.../model_gpu_top8dropoutV2.5.2'
#   * freezing the backbone by setting requires_grad = False on every
#     parameter before replacing the head

# Replace the final fully connected layer with a small MLP head that ends in
# one output (logit) per genre.
num_ftrs = net.fc.in_features
head_layers = [
    nn.Linear(num_ftrs, 1024),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, len(classes)),
]
net.fc = nn.Sequential(*head_layers)

net = net.to(device)
print(net)

# Multi-label loss on raw logits; `weights` scales the positive term per genre.
criterion = nn.BCEWithLogitsLoss(pos_weight=weights)
optimizer = torch.optim.Adam(net.parameters(), lr=4e-4, weight_decay=4e-5)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
 

In [None]:
# Micro-F1 score a model must exceed before the training loop writes a
# checkpoint; the loop raises this value every time it saves.
# NOTE(review): 0.54 is presumably the best score of an earlier run — confirm.
curMaxF1Score = 0.54

# Train model

In [None]:
from sklearn.metrics import hamming_loss
from sklearn.metrics import f1_score

# Draw one batch from the validation loader and keep it on the device; every
# periodic check below is evaluated on this same batch.
dataiter = iter(testloader)
# next(...) instead of dataiter.next(): the .next() method does not exist on
# current PyTorch DataLoader iterators.
testdata = next(dataiter)

testposterId, testinputs, testlabels = testdata
testinputs = testinputs.to(device)
testlabels = testlabels.to(device)
# Ground truth as a NumPy array, converted once instead of at every check.
testlabelsNp = testlabels.cpu().numpy()

net.train()

for epoch in range(300):  # loop over the dataset multiple times

    running_loss = 0.0
    # `batch` rather than `data`, so the `torch.utils.data as data` import is
    # not overwritten by the loop variable.
    for i, batch in enumerate(trainloader, 0):
        # each batch is (poster ids, inputs, labels)
        posterId, inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:    # evaluate and report every 200 mini-batches
            net.eval()
            with torch.no_grad():
                testlogits = net(testinputs)
                # .item() so the log shows a number instead of a tensor repr.
                testloss = criterion(testlogits, testlabels).item()

            # A logit above 0 (sigmoid above 0.5) counts as "genre predicted".
            testoutputs = (testlogits > 0).cpu().numpy().astype(int)
            hamLoss = hamming_loss(testlabelsNp, testoutputs)
            f1score = f1_score(testlabelsNp, testoutputs, average='micro')
            # A sample is fully correct when its predicted genre set equals its
            # labelled genre set.
            fullyCorrect = int(((testoutputs == 1) == (testlabelsNp == 1)).all(axis=1).sum())
            print(f'Epoch[{epoch + 1} Batch {i + 1}] Training loss: {running_loss / 200}, Validation loss: {testloss}, Hamming loss: {hamLoss}, F1 Score: {f1score}, 100% correct: {fullyCorrect / len(testlabels)}')

            net.train()
            # Checkpoint only when the validation F1 beats the best seen so far.
            if f1score > curMaxF1Score:
                print(f"Saving model with F1 Score: {f1score}, Hamming loss: {hamLoss}")
                curMaxF1Score = f1score
                torch.save(net.state_dict(), modelSaveLoadPath)

            running_loss = 0.0

print('Finished Training')

In [None]:
import torchvision
import matplotlib
from sklearn.metrics import hamming_loss
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

# Restore the saved checkpoint into the existing `net`. map_location=device
# (instead of a hard-coded "cuda:0") lets the load also succeed on a CPU-only
# runtime, matching the device fallback chosen when `device` was created.
net.load_state_dict(torch.load(modelSaveLoadPath, map_location=device))
net.to(device)
net.eval()

# One batch that spans the whole validation subset, for aggregate metrics.
demoloader = torch.utils.data.DataLoader(dataset, batch_size=len(valid_sampler), num_workers=8, sampler=valid_sampler)

# One poster per batch, for inspecting individual predictions.
demoloader2 = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=8, sampler=valid_sampler)

In [None]:
def imshow(img):
    """Display a channel-first image tensor with matplotlib.

    Args:
        img: 3-D torch tensor whose axes are reordered from (0, 1, 2) to
            (1, 2, 0) before plotting, i.e. the first axis becomes the last.
    """
    # Convert to NumPy explicitly; passing a tensor to np.transpose only works
    # through NumPy's fallback path after Tensor.transpose rejects the axes.
    npimg = img.detach().cpu().numpy().transpose(1, 2, 0)
    plt.imshow(npimg)
    plt.show()

# Score the model on a single batch taken from `demoloader`.
dataiter = iter(demoloader)
# next(...) instead of dataiter.next(): the .next() method does not exist on
# current PyTorch DataLoader iterators.
posterId, inputs, labels = next(dataiter)
inputs = inputs.to(device)
labels = labels.to(device)

with torch.no_grad():
    outputs = net(inputs)
# A logit above 0 (sigmoid above 0.5) counts as "genre predicted".
outputs = (outputs > 0).cpu().numpy().astype(int)
# Ground truth as a NumPy array, converted once for all metrics below.
labelsNp = labels.cpu().numpy()

hamLoss = hamming_loss(labelsNp, outputs)
f1score = f1_score(labelsNp, outputs, average='micro')
recall = recall_score(labelsNp, outputs, average='micro')
precision = precision_score(labelsNp, outputs, average='micro')

# Boolean (sample, genre) masks. Working on these replaces the element-wise
# Python loops, which compared device tensor elements one at a time.
predictedMask = outputs == 1
actualMask = labelsNp == 1
# Share of samples with at least one genre both predicted and labelled.
atLeastOneCorrect = int((predictedMask & actualMask).any(axis=1).sum()) / len(labels)
# Number of samples whose predicted genre set equals the labelled genre set.
fullyCorrect = int((predictedMask == actualMask).all(axis=1).sum())

print(f'Hamming loss: {hamLoss}, F1 Score: {f1score}, 100% accuracy: {fullyCorrect / len(labels)}, At leach one positive correct: {atLeastOneCorrect}, Recall: {recall}, Precision: {precision}')

# Per-sample genre lists, each right-aligned in a 35-character column.
print('Predicted: ', ' '.join('%35s' % [classes[i] for i in np.flatnonzero(predictedMask[j])] for j in range(len(labels))))
print('GroundTruth: ', ' '.join('%35s' % [classes[i] for i in np.flatnonzero(actualMask[j])] for j in range(len(labels))))

Hamming loss: 0.1998442367601246, F1 Score: 0.5248148148148148, 100% accuracy: 0.12149532710280374, At leach one positive correct: 0.8029595015576324, Recall: 0.6092003439380912, Precision: 0.46096291476903056
Predicted:              ['Adventure', 'Action']                 ['Drama', 'Horror']       ['Comedy', 'Action', 'Crime'] ['Action', 'Drama', 'Crime', 'Thriller']      ['Action', 'Romance', 'Drama']                ['Romance', 'Drama']                          ['Comedy']                     ['Documentary']                     ['Documentary']      ['Comedy', 'Romance', 'Drama']                ['Romance', 'Drama']      ['Comedy', 'Romance', 'Drama']     ['Action', 'Drama', 'Thriller']      ['Comedy', 'Romance', 'Drama']      ['Comedy', 'Romance', 'Drama']              ['Thriller', 'Horror']      ['Comedy', 'Romance', 'Drama']     ['Drama', 'Thriller', 'Horror']              ['Action', 'Thriller']                 ['Action', 'Drama']                ['Romance', 'Drama']                  

In [None]:
def imshow(img):
    """Show `img` with matplotlib exactly as it is passed in.

    This redefines the earlier `imshow`, which transposed a tensor first; here
    no axis reordering is done (the call below passes an entry of `imageDict`).
    """
    # No transpose here: np.transpose(img, (1, 2, 0)) is intentionally disabled.
    plt.imshow(img)
    plt.show()

net.eval()
dataiter = iter(demoloader2)
# next(...) instead of dataiter.next(): the .next() method does not exist on
# current PyTorch DataLoader iterators.
# NOTE: `input` shadows the Python builtin; the name is kept in case other
# cells read it.
posterId, input, label = next(dataiter)
input = input.to(device)
label = label.to(device)

with torch.no_grad():
    # Per-genre sigmoid probability of the single poster, in percent.
    output = torch.sigmoid(net(input))[0] * 100

imshow(imageDict[posterId[0]])
# Report the genres whose probability exceeds 50% (equivalent to logit > 0),
# together with that probability. The previous line listed every class
# regardless of the model output and joined the characters of the string.
print('Predicted: ', [f'{classes[i]} ({p:.1f}%)' for i, p in enumerate(output.tolist()) if p > 50])
# Fixed misplaced parenthesis: the filter belongs to the comprehension, not to
# the enumerate(...) call.
print('GroundTruth: ', [classes[idx] for idx, elem in enumerate(label.tolist()[0]) if elem == 1])