In [1]:
import numpy as np

import lab_utils

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim

import PIL
from PIL import Image
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
from skimage import io, transform
import os
import sys

import torchvision.transforms as transforms
import torchvision.models as models

import csv

from tqdm import tqdm as tqdm

### Model Definition

In [2]:
class MovieGenreClassifier(nn.Module):
    """Small fully connected network that maps a feature vector to genre logits.

    The network is ``Linear(nclass, 80) -> ReLU -> Linear(80, nlabel)`` and is
    stored as ``self.main``. The output is raw (un-squashed) scores, one per
    label.

    Args:
        nclass: size of the last dimension of the input feature vector.
        nlabel: number of output scores produced per input.
    """

    def __init__(self, nclass, nlabel):
        super(MovieGenreClassifier, self).__init__()
        hidden_units = 80
        layers = [
            nn.Linear(nclass, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, nlabel),
        ]
        # Keep the attribute name and layer order: they determine the
        # parameter names ("main.0.*", "main.2.*") of saved models.
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Return the per-label scores for ``input`` (last dim must be ``nclass``)."""
        scores = self.main(input)
        return scores

def extractObjectFeature(image, id2objects, objectTable):
    """Build the object-detection feature vector for one image.

    Each detection is a string of the form ``"<object>:<score>%"``. The score
    is converted to a fraction (``score / 100``) and added to the slot of the
    feature vector that ``objectTable`` assigns to ``<object>``, so repeated
    detections of the same object accumulate.

    Args:
        image: key of the image in ``id2objects`` (e.g. ``"123.jpg"``).
        id2objects: dict mapping image key -> list of detection strings.
        objectTable: dict mapping object name -> index in the feature vector.

    Returns:
        A list of length ``len(objectTable)``. Images missing from
        ``id2objects`` yield an all-zero vector.

    Raises:
        KeyError: if a detection names an object absent from ``objectTable``.
    """
    objdetect = [0] * len(objectTable)

    for obj in id2objects.get(image, []):
        # Detection lists are CSV row tails and may contain empty cells; the
        # code that builds objectTable skips them, so skip them here as well
        # instead of failing on the empty name.
        if obj == '':
            continue

        parts = obj.split(':')
        name = parts[0]
        percent = parts[1]
        objdetect[objectTable[name]] += float(percent.strip('%')) / 100

    return objdetect
    
def extractLabel(image, id2genre, genresTable, nlabel=23):
    """Build the multi-hot genre label for one image.

    Args:
        image: key of the image in ``id2genre`` (e.g. ``"123.jpg"``).
        id2genre: dict mapping image key -> ``'|'``-separated genre string.
        genresTable: dict mapping genre name -> column index of the label.
        nlabel: width of the label vector. Defaults to 23, the width that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A ``torch`` tensor of shape ``(1, nlabel)`` holding 1 in the column of
        every genre of the image that appears in ``genresTable`` and 0
        elsewhere. Genres missing from ``genresTable`` are ignored.

    Raises:
        KeyError: if ``image`` is not a key of ``id2genre``.
    """
    labelVec = torch.zeros(1, nlabel)

    for genre in id2genre[image].split('|'):
        if genre in genresTable:
            labelVec[0][genresTable[genre]] = 1

    return labelVec

### Custom Dataset Definition

In [3]:
# Transform pipeline with a single step that converts its input into a torch
# tensor. It is not referenced by the other cells visible in this notebook.
loader = transforms.Compose([transforms.ToTensor()])

class MyDataset(Dataset):
    """Object-feature / genre-label dataset for the images found in ``root``.

    The annotation CSV is read once. From each row the constructor uses:
    column 0 (image id; ``id + ".jpg"`` is the image key), column 4
    (``'|'``-separated genres) and columns 6 onward (detection strings of the
    form ``"<object>:<score>%"``).

    Two lookup tables are derived from the whole CSV and kept on the instance:

    * ``objectTable`` - object name -> feature index, one entry per distinct
      object name, in first-seen order of the underlying dict.
    * ``genresTable`` - genre name -> label index, restricted to genres that
      occur at least ``MIN_GENRE_COUNT`` times.

    Every file name returned by ``os.listdir(root)`` is converted eagerly into
    a feature vector (``extractObjectFeature``) and a label
    (``extractLabel``); the image files themselves are never opened.

    Args:
        root: directory whose file names select the samples.
        csvfile: path of the annotation CSV.
        transform: optional callable applied to the raw feature list in
            ``__getitem__`` before it is converted to a tensor.

    Note:
        ``self.csvfile`` stores the CSV *path*. Earlier revisions kept an open
        (and already exhausted) file handle there, which leaked a descriptor
        per dataset.
    """

    # Genres seen fewer times than this in the CSV are dropped from the labels.
    MIN_GENRE_COUNT = 100

    def __init__(self, root, csvfile, transform=None):
        self.root = root
        self.transform = transform
        self.csvfile = csvfile

        rows = self._read_rows(csvfile)

        # Per-image lookups; rows without an id are not addressable.
        id2genre = {}
        id2objects = {}
        for row in rows:
            if row[0] != "":
                id2genre[row[0] + ".jpg"] = row[4]
                id2objects[row[0] + ".jpg"] = row[6:]

        # Occurrence counts over *all* rows. The insertion order of these
        # dicts defines the index tables below, so it must not be changed:
        # other code rebuilds the same tables the same way.
        genres = {}
        objects = {}
        for row in rows:
            for ele in row[4].split('|'):
                if ele != '':
                    genres[ele] = genres.get(ele, 0) + 1
            for obj in row[6:]:
                if obj != '':
                    name = obj.split(':')[0]
                    objects[name] = objects.get(name, 0) + 1

        for ele in list(genres):
            if genres[ele] < self.MIN_GENRE_COUNT:
                del genres[ele]

        objectTable = {}
        for index, name in enumerate(list(objects)):
            objectTable[name] = index

        genresTable = {}
        for index, name in enumerate(list(genres)):
            genresTable[name] = index

        self.objectTable = objectTable
        self.genresTable = genresTable

        self.dataset = []
        self.labels = []
        for img in tqdm(os.listdir(self.root)):
            self.dataset.append(extractObjectFeature(img, id2objects, objectTable))
            self.labels.append(extractLabel(img, id2genre, genresTable))

    @staticmethod
    def _read_rows(csvfile):
        """Return every row of the CSV at path ``csvfile`` as a list of lists.

        The file is closed before returning. ``csv.reader`` needs a binary
        handle on Python 2 and a text handle opened with ``newline=''`` on
        Python 3, so the mode is chosen from the running interpreter.
        """
        if sys.version_info[0] < 3:
            handle = open(csvfile, 'rb')
        else:
            handle = open(csvfile, 'r', newline='')
        with handle:
            return list(csv.reader(handle))

    def __len__(self):
        # Count the cached samples rather than re-listing the directory, so
        # the length always agrees with what __getitem__ can serve.
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``.

        ``features`` is a float tensor reshaped to ``(1, -1)``; ``label`` is
        the tensor produced by ``extractLabel`` in the constructor.
        """
        image = self.dataset[idx]
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        image = torch.FloatTensor(image).view(1, -1)

        return image, label
                               
# Training and validation splits: each one takes its samples from the file
# names in its own image directory and its annotations from the same CSV.
trainset = MyDataset(
    csvfile='/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/Dataset/NewMovieGenre.csv',
    root='/home/ubuntu/notebooks/dataset/train',
)
valset = MyDataset(
    csvfile='/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/Dataset/NewMovieGenre.csv',
    root='/home/ubuntu/notebooks/dataset/validation/',
)

100%|██████████| 26985/26985 [00:00<00:00, 59596.91it/s]
100%|██████████| 1891/1891 [00:00<00:00, 72488.09it/s]


### Train Model

In [4]:
# Training batches are reshuffled every epoch.
trainLoader = torch.utils.data.DataLoader(trainset, batch_size = 128,
                                          shuffle = True, num_workers = 0)

# Validation is evaluation only, so keep a fixed sample order. The reported
# validation loss is a mean of per-batch means and the last batch is smaller,
# so shuffling made that number vary from run to run for the same model.
valLoader = torch.utils.data.DataLoader(valset, batch_size = 128,
                                        shuffle = False, num_workers = 0)

def _as_float(value):
    """Convert a Python number or a one-element tensor to a plain float.

    Works whether reductions/indexing hand back Python numbers or
    zero-dimensional tensors, so the logging code does not depend on the
    installed PyTorch release.
    """
    if hasattr(value, 'view'):
        value = value.view(-1)[0]
    return float(value)


def _batch_jaccard(scores, labels):
    """Sum, over the batch, of the Jaccard index between predictions and labels.

    ``scores`` and ``labels`` are 3-D tensors (batch, 1, n_labels). A label is
    predicted when ``sigmoid(score)`` rounds to 1. ``torch.sigmoid`` is used
    instead of ``exp(x) / (exp(x) + 1)`` because the latter evaluates to
    inf / inf = NaN for large scores.
    """
    predicted = torch.sigmoid(scores).round()
    intersection = (labels * predicted).sum(2).sum(1)
    union = (labels + predicted).clamp(0, 1).sum(2).sum(1)
    # A sample with no true and no predicted label has an empty union; score
    # it 0 instead of 0/0 = NaN, which would poison the running accuracy.
    return _as_float((intersection / union.clamp(min=1)).sum())


def _run_epoch(network, criterion, loader, description, use_gpu, optimizer=None):
    """Make one pass over ``loader``; update weights only if ``optimizer`` is given.

    Progress, running mean batch loss and running accuracy (percent) are
    reported through the tqdm progress bar.
    """
    training = optimizer is not None
    if training:
        network.train()  # This is important to call before training!
    else:
        network.eval()  # This is important to call before evaluating!

    correct = 0.0
    cum_loss = 0.0
    counter = 0

    t = tqdm(loader, desc = description)
    for (i, (inputs, labels)) in enumerate(t):
        # Wrap inputs, and targets into torch.autograd.Variable types.
        inputs = Variable(inputs)
        labels = Variable(labels)

        if use_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # Forward pass:
        outputs = network(inputs)
        loss = criterion(outputs, labels)

        if training:
            # Backward pass and weight/bias update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # logging information.
        cum_loss += _as_float(loss.data)
        correct += _batch_jaccard(outputs.data, labels.data)
        counter += inputs.size(0)
        t.set_postfix(loss = cum_loss / (1 + i), accuracy = 100 * correct / counter)


def train_model(network, criterion, optimizer, trainLoader, valLoader, n_epochs = 10, use_gpu = False):
    """Train ``network`` for ``n_epochs`` epochs, validating after each epoch.

    Args:
        network: module to train; updated in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to the network's parameters.
        trainLoader: iterable of ``(inputs, labels)`` training batches.
        valLoader: iterable of ``(inputs, labels)`` validation batches.
        n_epochs: number of train + validation passes.
        use_gpu: if true, move the network, the criterion and every batch to
            CUDA.

    Batches are expected to be 3-D, (batch, 1, features) and
    (batch, 1, n_labels), because the accuracy reduces over dimensions 2 and 1.
    The "accuracy" shown in the progress bar is the mean per-sample Jaccard
    index (intersection over union of predicted and true label sets) in
    percent; "loss" is the mean of the per-batch losses so far.

    Returns:
        None. Metrics are only reported through the progress bars.
    """
    if use_gpu:
        network = network.cuda()
        criterion = criterion.cuda()

    # Training loop.
    for epoch in range(0, n_epochs):
        # Make a pass over the training data.
        _run_epoch(network, criterion, trainLoader, 'Training epoch %d' % epoch,
                   use_gpu, optimizer = optimizer)

        # Make a pass over the validation data.
        _run_epoch(network, criterion, valLoader, 'Validation epoch %d' % epoch,
                   use_gpu)

# Set to False to skip training and reload the previously saved classifier.
train = True

# Single place for the checkpoint location used by both branches below.
MODEL_PATH = "/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/object_detect.model"

if train:
    # 79 input features and 23 output labels: the input width has to equal
    # the length of the vectors built by extractObjectFeature and the output
    # width the label width produced by extractLabel.
    classifier = MovieGenreClassifier(79, 23)
    optimizer = optim.Adam(classifier.parameters(), lr = 0.001)
    criterion = nn.MultiLabelSoftMarginLoss()
    # Train the previously defined model, on the GPU when one is available
    # instead of failing outright on CPU-only machines.
    train_model(classifier, criterion, optimizer, trainLoader, valLoader,
                n_epochs = 20, use_gpu = torch.cuda.is_available())
    # The whole module object is saved (not just its state dict), so loading
    # it requires the MovieGenreClassifier class to be defined.
    torch.save(classifier, MODEL_PATH)
else:
    # NOTE(review): torch.load unpickles the file; only load checkpoints that
    # come from a trusted source.
    classifier = torch.load(MODEL_PATH)

Training epoch 0: 100%|██████████| 211/211 [00:04<00:00, 47.38it/s, accuracy=10.3, loss=0.389]
Validation epoch 0: 100%|██████████| 15/15 [00:00<00:00, 71.65it/s, accuracy=10.2, loss=0.273]
Training epoch 1: 100%|██████████| 211/211 [00:04<00:00, 51.63it/s, accuracy=12.4, loss=0.262]
Validation epoch 1: 100%|██████████| 15/15 [00:00<00:00, 71.00it/s, accuracy=9.17, loss=0.261]
Training epoch 2: 100%|██████████| 211/211 [00:04<00:00, 50.56it/s, accuracy=16.8, loss=0.256]
Validation epoch 2: 100%|██████████| 15/15 [00:00<00:00, 71.50it/s, accuracy=18.5, loss=0.258]
Training epoch 3: 100%|██████████| 211/211 [00:04<00:00, 51.20it/s, accuracy=19.9, loss=0.255]
Validation epoch 3: 100%|██████████| 15/15 [00:00<00:00, 70.12it/s, accuracy=20.5, loss=0.258]
Training epoch 4: 100%|██████████| 211/211 [00:04<00:00, 50.98it/s, accuracy=20.9, loss=0.253]
Validation epoch 4: 100%|██████████| 15/15 [00:00<00:00, 71.65it/s, accuracy=21.1, loss=0.257]
Training epoch 5: 100%|██████████| 211/211 [00:04<

### Evaluation on Test Dataset

In [5]:
def Evaluation(root, img, classifier, id2objects, objectTable, id2genres, genresTable):
    """Score the classifier's genre prediction for a single image.

    Prediction rule:
      1. every label whose sigmoid score rounds to 1 is predicted;
      2. the best-scoring label is always predicted, so the prediction is
         never empty;
      3. if more than three labels are predicted, only the three
         best-scoring ones are kept.

    Args:
        root: unused; kept so existing calls keep working.
        img: image key looked up in ``id2objects`` / ``id2genres``.
        classifier: trained network; switched to eval mode.
        id2objects, objectTable: inputs of ``extractObjectFeature``.
        id2genres, genresTable: inputs of ``extractLabel``.

    Returns:
        ``(correct, outlabels, labels)`` where ``correct`` is the Jaccard
        index (intersection over union) between the predicted and the true
        label sets, ``outlabels`` is the (1, n_labels) 0/1 prediction and
        ``labels`` the (1, n_labels) ground truth. Tensors live on the
        classifier's device.
    """
    classifier.eval()

    # Follow the classifier's device instead of assuming CUDA is available.
    on_gpu = any(param.is_cuda for param in classifier.parameters())

    feature = torch.FloatTensor(extractObjectFeature(img, id2objects, objectTable)).view(1, -1)
    labels = extractLabel(img, id2genres, genresTable)
    if on_gpu:
        feature = feature.cuda()
        labels = labels.cuda()

    outputs = classifier(Variable(feature))
    scores = outputs.data

    # torch.sigmoid is used rather than exp(x) / (exp(x) + 1), which becomes
    # inf / inf = NaN for large scores.
    outlabels = torch.sigmoid(scores).round()

    # Always keep the top label. When its score is positive it is already
    # set, so this matches the former "no positive score" special case.
    max_labels = scores.max(1)[1]
    outlabels[0][max_labels] = 1

    if (outlabels.sum() > 3):
        # Too many positives: keep only the three highest-scoring labels.
        outlabels.zero_()
        outlabels[0][scores.topk(3)[1][0]] = 1

    intersection = (labels * outlabels).sum(1)
    union = (labels + outlabels).clamp(0, 1).sum(1)  # >= 1: prediction is never empty
    correct = (intersection / union).sum()

    return correct, outlabels, labels

csvf = "/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/Dataset/NewMovieGenre.csv"

# Read the annotation CSV once and close it again. csv.reader needs a binary
# handle on Python 2 and a text handle opened with newline='' on Python 3.
if sys.version_info[0] < 3:
    csvfile = open(csvf, 'rb')
else:
    csvfile = open(csvf, 'r', newline='')
with csvfile:
    rows = list(csv.reader(csvfile))

# Per-image lookups: column 0 is the image id, column 4 the '|'-separated
# genres and columns 6 onward the "<object>:<score>%" detections.
id2genre = {}
id2objects = {}
for row in rows:
    if row[0] != "":
        id2genre[row[0] + ".jpg"] = row[4]
        id2objects[row[0] + ".jpg"] = row[6:]

# Occurrence counts over all rows. This repeats the table construction done
# in MyDataset.__init__ and has to stay identical to it (same insertion
# order, same threshold); otherwise the feature and label indices would no
# longer match the ones the classifier was trained with.
genres = {}
objects = {}
for row in rows:
    for ele in row[4].split('|'):
        if ele != '':
            genres[ele] = genres.get(ele, 0) + 1
    for obj in row[6:]:
        if obj != '':
            objects[obj.split(':')[0]] = objects.get(obj.split(':')[0], 0) + 1

# Drop genres that occur fewer than 100 times.
for ele in list(genres):
    if (genres[ele] < 100):
        del genres[ele]

objectTable = {}
for count, ele in enumerate(list(objects)):
    objectTable[ele] = count

genresTable = {}
for count, ele in enumerate(list(genres)):
    genresTable[ele] = count

# Score every image of the test directory; list the directory only once so
# the loop and the final average use the same set of files.
test_dir = "/home/ubuntu/notebooks/dataset/test"
test_images = os.listdir(test_dir)

correct = 0

for img in tqdm(test_images):
    correct += Evaluation(test_dir,
                          img,
                          classifier,
                          id2objects,
                          objectTable,
                          id2genre,
                          genresTable)[0]

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("Accuracy on Test Dataset : " + str(100 * correct / len(test_images)) + "%")

100%|██████████| 9654/9654 [00:59<00:00, 163.04it/s]

Accuracy on Test Dataset : 27.702679622%



