In [None]:
from PIL import Image
import torch, lab_utils, random
from torchvision.datasets import CIFAR10 
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn as nn 
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from PIL import Image
import json, string
import os  
import torch.utils.data as data
import skimage.transform as pictransform
from tqdm import tqdm_notebook as tqdm
import torch.nn.functional as F
%matplotlib inline
import random



In [None]:
class nn_CrossEntropyLoss(object):
    """Softmax followed by cross-entropy, with hand-written forward/backward.

    `forward` caches the softmax probabilities in `self.predictions`;
    `backward` reuses them, so `forward` has to be called first.
    """

    # Forward pass -log softmax(input_{label})
    def forward(self, inputs, labels):
        """Return the batch mean of -log softmax(inputs)[label].

        inputs: (batchSize x numClasses) tensor of raw scores.
        labels: LongTensor holding one class index per row of `inputs`.
        """
        # Shift every row by its OWN maximum. Shifting by the global maximum
        # makes rows that sit far below it underflow to all zeros, which then
        # turns into 0/0 = NaN in the softmax.
        row_max = inputs.max(1, keepdim=True)[0]
        shifted = inputs - row_max.expand_as(inputs)
        exp_inputs = shifted.exp()
        denominators = exp_inputs.sum(1, keepdim=True)
        self.predictions = exp_inputs / denominators.expand_as(exp_inputs)
        # log softmax = shifted - log(sum(exp(shifted))); computing it this way
        # avoids taking log(0) when a probability underflows.
        log_probs = shifted - denominators.log().expand_as(shifted)
        # gather picks, for every row, the entry at that row's label.
        return -log_probs.gather(1, labels.view(-1, 1)).mean()

    # Backward pass
    def backward(self, inputs, labels):
        """Return softmax(inputs) - one_hot(labels), one row per input.

        NOTE: `forward` averages over the batch but this gradient is not
        divided by the batch size, i.e. it is batchSize times the gradient of
        the value `forward` returns. That scaling is kept as it was.
        """
        one_hot = self.predictions.clone().zero_()
        one_hot.scatter_(1, labels.view(-1, 1), 1.0)
        return self.predictions - one_hot

# Quick smoke test of the hand-written loss on random scores.
loss_softmax = nn_CrossEntropyLoss()

# A batch of 4 score vectors with 10 entries each, drawn from a normal
# distribution with mean 0 and standard deviation 0.1.
testInput = torch.Tensor(4, 10).normal_(mean = 0, std = 0.1)
# The correct class index for each of the 4 score vectors.
labels = torch.LongTensor([3, 4, 4, 8])

# Run the forward pass first, then the backward pass on the same batch.
loss = loss_softmax.forward(inputs = testInput, labels = labels)
gradInputs = loss_softmax.backward(inputs = testInput, labels = labels)

In [None]:
class nn_Linear(object):
    """Fully connected layer, outputs = inputs x weight + bias, with manual gradients.

    weight is (inputSize x outputSize) and bias has outputSize entries.
    `backward` stores the parameter gradients in `gradWeight` / `gradBias`.
    """

    def __init__(self, inputSize, outputSize):
        self.weight = torch.Tensor(inputSize, outputSize).normal_(0, 0.01)
        self.bias = torch.Tensor(outputSize).zero_()
        # Start the gradient buffers at zero. An uninitialised torch.Tensor
        # holds arbitrary memory, which would leak into any code that reads or
        # applies the gradients before the first backward pass.
        self.gradWeight = torch.zeros(inputSize, outputSize)
        self.gradBias = torch.zeros(outputSize)

    # Forward pass, inputs is a matrix of size batchSize x inputSize
    def forward(self, inputs):
        """Return the (batchSize x outputSize) layer output."""
        # Matrix x matrix product; the bias is added to every row.
        return torch.matmul(inputs, self.weight) + self.bias

    # Backward pass, in addition to compute gradients for the weight and bias.
    # It has to compute gradients with respect to inputs.
    def backward(self, inputs, gradOutput):
        """Store the weight/bias gradients and return the gradient w.r.t. inputs.

        inputs:     the (batchSize x inputSize) matrix given to `forward`.
        gradOutput: (batchSize x outputSize) gradient w.r.t. the layer output.
        """
        # Both parameter gradients are summed over the batch dimension.
        self.gradWeight = torch.matmul(inputs.t(), gradOutput)
        self.gradBias = gradOutput.sum(0)
        return torch.matmul(gradOutput, self.weight.t())

In [None]:
class nn_ReLU(object):
    """Rectified linear unit with explicit forward and backward passes."""

    def forward(self, inputs):
        """Return a copy of `inputs` in which every negative entry is 0."""
        negative = inputs < 0
        return inputs.clone().masked_fill_(negative, 0)

    def backward(self, inputs, gradOutput):
        """Return a copy of `gradOutput`, zeroed wherever `inputs` was negative.

        Entries where the input is exactly 0 let the gradient through.
        """
        blocked = inputs < 0
        return gradOutput.clone().masked_fill_(blocked, 0)

In [None]:
def _as_float(value):
    """Return a one-element tensor (or a plain number) as a Python float.

    Indexing a zero-dimensional tensor (`loss.data[0]`) raises in current
    PyTorch, while very old releases have no `.item()`; try both routes.
    """
    if hasattr(value, 'item'):
        return float(value.item())
    try:
        return float(value)
    except (TypeError, ValueError, RuntimeError):
        return float(value[0])


def _run_epoch(network, criterion, loader, desc, use_gpu, optimizer = None):
    """Make one pass over `loader`, showing running loss/accuracy in a tqdm bar.

    When `optimizer` is given the network is put in training mode and updated
    after every batch; otherwise it is put in evaluation mode and only scored.
    """
    training = optimizer is not None
    if training:
        network.train()  # This is important to call before training!
    else:
        network.eval()  # This is important to call before evaluating!

    correct = 0.0
    cum_loss = 0.0
    counter = 0

    t = tqdm(loader, desc = desc)
    for (i, (inputs, labels)) in enumerate(t):

        # Wrap inputs, and targets into torch.autograd.Variable types.
        inputs = Variable(inputs)
        labels = Variable(labels)

        if use_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # Forward pass:
        outputs = network(inputs)
        loss = criterion(outputs, labels)

        if training:
            # Backward pass: clear the old gradients, backpropagate the loss,
            # then let the optimizer update the weights and biases.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # logging information, kept as plain Python numbers so the progress
        # bar shows values rather than tensor reprs.
        cum_loss += _as_float(loss.data)
        max_scores, max_labels = outputs.data.max(1)
        correct += _as_float((max_labels == labels.data).sum())
        counter += inputs.size(0)
        t.set_postfix(loss = cum_loss / (1 + i), accuracy = 100 * correct / counter)


def train_model(network, criterion, optimizer, trainLoader, valLoader, n_epochs = 10, use_gpu = False):
    """Train `network` for `n_epochs`, validating after every epoch.

    network:     the nn.Module to train (updated in place).
    criterion:   loss called as criterion(outputs, labels).
    optimizer:   optimizer built over the network's parameters.
    trainLoader: iterable of (inputs, labels) batches used for training.
    valLoader:   iterable of (inputs, labels) batches used for validation.
    n_epochs:    number of passes over the training data.
    use_gpu:     when True, move network, criterion and batches to CUDA.

    Progress (running mean loss per batch and accuracy in percent) is shown
    through tqdm; nothing is returned.
    """
    if use_gpu:
        network = network.cuda()
        criterion = criterion.cuda()

    # Training loop.
    for epoch in range(0, n_epochs):
        # Make a pass over the training data.
        _run_epoch(network, criterion, trainLoader,
                   'Training epoch %d' % epoch, use_gpu, optimizer)

        # Make a pass over the validation data.
        _run_epoch(network, criterion, valLoader,
                   'Validation epoch %d' % epoch, use_gpu)


In [None]:
class LeNet(nn.Module):
    """LeNet-style CNN: two 5x5 conv + 2x2 max-pool stages, then three linear layers.

    The first linear layer is sized for 32 x 42 x 64 feature maps, which is
    what the two conv/pool stages produce for a 3 x 182 x 268 input; other
    input sizes do not match `fc1`.

    num_classes: number of output scores (defaults to 2, the previous fixed value).
    """

    def __init__(self, num_classes=2):
        super(LeNet, self).__init__()
        # Convolutional layers.
        self.conv1 = nn.Conv2d(3, 10, 5)
        self.conv2 = nn.Conv2d(10, 32, 5)

        # Linear layers.
        self.fc1 = nn.Linear(32*42*64, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw (batch x num_classes) scores for a batch of images."""
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        # This flattens the output of the previous layer into a vector.
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: a five-convolution feature extractor and a three-layer classifier.

    The classifier's first linear layer expects 256 x 4 x 7 feature maps.

    num_classes: number of scores produced by the final linear layer.
    """

    def __init__(self, num_classes=2):
        super(AlexNet, self).__init__()

        # Convolutional feature extractor; max-pooling follows the first,
        # second and fifth convolution.
        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Fully connected head; dropout precedes the first two linear layers.
        classifier_layers = [
            nn.Dropout(),
            nn.Linear(256 * 4 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the raw (batch x num_classes) scores for a batch of images."""
        feature_maps = self.features(x)
        # Flatten everything except the batch dimension.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)

In [None]:
class MyDataset(data.Dataset):
    """Dataset over parallel lists of images and labels.

    images:       indexable collection of images.
    labels:       indexable collection with the label of each image.
    imgTransform: callable applied to an image when it is fetched, or None to
                  return images unchanged.
    """

    def __init__(self, images, labels,imgTransform):
        self.images = images
        self.labels = labels
        self.transform = imgTransform

    def __getitem__(self, index):
        """Return (transformed image, label) for position `index`."""
        img, target = self.images[index], self.labels[index]
        # Use the transform given to THIS dataset; the previous code called the
        # notebook-level `imgTransform`, silently ignoring the constructor
        # argument (and failing when no such global exists).
        if self.transform is not None:
            img = self.transform(img)
        return img, target

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

In [None]:
#Genres maps every genre name to its index.
#Genre_Movies[k] lists the movie ids of the genre whose index is k.
#Each row of input.csv is read as "<genre>,<movie id>,..." (first two fields used).
Genre_Movies=[]
Genres={}
count=0
# `with` closes the file even when a row fails to parse.
with open('input.csv','r') as my_file:
    for line in my_file:
        line=line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue
        ele=line.split(',')
        if ele[0] not in Genres:
            # First time this genre is seen: give it the next free index.
            Genres[ele[0]]=count
            count+=1
            Genre_Movies.append([])
        Genre_Movies[Genres[ele[0]]].append(ele[1])

In [None]:
#extract test dataset
# Hold out a random fifth of every genre's movie ids as that genre's test set.
# NOTE: the held-out ids are removed from Genre_Movies in place, so running
# this cell again holds out a further fifth of what is left.
# NOTE(review): genres are sampled independently; if a movie id appears under
# several genres it can be held out for one genre and still remain in another
# genre's Genre_Movies list. No random seed is set, so the split differs
# between runs.
Test_Movies = []
for i in range(len(Genres)):
    movies = Genre_Movies[i]
    n_test = len(movies) // 5
    # Draw all positions at once instead of sample-one / list.remove in a
    # loop, which rescans the list for every held-out movie.
    picked = random.sample(range(len(movies)), n_test)
    picked_set = set(picked)
    Test_Movies.append([movies[j] for j in picked])
    # Slice assignment keeps the same list object and the original order.
    movies[:] = [m for j, m in enumerate(movies) if j not in picked_set]

In [None]:
VAL_PER_LABEL = 500     # validation posters per label (1 = in the genre, 0 = not)
TRAIN_PER_LABEL = 5000  # training posters per label


def _load_poster(mvid):
    """Open ./posters/<mvid>.jpg and return it as an RGB PIL image."""
    return Image.open('./posters/'+mvid+'.jpg').convert('RGB')


# One entry per processed genre: lists of PIL images and of 0/1 labels.
trainImgList=[]
valImgList=[]
trainLabelList=[]
valLabelList=[]
# Visit the genres in order of their index so that `index` is the genre's
# index in Genre_Movies regardless of how the dict happens to be ordered.
for index, gen in enumerate(sorted(Genres, key=Genres.get)):
    # NOTE(review): only the genre with index 1 is processed, so each list
    # above ends up with a single entry. Code that loops over every genre and
    # indexes these lists by genre index will not line up - confirm whether
    # this filter is a leftover from debugging.
    if index != 1:
        continue

    genreMovies = Genre_Movies[Genres[gen]]
    genreMovieSet = set(genreMovies)  # constant-time membership tests below

    #Add image with label 1
    # The first VAL_PER_LABEL ids go to validation, the next TRAIN_PER_LABEL
    # to training.
    posValIds = genreMovies[:VAL_PER_LABEL]
    posTrainIds = genreMovies[VAL_PER_LABEL:VAL_PER_LABEL + TRAIN_PER_LABEL]
    shortfall = TRAIN_PER_LABEL - len(posTrainIds)
    if shortfall > 0:
        # Small genre: repeat training posters until the class is full.
        # Sampling WITH replacement cannot fail when the shortfall exceeds the
        # number of available ids, and drawing from the training ids keeps
        # validation posters out of the training set (the fallback to the
        # whole genre only applies when there are no training ids at all).
        pool = posTrainIds if posTrainIds else genreMovies
        posTrainIds = posTrainIds + [random.choice(pool) for _ in range(shortfall)]

    valImg = [_load_poster(mvid) for mvid in posValIds]
    valLabel = [1] * len(valImg)
    trainImg = [_load_poster(mvid) for mvid in posTrainIds]
    trainLabel = [1] * len(trainImg)

    #Add image with label 0
    # Every id listed under any genre that is not a member of this genre.
    # NOTE(review): an id listed under several other genres appears here more
    # than once, so the same poster can be drawn for both validation and
    # training.
    label0Img = [mvid for movies in Genre_Movies for mvid in movies
                 if mvid not in genreMovieSet]
    samples = random.sample(label0Img, VAL_PER_LABEL + TRAIN_PER_LABEL)
    for i, mvid in enumerate(samples):
        img = _load_poster(mvid)
        if i < VAL_PER_LABEL:
            valImg.append(img)
            valLabel.append(0)
        else:
            trainImg.append(img)
            trainLabel.append(0)

    trainImgList.append(trainImg)
    valImgList.append(valImg)
    trainLabelList.append(trainLabel)
    valLabelList.append(valLabel)

            
    

In [None]:
#Settings
learningRate = 0.03
criterion = nn.CrossEntropyLoss()
# transforms.Scale was deprecated in favour of transforms.Resize; use Resize
# when this torchvision provides it and fall back to Scale otherwise.
_resize = getattr(transforms, 'Resize', None) or transforms.Scale
imgTransform = transforms.Compose([_resize((182,268)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                        (0.2023, 0.1994, 0.2010))])

#One binary classifier per prepared genre dataset
# Loop over the datasets that were actually built: trainImgList can hold fewer
# entries than there are genres, and indexing it by genre count would then
# raise IndexError.
networks=[]
for i in range(len(trainImgList)):
    network = LeNet()
    optimizer = optim.SGD(network.parameters(), lr = learningRate, momentum=0.85)
    mytrainset=MyDataset(trainImgList[i],trainLabelList[i],imgTransform)
    myvalset=MyDataset(valImgList[i],valLabelList[i],imgTransform)

    # Shuffle only the training data; validation order does not matter.
    mytrainLoader = torch.utils.data.DataLoader(mytrainset, batch_size = 64,
                                          shuffle = True, num_workers = 0)
    myvalLoader = torch.utils.data.DataLoader(myvalset, batch_size = 64,
                                        shuffle = False, num_workers = 0)

    train_model(network, criterion, optimizer, mytrainLoader, myvalLoader, n_epochs = 1, use_gpu = False)
    networks.append(network)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("network %d is done"%(i))


In [None]:
#Test network
# Test_Sum / Correct_Sum accumulate totals over all classifiers; the accuracy
# printed for each classifier uses that classifier's own counts.
Test_Sum=0.0
Correct_Sum=0.0
# Evaluate with the same preprocessing the networks were trained with
# (imgTransform: resize + normalisation). The previous test-only pipeline did
# not resize and used different normalisation constants.
preprocessFn = imgTransform
# Assumes networks[i] is the classifier for the genre with index i.
for i, net in enumerate(networks):
    # A held-out movie belongs to genre i if it is in that genre's test list
    # or is still listed under the genre in Genre_Movies (the hold-out is done
    # per genre, so a movie held out for another genre can remain there).
    positives = set(Test_Movies[i]) | set(Genre_Movies[i])
    Test_dic={}
    for l in Test_Movies:
        for mvid in l:
            Test_dic[mvid] = 1 if mvid in positives else 0
    print(len(Test_dic))

    net.eval()
    classifier_total = 0.0
    classifier_correct = 0.0
    # Score every held-out movie exactly once with THIS classifier (the old
    # loop used the last-trained `network` and stopped after the first movie
    # of each genre).
    for mvid, target in Test_dic.items():
        image = Image.open('./posters/'+mvid+'.jpg').convert('RGB')
        inputVar = Variable(preprocessFn(image).unsqueeze(0))
        # The arg-max of the raw scores equals the arg-max of their softmax,
        # so the softmax is not needed to pick the predicted label.
        scores = net(inputVar).data
        max_score, max_label = scores.max(1)
        if int(max_label.view(-1)[0]) == target:
            classifier_correct += 1
        classifier_total += 1

    Test_Sum += classifier_total
    Correct_Sum += classifier_correct
    if classifier_total > 0:
        print("The accuracy of classifier %d is %f"%(i,classifier_correct/classifier_total))
    