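This notebook extracts style features (Gram matrices of VGG-19 convolutional activations) from movie-poster images and uses them to train a multi-label genre classifier.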

In [1]:
import numpy as np

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim

import PIL
from PIL import Image
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
from skimage import io, transform
import os
import sys

import torchvision.transforms as transforms
import torchvision.models as models

### A Simple Example For Multi-Label Classification

In [54]:
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.autograd import Variable

# Synthetic multi-label data. A random two-bit category decides which of the
# two input features are active (drawn from U(0.1, 1)) and which of the three
# target labels are set:
#   (1, 0) -> only feature 0 active -> labels 0 and 2
#   (0, 1) -> only feature 1 active -> label 1
#   (1, 1) -> both features active  -> label 2
#   (0, 0) -> no sample is generated, so `train` ends up with fewer than
#             10000 entries (about three quarters of them on average)
train = []
labels = []
for i in range(10000):
    category = (np.random.choice([0, 1]), np.random.choice([0, 1]))
    if category == (1, 0):
        train.append([np.random.uniform(0.1, 1), 0])
        labels.append([1, 0, 1])
    elif category == (0, 1):
        train.append([0, np.random.uniform(0.1, 1)])
        labels.append([0, 1, 0])
    elif category == (1, 1):
        # Both bits set means both features are non-zero.
        train.append([np.random.uniform(0.1, 1), np.random.uniform(0.1, 1)])
        labels.append([0, 0, 1])

class _classifier(nn.Module):
    def __init__(self, nlabel):
        super(_classifier, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(2, 64),
            nn.ReLU(),
            nn.Linear(64, nlabel),
        )

    def forward(self, input):
        return self.main(input)

nlabel = len(labels[0]) # => 3
classifier = _classifier(nlabel)

optimizer = optim.Adam(classifier.parameters())
criterion = nn.MultiLabelSoftMarginLoss()

# Parenthesised so the cell parses on both Python 2 and Python 3
# (the epoch summary below already uses this form).
print(train[0])

# Train with one sample per optimisation step (batch size 1).
epochs = 5
for epoch in range(epochs):
    losses = []
    for sample, target in zip(train, labels):
        # view(1, -1) adds the batch dimension the network and loss expect.
        inputv = Variable(torch.FloatTensor(sample)).view(1, -1)
        labelsv = Variable(torch.FloatTensor(target)).view(1, -1)

        output = classifier(inputv)
        loss = criterion(output, labelsv)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.data.mean())
    # Report the mean per-sample loss of the epoch.
    print('[%d/%d] Loss: %.3f' % (epoch+1, epochs, np.mean(losses)))

[0.8038541011774724, 0]
[1/5] Loss: 0.096
[2/5] Loss: 0.006
[3/5] Loss: 0.001
[4/5] Loss: 0.000
[5/5] Loss: 0.000


In [18]:
# Raw classifier scores for a point with both features active.
print(classifier(Variable(torch.FloatTensor([0.1, 0.1])).view(1, -1)))

Variable containing:
-29.7755 -31.0071   2.0508
[torch.FloatTensor of size 1x3]



### Define Style Feature Extraction Network With VGG

In [17]:
use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

# Only the `features` sub-module of the pretrained VGG-19 is used.
cnn = models.vgg19(pretrained=True).features

# move it to the GPU if possible:
if use_cuda:
    cnn = cnn.cuda()

style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

# indexs[n] is the exclusive end index into `cnn` for the n-th truncated
# network; each slice ends on a Conv2d layer (see the printed networks).
indexs = [1, 3, 6, 8, 11]

# One truncated network per entry of `indexs`. The layer objects are the ones
# from `cnn`, so all truncated networks share the same modules and weights.
networks = []
for end in indexs:
    model = nn.Sequential()
    for count, layer in enumerate(list(cnn)[:end]):
        model.add_module(str(count), layer)
    networks.append(model)

for network in networks:
    print(network)

Sequential (
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
Sequential (
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU (inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
Sequential (
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU (inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU (inplace)
  (4): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
Sequential (
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU (inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU (inplace)
  (4): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU (inplace)
  (7): Conv2d(128, 128, kerne

### Data Preprocessing

In [40]:
import os

trainroot = "/home/ubuntu/notebooks/dataset/train"
valroot = "/home/ubuntu/notebooks/dataset/validation/"
testroot = "/home/ubuntu/notebooks/dataset/test/"
invalidroot = "/home/ubuntu/notebooks/dataset/invalid"

# Move every file in the test directory that PIL cannot open as an image into
# `invalidroot` (the target directory is assumed to exist already).
for img in os.listdir(testroot):
    path = os.path.join(testroot, img)
    try:
        image = Image.open(path)
    except IOError:
        os.rename(path, os.path.join(invalidroot, img))
    else:
        # Image.open keeps the underlying file open; close it so the sweep
        # does not leak one file handle per image.
        image.close()

### Define Gram Matrix to Calculate Style Feature

In [19]:
# Preprocessing applied to every PIL image before it is fed to the networks:
# rescale to a fixed size, then convert to a torch tensor.
# NOTE(review): whether (182, 268) is read as (width, height) or
# (height, width) depends on the torchvision version - confirm.
preprocessing_steps = [
    transforms.Scale((182, 268)),
    transforms.ToTensor(),
]
loader = transforms.Compose(preprocessing_steps)

def image_loader(image_name):
    """Load an image file and return it as a Variable with a batch dimension.

    Args:
        image_name: path of the image file to open.

    Returns:
        The image after `loader` preprocessing, wrapped in a Variable and
        with a leading batch dimension of size 1.
    """
    # Force three channels: a grayscale, palette or RGBA file would otherwise
    # yield a tensor the first VGG layer, Conv2d(3, 64), cannot consume.
    image = Image.open(image_name).convert('RGB')
    image = Variable(loader(image))
    # fake batch dimension required to fit network's input dimensions
    return image.unsqueeze(0)

class GramMatrix(nn.Module):
    """Normalised Gram matrix of a 4-D batch of feature maps."""

    def forward(self, input):
        # input is (batch, channels, height, width)
        batch, channels, height, width = input.size()

        # One row per feature map, one column per spatial position.
        flat = input.view(batch * channels, height * width)

        # Inner products between every pair of flattened feature maps,
        # scaled down by the total number of elements in the input.
        total_elements = batch * channels * height * width
        return torch.mm(flat, flat.t()).div(total_elements)

gram = GramMatrix()
if use_cuda:
    gram = gram.cuda()

# Smoke test on one training image: first truncated network, then its Gram
# matrix. The image follows the same use_cuda guard as the networks so the
# cell also runs on a machine without a GPU.
testImg = image_loader("/home/ubuntu/notebooks/dataset/train/100024.jpg")
if use_cuda:
    testImg = testImg.cuda()
output = networks[0](testImg)
gramOutput = gram(output)
print(testImg)
print(output)
print(gramOutput)

Variable containing:
( 0 , 0 ,.,.) = 
  0.6118  0.6078  0.6039  ...   0.6118  0.6118  0.6118
  0.6078  0.6039  0.6039  ...   0.6118  0.6118  0.6118
  0.6039  0.6039  0.6000  ...   0.6078  0.6078  0.6078
           ...             ⋱             ...          
  0.5373  0.5412  0.5608  ...   0.2157  0.2196  0.2196
  0.5412  0.5373  0.5529  ...   0.2196  0.2235  0.2235
  0.5529  0.5451  0.5529  ...   0.2275  0.2314  0.2314

( 0 , 1 ,.,.) = 
  0.7608  0.7569  0.7529  ...   0.7529  0.7529  0.7529
  0.7569  0.7529  0.7529  ...   0.7529  0.7529  0.7529
  0.7529  0.7529  0.7490  ...   0.7490  0.7490  0.7490
           ...             ⋱             ...          
  0.4431  0.4471  0.4667  ...   0.1608  0.1647  0.1647
  0.4471  0.4431  0.4588  ...   0.1647  0.1686  0.1686
  0.4588  0.4510  0.4588  ...   0.1608  0.1647  0.1647

( 0 , 2 ,.,.) = 
  0.8118  0.8078  0.8039  ...   0.8000  0.8000  0.8000
  0.8078  0.8039  0.8039  ...   0.8000  0.8000  0.8000
  0.8039  0.8039  0.8000  ...   0.7961  0.7961

### Process Genres

In [4]:
import csv
# Count how many times each genre appears in column 4 of the CSV
# ('|'-separated genre names per row).
genres = {}

# The with-block closes the file once it has been read.
with open('/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/Dataset/NewMovieGenre.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        for ele in row[4].split('|'):
            if ele != '':
                genres[ele] = genres.get(ele, 0) + 1

# Drop genres that occur fewer than 100 times.
for ele in list(genres):
    if genres[ele] < 100:
        del genres[ele]

print(genres)
print(len(genres))

# Map each remaining genre to its position in the label vector, following the
# iteration order of `genres`.
genresTable = {}
for count, ele in enumerate(list(genres)):
    genresTable[ele] = count

print(genresTable)

{'Sci-Fi': 2020, 'Crime': 5270, 'Romance': 6224, 'Animation': 1750, 'Music': 1292, 'Comedy': 12682, 'War': 1173, 'Horror': 3990, 'Film-Noir': 403, 'Adventure': 3829, 'Thriller': 4816, 'Western': 853, 'Mystery': 2395, 'Short': 1066, 'Drama': 20053, 'Action': 5331, 'Documentary': 3882, 'Musical': 845, 'History': 1426, 'Family': 2100, 'Fantasy': 2012, 'Sport': 707, 'Biography': 1999}
23
{'Sci-Fi': 0, 'Crime': 1, 'Romance': 2, 'Animation': 3, 'Music': 4, 'Comedy': 5, 'War': 6, 'Horror': 7, 'Film-Noir': 8, 'Adventure': 9, 'Thriller': 10, 'Western': 11, 'Mystery': 12, 'Short': 13, 'Drama': 14, 'Action': 15, 'Documentary': 16, 'Musical': 17, 'History': 18, 'Family': 19, 'Fantasy': 20, 'Sport': 21, 'Biography': 22}


### Genres Preprocessing

In [45]:
import csv
import os

# Map "<id>.jpg" (column 0 plus extension) to the raw genre string (column 4).
# The with-block closes the file once it has been read.
id2genre = {}
with open('/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/Dataset/NewMovieGenre.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if row[0] != "":
            id2genre[row[0] + ".jpg"] = row[4]

trainroot = "/home/ubuntu/notebooks/dataset/train"
valroot = "/home/ubuntu/notebooks/dataset/validation/"
testroot = "/home/ubuntu/notebooks/dataset/test/"
invalidroot = "/home/ubuntu/notebooks/dataset/invalid"
tmproot = "/home/ubuntu/notebooks/dataset/tmp"

# Move validation images that have no CSV entry, or an empty genre string,
# into `tmproot` (the target directory is assumed to exist already).
for img in os.listdir(valroot):
    if id2genre.get(img, "") == "":
        os.rename(os.path.join(valroot, img), os.path.join(tmproot, img))

In [66]:
class MyDataset(Dataset):
    """Poster images of one directory, served as (style feature, genre label).

    All images under `root` are read into memory at construction time,
    together with their multi-hot genre label. The style feature is computed
    on access by `extractStyleFeature`.

    Args:
        root: directory containing the image files; every file name must be a
            key of the id -> genre mapping built from `csvfile`.
        csvfile: path of the CSV with the image id in column 0 and the
            '|'-separated genres in column 4.
        transform: optional callable applied to the PIL image before the
            module-level `loader` preprocessing.
    """

    def __init__(self, root, csvfile, transform=None):
        self.root = root
        self.transform = transform

        # Read the CSV once and close it; both lookup tables are derived from
        # the same rows.
        with open(csvfile, 'rb') as handle:
            rows = list(csv.reader(handle))
        # Attribute kept for backward compatibility; the handle is closed.
        self.csvfile = handle

        id2genre = self._build_id2genre(rows)
        genresTable = self._build_genres_table(rows)

        self.dataset = []
        self.labels = []
        for img in os.listdir(self.root):
            image = io.imread(os.path.join(self.root, img))
            self.dataset.append(image)
            self.labels.append(extractLabel(img, id2genre, genresTable))

    @staticmethod
    def _build_id2genre(rows):
        """Map "<id>.jpg" to the raw genre string of every row with an id."""
        id2genre = {}
        for row in rows:
            if row[0] != "":
                id2genre[row[0] + ".jpg"] = row[4]
        return id2genre

    @staticmethod
    def _build_genres_table(rows):
        """Map every genre seen at least 100 times to a label-vector index."""
        genres = {}
        for row in rows:
            for ele in row[4].split('|'):
                if ele != '':
                    genres[ele] = genres.get(ele, 0) + 1

        for ele in list(genres):
            if genres[ele] < 100:
                del genres[ele]

        # Indices follow the iteration order of the filtered dict.
        genresTable = {}
        for count, ele in enumerate(list(genres)):
            genresTable[ele] = count
        return genresTable

    def __len__(self):
        # Count the samples actually loaded; re-listing the directory could
        # disagree with self.dataset if files were moved after construction.
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return (style feature tensor, label tensor) for sample `idx`."""
        image = self.dataset[idx]
        label = self.labels[idx]

        # Force three channels: grayscale or RGBA arrays would otherwise give
        # a tensor the first VGG layer, Conv2d(3, 64), cannot consume.
        image = Image.fromarray(image).convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        image = Variable(loader(image))

        # Same placement rule as the style networks (moved to GPU iff use_cuda).
        if use_cuda:
            image = image.cuda()
        # fake batch dimension required to fit network's input dimensions
        image = image.unsqueeze(0)

        feature = extractStyleFeature(image)

        return feature.data, label
                               
# Training and validation splits are labelled from the same genre CSV.
genre_csv_path = '/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/Dataset/NewMovieGenre.csv'

trainset = MyDataset(
    root='/home/ubuntu/notebooks/dataset/train',
    csvfile=genre_csv_path,
)
valset = MyDataset(
    root='/home/ubuntu/notebooks/dataset/validation/',
    csvfile=genre_csv_path,
)

In [67]:
from tqdm import tqdm as tqdm

# Both splits use the same batching options: batches of 128, reshuffled on
# every pass, loaded in the main process (num_workers = 0).
loader_options = dict(batch_size = 128, shuffle = True, num_workers = 0)

trainLoader = torch.utils.data.DataLoader(trainset, **loader_options)
valLoader = torch.utils.data.DataLoader(valset, **loader_options)

def _jaccard_sum(logits, targets):
    """Sum over the batch of the per-sample Jaccard index (intersection/union).

    `logits` and `targets` are indexed as 3-D tensors (dims 1 and 2 are
    summed per sample), as in the original accuracy expression.
    """
    # torch.sigmoid is the overflow-safe form of exp(x) / (exp(x) + 1), which
    # becomes inf / inf = NaN for large positive logits.
    predicted = torch.sigmoid(logits).round()
    intersection = (targets * predicted).sum(2).sum(1)
    union = (targets + predicted).clamp(0, 1).sum(2).sum(1)
    # A sample with no true and no predicted label has an empty union; clamp
    # it to 1 so it scores 0 instead of 0 / 0 = NaN poisoning the running sum.
    return (intersection / union.clamp(min=1)).sum()


def _run_epoch(network, criterion, loader, desc, use_gpu, optimizer=None):
    """Make one pass over `loader`; update the weights iff `optimizer` is given."""
    correct = 0.0
    cum_loss = 0.0
    counter = 0

    t = tqdm(loader, desc = desc)
    for (i, (inputs, labels)) in enumerate(t):
        # Wrap inputs, and targets into torch.autograd.Variable types.
        inputs = Variable(inputs)
        labels = Variable(labels)

        if use_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # Forward pass:
        outputs = network(inputs)
        loss = criterion(outputs, labels)

        if optimizer is not None:
            # Backward pass and weight update (training only).
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # logging information: running mean loss per batch and mean Jaccard
        # accuracy per sample, shown in the progress bar.
        cum_loss += loss.data[0]
        correct += _jaccard_sum(outputs.data, labels.data)
        counter += inputs.size(0)
        t.set_postfix(loss = cum_loss / (1 + i), accuracy = 100 * correct / counter)


def train_model(network, criterion, optimizer, trainLoader, valLoader, n_epochs = 10, use_gpu = False):
    """Train `network` for `n_epochs`, validating after every epoch.

    Args:
        network: module to train.
        criterion: loss called as criterion(outputs, labels).
        optimizer: optimizer built over the network's parameters.
        trainLoader: iterable of (inputs, labels) training batches.
        valLoader: iterable of (inputs, labels) validation batches.
        n_epochs: number of passes over both loaders.
        use_gpu: when True, move network, criterion and batches to the GPU.

    Progress (loss and accuracy) is reported through tqdm; nothing is returned.
    """
    if use_gpu:
        network = network.cuda()
        criterion = criterion.cuda()

    # Training loop.
    for epoch in range(0, n_epochs):
        # Make a pass over the training data.
        network.train()  # This is important to call before training!
        _run_epoch(network, criterion, trainLoader,
                   'Training epoch %d' % epoch, use_gpu, optimizer)

        # Make a pass over the validation data.
        network.eval()  # This is important to call before evaluating!
        _run_epoch(network, criterion, valLoader,
                   'Validation epoch %d' % epoch, use_gpu)

# MovieGenreClassifier is defined in a later cell of this notebook; that cell
# has to be executed before this one.
n_genres = 23  # one output per genre kept in the genre table
classifier = MovieGenreClassifier(n_genres)
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = optim.Adam(classifier.parameters())

# Train the previously defined model.
train_model(
    classifier,
    criterion,
    optimizer,
    trainLoader,
    valLoader,
    n_epochs = 2,
    use_gpu = True,
)




Training epoch 0:   0%|          | 0/211 [00:00<?, ?it/s][A[A[A


Training epoch 0:   0%|          | 0/211 [00:05<?, ?it/s, accuracy=9.82, loss=0.691][A[A[A


Training epoch 0:   0%|          | 1/211 [00:05<18:29,  5.28s/it, accuracy=9.82, loss=0.691][A[A[A


Training epoch 0:   0%|          | 1/211 [00:10<36:18, 10.37s/it, accuracy=5.56, loss=0.586][A[A[A


Training epoch 0:   1%|          | 2/211 [00:10<18:04,  5.19s/it, accuracy=5.56, loss=0.586][A[A[A


Training epoch 0:   1%|          | 2/211 [00:15<26:56,  7.74s/it, accuracy=3.71, loss=0.517][A[A[A


Training epoch 0:   1%|▏         | 3/211 [00:15<17:52,  5.16s/it, accuracy=3.71, loss=0.517][A[A[A


Training epoch 0:   1%|▏         | 3/211 [00:20<23:46,  6.86s/it, accuracy=2.78, loss=0.47] [A[A[A


Training epoch 0:   2%|▏         | 4/211 [00:20<17:44,  5.14s/it, accuracy=2.78, loss=0.47][A[A[A


Training epoch 0:   2%|▏         | 4/211 [00:25<22:09,  6.42s/it, accuracy=2.22, loss=0.439][A[A[A


T

Training epoch 0:  37%|███▋      | 78/211 [06:43<11:28,  5.18s/it, accuracy=19.9, loss=0.285][A[A[A


Training epoch 0:  37%|███▋      | 78/211 [06:49<11:37,  5.24s/it, accuracy=19.9, loss=0.285][A[A[A


Training epoch 0:  37%|███▋      | 79/211 [06:49<11:23,  5.18s/it, accuracy=19.9, loss=0.285][A[A[A


Training epoch 0:  37%|███▋      | 79/211 [06:54<11:32,  5.24s/it, accuracy=19.9, loss=0.285][A[A[A


Training epoch 0:  38%|███▊      | 80/211 [06:54<11:18,  5.18s/it, accuracy=19.9, loss=0.285][A[A[A


Training epoch 0:  38%|███▊      | 80/211 [06:59<11:26,  5.24s/it, accuracy=19.9, loss=0.284][A[A[A


Training epoch 0:  38%|███▊      | 81/211 [06:59<11:13,  5.18s/it, accuracy=19.9, loss=0.284][A[A[A


Training epoch 0:  38%|███▊      | 81/211 [07:04<11:21,  5.24s/it, accuracy=20, loss=0.284]  [A[A[A


Training epoch 0:  39%|███▉      | 82/211 [07:04<11:07,  5.18s/it, accuracy=20, loss=0.284][A[A[A


Training epoch 0:  39%|███▉      | 82/211 [07:09<11:16,  

Training epoch 0:  73%|███████▎  | 155/211 [13:23<04:50,  5.18s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  73%|███████▎  | 155/211 [13:28<04:52,  5.22s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  74%|███████▍  | 156/211 [13:28<04:45,  5.18s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  74%|███████▍  | 156/211 [13:33<04:46,  5.22s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  74%|███████▍  | 157/211 [13:33<04:39,  5.18s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  74%|███████▍  | 157/211 [13:38<04:41,  5.22s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  75%|███████▍  | 158/211 [13:38<04:34,  5.18s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  75%|███████▍  | 158/211 [13:44<04:36,  5.22s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  75%|███████▌  | 159/211 [13:44<04:29,  5.18s/it, accuracy=21.5, loss=0.268][A[A[A


Training epoch 0:  75%|███████▌  | 159/211 [13

Training epoch 1:   2%|▏         | 5/211 [00:31<21:20,  6.22s/it, accuracy=18.7, loss=0.25] [A[A[A


Training epoch 1:   3%|▎         | 6/211 [00:31<17:42,  5.18s/it, accuracy=18.7, loss=0.25][A[A[A


Training epoch 1:   3%|▎         | 6/211 [00:36<20:39,  6.05s/it, accuracy=20.5, loss=0.251][A[A[A


Training epoch 1:   3%|▎         | 7/211 [00:36<17:37,  5.18s/it, accuracy=20.5, loss=0.251][A[A[A


Training epoch 1:   3%|▎         | 7/211 [00:41<20:07,  5.92s/it, accuracy=21.4, loss=0.249][A[A[A


Training epoch 1:   4%|▍         | 8/211 [00:41<17:31,  5.18s/it, accuracy=21.4, loss=0.249][A[A[A


Training epoch 1:   4%|▍         | 8/211 [00:46<19:43,  5.83s/it, accuracy=22.3, loss=0.249][A[A[A


Training epoch 1:   4%|▍         | 9/211 [00:46<17:26,  5.18s/it, accuracy=22.3, loss=0.249][A[A[A


Training epoch 1:   4%|▍         | 9/211 [00:51<19:22,  5.76s/it, accuracy=22.6, loss=0.248][A[A[A


Training epoch 1:   5%|▍         | 10/211 [00:51<17:21,  5.18s/it

Training epoch 1:  39%|███▉      | 83/211 [07:14<11:10,  5.24s/it, accuracy=23.2, loss=0.245][A[A[A


Training epoch 1:  40%|███▉      | 84/211 [07:14<10:57,  5.18s/it, accuracy=23.2, loss=0.245][A[A[A


Training epoch 1:  40%|███▉      | 84/211 [07:20<11:05,  5.24s/it, accuracy=23.2, loss=0.245][A[A[A


Training epoch 1:  40%|████      | 85/211 [07:20<10:52,  5.18s/it, accuracy=23.2, loss=0.245][A[A[A


Training epoch 1:  40%|████      | 85/211 [07:25<10:59,  5.24s/it, accuracy=23.3, loss=0.245][A[A[A


Training epoch 1:  41%|████      | 86/211 [07:25<10:47,  5.18s/it, accuracy=23.3, loss=0.245][A[A[A


Training epoch 1:  41%|████      | 86/211 [07:30<10:54,  5.24s/it, accuracy=23.3, loss=0.245][A[A[A


Training epoch 1:  41%|████      | 87/211 [07:30<10:41,  5.18s/it, accuracy=23.3, loss=0.245][A[A[A


Training epoch 1:  41%|████      | 87/211 [07:35<10:49,  5.24s/it, accuracy=23.3, loss=0.245][A[A[A


Training epoch 1:  42%|████▏     | 88/211 [07:35<10:36,

Training epoch 1:  76%|███████▌  | 160/211 [13:53<04:25,  5.21s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  76%|███████▋  | 161/211 [13:53<04:18,  5.18s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  76%|███████▋  | 161/211 [13:58<04:20,  5.21s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  77%|███████▋  | 162/211 [13:58<04:13,  5.18s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  77%|███████▋  | 162/211 [14:03<04:15,  5.21s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  77%|███████▋  | 163/211 [14:03<04:08,  5.18s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  77%|███████▋  | 163/211 [14:08<04:09,  5.21s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  78%|███████▊  | 164/211 [14:08<04:03,  5.18s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  78%|███████▊  | 164/211 [14:14<04:04,  5.21s/it, accuracy=23.7, loss=0.243][A[A[A


Training epoch 1:  78%|███████▊  | 165/211 [14

In [14]:
class MovieGenreClassifier(nn.Module):
    """Fully connected classifier over the concatenated style features.

    The input width is the sum of five squared sizes (64, 64, 128, 128, 256),
    i.e. 106496 values; the output is one raw score per label.
    """

    def __init__(self, nlabel):
        super(MovieGenreClassifier, self).__init__()
        # Each term is the size of one flattened square matrix.
        matrix_sides = (64, 64, 128, 128, 256)
        n_inputs = sum(side * side for side in matrix_sides)
        hidden_units = 64
        self.main = nn.Sequential(
            nn.Linear(n_inputs, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, nlabel),
        )

    def forward(self, input):
        scores = self.main(input)
        return scores

def extractStyleFeature(image):
    """Concatenate the flattened Gram matrices of every style network.

    Args:
        image: input batch fed to each network in `networks`; each Gram
            matrix is flattened with view(1, -1), i.e. into a single row.

    Returns:
        A tensor with one row holding the Gram matrices of all networks, in
        the order of `networks`.
    """
    # Iterate over whatever `networks` contains instead of indexing exactly
    # five entries, so the function follows the network list.
    grams = [gram(network(image)).view(1, -1) for network in networks]
    return torch.cat(grams, 1)
    
def extractLabel(image, id2genre, genresTable, nlabel=23):
    """Build the multi-hot genre vector of one image.

    Args:
        image: key into `id2genre` (the image file name).
        id2genre: mapping from image file name to a '|'-separated genre string.
        genresTable: mapping from genre name to its index in the label vector.
        nlabel: width of the label vector; defaults to 23, the value that was
            previously hard-coded.

    Returns:
        A 1 x nlabel tensor with 1 at the index of every genre of the image
        that appears in `genresTable`, and 0 elsewhere.

    Raises:
        KeyError: if `image` is not a key of `id2genre`.
    """
    labelVec = torch.zeros(1, nlabel)
    for genre in id2genre[image].split('|'):
        index = genresTable.get(genre)
        # Genres missing from the table (e.g. filtered out as rare) are skipped.
        if index is not None:
            labelVec[0][index] = 1

    return labelVec



In [68]:
# Show the layer structure of the trained classifier.
print(classifier)

MovieGenreClassifier (
  (main): Sequential (
    (0): Linear (106496 -> 64)
    (1): ReLU ()
    (2): Linear (64 -> 23)
  )
)


In [80]:
# Classify one test poster and compare the prediction with its CSV label.
classifier.eval()

testImg = io.imread("/home/ubuntu/notebooks/dataset/test/65481.jpg")

image = Image.fromarray(testImg)

image = Variable(loader(image))

# Same placement rule as the style networks (moved to GPU iff use_cuda).
if use_cuda:
    image = image.cuda()
# fake batch dimension required to fit network's input dimensions
image = image.unsqueeze(0)

# Read the CSV once and close it; both lookup tables below use the same rows.
with open('/home/ubuntu/notebooks/Movie-Genre-Classification-from-Movie-Poster/Dataset/NewMovieGenre.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    rows = list(reader)

# "<id>.jpg" -> raw '|'-separated genre string.
id2genre = {}
for row in rows:
    if row[0] != "":
        id2genre[row[0] + ".jpg"] = row[4]

# Genre occurrence counts, keeping only genres seen at least 100 times.
genres = {}
for row in rows:
    for ele in row[4].split('|'):
        if ele != '':
            genres[ele] = genres.get(ele, 0) + 1

for ele in list(genres):
    if genres[ele] < 100:
        del genres[ele]

# Genre -> index in the label vector, in the iteration order of `genres`.
genresTable = {}
for count, ele in enumerate(list(genres)):
    genresTable[ele] = count

feature = extractStyleFeature(image)
outputs = classifier(feature)
# Logistic function of the scores, rounded to a 0/1 prediction per genre.
outlabels = (outputs.data.exp() / (outputs.data.exp() + 1)).round()
labels = extractLabel("65481.jpg", id2genre, genresTable)
if use_cuda:
    labels = labels.cuda()

print(outputs)
print(outlabels)
print(labels)

Variable containing:

Columns 0 to 9 
-4.1053 -2.1444 -1.4137 -4.3031 -3.2465  0.0336 -2.6041 -3.9113 -3.8751 -1.6949

Columns 10 to 19 
-3.2103 -2.7808 -3.1974 -3.9793  0.1150 -2.5837 -1.7629 -3.4365 -3.2501 -2.8773

Columns 20 to 22 
-3.0871 -3.2096 -2.4093
[torch.cuda.FloatTensor of size 1x23 (GPU 0)]



Columns 0 to 12 
    0     0     0     0     0     1     0     0     0     0     0     0     0

Columns 13 to 22 
    0     1     0     0     0     0     0     0     0     0
[torch.cuda.FloatTensor of size 1x23 (GPU 0)]



Columns 0 to 12 
    0     1     0     0     0     0     0     0     0     0     0     0     0

Columns 13 to 22 
    0     1     0     0     0     0     0     0     0     0
[torch.cuda.FloatTensor of size 1x23 (GPU 0)]

