In [1]:
import torch
torch.cuda.current_device()
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pytorch_lightning as pl
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from PIL import Image
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import re
from nltk.tokenize import wordpunct_tokenize

# Data

In [41]:
data_folder = '../dataset/'
with open(data_folder + 'genres.txt', 'r') as f:
    genres_list = [g.replace('\n','') for g in f.readlines()]
is_drop = '_dropped'
# is_drop = ''
movies_train = pd.read_csv(data_folder + 'movies_train' + is_drop + '.csv')
movies_test = pd.read_csv(data_folder + 'movies_test' + is_drop + '.csv')
movies_val = pd.read_csv(data_folder + 'movies_val' + is_drop + '.csv')

## Title

In [3]:
TITLE_MAX_LEN = 15
pad_token = '<PAD>'
unk_token = '<UNK>'

def tokenize(text):
    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()
    tokens = wordpunct_tokenize(text)
    return tokens

def create_vocab(dataset, column='title'):
    df = dataset.copy()
    titles = df[column].tolist()
    vocab = set()
    for title in titles:
        tokens = tokenize(title)
        vocab.update(tokens)
    vocab = list(vocab)
    vocab.append(pad_token)
    vocab.append(unk_token)
    return vocab

def onehot_vectorize(title, title2int):
    tokens = tokenize(title)
    tokens = tokens[:TITLE_MAX_LEN]
    while len(tokens) < TITLE_MAX_LEN:
        tokens.append(pad_token)
    title_vec = np.zeros((TITLE_MAX_LEN,len(title2int)), dtype=np.float32)
    for i, token in enumerate(tokens):
        if token in title2int:
            title_vec[i][title2int[token]] = 1
        else:
            title_vec[i][title2int[unk_token]] = 1
    return title_vec
    
def multihot_genres(genres,  genres_dict):
    genres = genres.strip('][').replace("'", "").split(', ')
    multi_hot = np.zeros(len(genres_dict))
    for genre in genres:
        if genre in genres_dict:
                multi_hot[genres_dict[genre]] = 1
    return multi_hot

## Poster

In [4]:
# Train the model with different image sizes
IMAGE_SIZE={"16": (16, 16),
            "24": (24, 24),
            "32": (32, 32),
            "40": (40, 40),
            "64": (64, 64),
            "72": (72, 72),
            "128": (128, 128),
            "224": (224, 224),
            "256": (256, 256)}
def getImgTransformer(size = '224'):
    return transforms.Compose([
            transforms.Resize(size=IMAGE_SIZE[size]),
            # transforms.CenterCrop(size=(224,224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
        ])

## Dataset

In [5]:
class titleDataset(Dataset):
    def __init__(self, df, posterTransformer):
        self.df = df

        # title process
        vocab = create_vocab(df, column='title')
        self.vocab_size = len(vocab)
        self.title2int = {word: i for i, word in enumerate(vocab)}

        # image process
        self.transformer = posterTransformer

        # genres process
        genres_list = ['Crime', 'Thriller', 'Fantasy', 'Horror', 'Sci-Fi', 'Comedy', 'Documentary', 'Adventure', 'Film-Noir', 'Animation', 'Romance', 'Drama', 'Western', 'Musical', 'Action', 'Mystery', 'War', "Children's"]
        self.genre2int = {genre: i for i, genre in enumerate(genres_list)} 

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        title = self.df.iloc[idx]['title']
        title_vec = onehot_vectorize(title, self.title2int)
        genres = self.df.iloc[idx]['genre']
        genres_vec = multihot_genres(genres, self.genre2int)
        img_path = self.df.iloc[idx]['img_path']
        if not os.path.exists(img_path):
            img_path = '../dataset/images/0.jpg'
        img = Image.open(img_path)
        if len(img.getbands()) == 1: # check if the image have only one channel
            trans = transforms.Grayscale(num_output_channels=3)
            img = trans(img) # convert image to a three-channel image
        img = self.transformer(img)
        return title_vec, img, genres_vec
    
    def merge_vocab(self, other):
        self.title2int.update(other.title2int)
        self.genre2int.update(other.genre2int)
        self.vocab_size = len(self.title2int)

In [42]:
NUM_WORKERS = os.cpu_count()
BATCH_SIZE = 32
posterTransformer = getImgTransformer(size='256')
train = titleDataset(movies_train, posterTransformer)
val = titleDataset(movies_val, posterTransformer)
test = titleDataset(movies_test, posterTransformer)
train.merge_vocab(val)
train.merge_vocab(test)
val.merge_vocab(train)
test.merge_vocab(train)
train_dataloader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_dataloader = DataLoader(val, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_dataloader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Model

In [44]:
import models

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.is_available())
print(train_dataloader.dataset.vocab_size)

titleParam = (train_dataloader.dataset.vocab_size, 128, 2, True, 18)
posterParam = (3,18)
model = models.theModel('LSTM', titleParam, 'DenseNet169', posterParam, 18, device=device)
model.to(device)

cuda:0
True
3542
LSTM 3542 128 2 True 18


theModel(
  (titleModel): LSTM(
    (core): LSTM(3542, 128, num_layers=2, bidirectional=True)
    (linear): Sequential(
      (0): Linear(in_features=256, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): ReLU()
      (4): Linear(in_features=32, out_features=18, bias=True)
    )
  )
  (posterModel): DenseNet169Model(
    (model): DenseNet(
      (features): Sequential(
        (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu0): ReLU(inplace=True)
        (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (denseblock1): _DenseBlock(
          (denselayer1): _DenseLayer(
            (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu1): ReLU(inplace=True)
            (conv1): Conv

# Train

In [45]:
NUM_EPOCHS = 20
trainer = pl.Trainer(max_epochs=NUM_EPOCHS)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model, train_dataloader, val_dataloader)

# Infer and Metric

In [46]:
from metrics import normalize, P_at_K, AP_at_K, MAP_at_K

In [48]:
# trained_path = trainer.checkpoint_callback.best_model_path
trained_path = '../model/saved_models/denseNet169_256_drop.ckpt'
res = trainer.predict(model, dataloaders=test_dataloader, ckpt_path=trained_path)

pred = torch.cat([ep[0] for ep in res])
truth = torch.cat([ep[1] for ep in res])

pred_1 = normalize(pred, topk=True)
print(pred.shape)
print(pred_1.shape)
print(truth.shape)

Restoring states from the checkpoint path at ../model/saved_models/denseNet169_256_drop.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ../model/saved_models/denseNet169_256_drop.ckpt


Predicting DataLoader 0: 100%|██████████| 21/21 [00:06<00:00,  3.20it/s]
torch.Size([654, 18])
torch.Size([654, 18])
torch.Size([654, 18])


In [49]:
print("--------------------------------------")
print('MAP@1 ', MAP_at_K(1, pred, truth))
print('MAP@2 ', MAP_at_K(2, pred, truth))
print('MAP@3 ', MAP_at_K(3, pred, truth))
print('MAP@4 ', MAP_at_K(4, pred, truth))

print("--------------------------------------")
from torchmetrics.functional.classification import multilabel_f1_score
from torchmetrics.classification import MultilabelF1Score


# f1arr = []
# for thres in range(1, 20):
#     thres /= 20
#     f1ma = MultilabelF1Score(num_labels=18, threshold=thres, average='macro')
#     # f1arr.append((thres,f1ma(pred, truth).tolist()))
#     print(thres, ' : ', f1ma(pred, truth).tolist())

thres=0.8
# print('mF1 - micro:    ', multilabel_f1_score(pred, truth, num_labels=18, threshold=thres, average='micro').tolist())
# print('mF1 - macro:    ', multilabel_f1_score(pred, truth, num_labels=18, threshold=thres, average='macro').tolist())
# print('mF1 - weighted: ', multilabel_f1_score(pred, truth, num_labels=18, threshold=thres, average='weighted').tolist())

f1ma = MultilabelF1Score(num_labels=18, threshold=thres, average='macro')
print('F1 macro :', f1ma(pred, truth).tolist())
f1mi = MultilabelF1Score(num_labels=18, threshold=thres, average='micro')
print('F1 micro :', f1mi(pred, truth).tolist())

print("--------------------------------------")
# from torchmetrics.functional.classification import multilabel_accuracy
from torchmetrics.classification import MultilabelAccuracy
acc = MultilabelAccuracy(num_labels=18, threshold=thres)
print('Accuracy :', acc(pred, truth).tolist())

print("--------------------------------------")
# from torchmetrics.functional.classification import multilabel_precision
from torchmetrics.classification import MultilabelPrecision
prec = MultilabelPrecision(num_labels=18, threshold=thres, average='macro')
print('Precision :', prec(pred, truth).tolist())

print("--------------------------------------")
# from torchmetrics.functional.classification import multilabel_recall
from torchmetrics.classification import MultilabelRecall
rec = MultilabelRecall(num_labels=18, threshold=thres, average='macro')
print('Recall :', rec(pred, truth).tolist())


--------------------------------------
MAP@1  0.4877675840978593
MAP@2  0.4308103975535168
MAP@3  0.3903329935440032
MAP@4  0.3588812436289512
--------------------------------------
F1 macro : 0.19717538356781006
F1 micro : 0.4106113016605377
--------------------------------------
Accuracy : 0.8697757124900818
--------------------------------------
Precision : 0.16523084044456482
--------------------------------------
Recall : 0.2966862916946411


In [40]:
#Self define accuracy:
for num in range(1,6):
    acc = 0
    for i in range(len(pred)):
        acc += P_at_K(num, pred[i], truth[i])
    print('P@{} :'.format(num), acc/len(pred))

P@1 : 0.45045045045045046
P@2 : 0.379021879021879
P@3 : 0.3110253110253123
P@4 : 0.25804375804375806
P@5 : 0.22754182754182742
