In [33]:
import os
import time
import copy
import json
import torch
import numpy as np
import torchvision
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import f1_score
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from torchvision import datasets, models, transforms
from sklearn.preprocessing import MultiLabelBinarizer

In [4]:
# Walk the metadata tree (<path>/<first-level folder>/<movie folder>/<movie folder>.json)
# and collect, for every movie whose JSON has all required fields, the folder name
# (used as the movie id), the title, the genre entry and the description.
path = '../input/48k-imdb-movies-data/Data'
images, names, genres, descriptions = [], [], [], []
all_directories = os.listdir(path)
for directory in all_directories:
    directories = os.listdir(os.path.join(path, directory))
    for dir_ in directories:
        file_path = os.path.join(path, directory, dir_, dir_ + '.json')
        # Explicit encoding so parsing does not depend on the platform default.
        with open(file_path, encoding='utf-8') as file:
            movie = json.load(file)
        try:
            # Read every required field BEFORE appending anything: a missing key
            # must skip the whole record, otherwise the four parallel lists end
            # up with different lengths and no longer line up row by row.
            name = movie['name']
            genre = movie['genre']
            description = movie['description']
        except KeyError:
            continue
        images.append(dir_)
        names.append(name)
        genres.append(genre)
        descriptions.append(description)

In [5]:
# Build the expected poster path for every movie folder under the poster root:
# <path>/<first-level folder>/<movie folder>/<movie folder>.jpg
# Only directory listings are read here; the image files themselves are not opened.
path = '../input/48k-imdb-movies-with-posters/Poster'
image_paths = [
    os.path.join(path, group, movie_id, movie_id + '.jpg')
    for group in os.listdir(path)
    for movie_id in os.listdir(os.path.join(path, group))
]

In [6]:
# One poster is excluded by its exact path (presumably an unreadable file — TODO confirm).
# The remaining paths are wrapped in a Series for the pandas processing that follows.
excluded_poster = '../input/48k-imdb-movies-with-posters/Poster/2015/tt3317562/tt3317562.jpg'
image_paths = pd.Series([poster for poster in image_paths if poster != excluded_poster])

In [7]:
# Map each movie id (the name of the folder that holds the poster) to the poster path.
# The id is derived from the path structure instead of a fixed '/'-split index, so
# the lookup does not depend on how deep the dataset root is or on the OS path
# separator (the paths were assembled with os.path.join).
dict_ = {os.path.basename(os.path.dirname(poster)): poster for poster in image_paths}

In [12]:
# Assemble one row per movie and attach the poster path looked up by movie id.
data = pd.DataFrame({'title': names,
                     'genres': genres,
                     'description': descriptions,
                     'image': images})
data['image'] = data['image'].map(dict_)
# Rows with any missing value are dropped; this includes movies whose id has no
# entry in the poster lookup (map() leaves NaN for them).
data = data.dropna()
# Flatten the genre entries to one row per (movie, genre), then regroup so that
# every (title, description, image) combination carries a single list of genres.
data = data.explode('genres')
data = (data.groupby(['title', 'description', 'image'])
            .agg({'genres': lambda x: x.tolist()})
            .reset_index())
# Shuffle AFTER grouping: groupby sorts rows by its keys, so shuffling before it
# has no lasting effect and the positional train/test split would end up being
# alphabetical by title. The fixed seed keeps the split reproducible.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

In [13]:
# Genre labels used as classification targets. Defined here, ahead of the first
# use, so that the notebook runs top to bottom on a fresh kernel.
targets = ['Action', 'Crime', 'Adventure', 'Thriller', 'Drama', 'Family',
           'Sport', 'Mystery', 'Western', 'History', 'Sci-Fi', 'Animation',
           'Documentary', 'Music', 'War', 'Biography', 'Musical', 'Superhero',
           'Horror', 'Short', 'Comedy', 'Fantasy', 'Romance', 'Film-Noir']

# Turn each movie's genre list into one 0/1 indicator column per genre.
mlb = MultiLabelBinarizer(sparse_output=True)
genre_matrix = mlb.fit_transform(data['genres'])
genre_frame = pd.DataFrame.sparse.from_spmatrix(genre_matrix,
                                                index=data.index,
                                                columns=mlb.classes_)
# Only the target genres are joined, in the order given by `targets`; indexing
# raises KeyError if a target genre never occurs in the data.
data = data.join(genre_frame[targets])

In [14]:
# Remove the title, description and raw genre-list columns in place; the later
# cells only read the poster path and the per-genre indicator columns.
data.drop(columns=['title', 'description', 'genres'], inplace=True)

In [10]:
# Genre names used as the label columns of the multi-label classification task.
# The list order fixes the column order of the label arrays built from it.
targets = [
    'Action', 'Crime', 'Adventure', 'Thriller',
    'Drama', 'Family', 'Sport', 'Mystery',
    'Western', 'History', 'Sci-Fi', 'Animation',
    'Documentary', 'Music', 'War', 'Biography',
    'Musical', 'Superhero', 'Horror', 'Short',
    'Comedy', 'Fantasy', 'Romance', 'Film-Noir',
]

In [15]:
# Positional 70/30 split: the first 70% of rows (rounded down) become the training
# set and the rest the test set. Rows are taken in the frame's current order; this
# cell does no shuffling of its own.
train_fraction = 0.7
n = int(len(data) * train_fraction)
train, test = data.iloc[:n], data.iloc[n:]

In [17]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [24]:
# Experiment configuration.
input_size = 224        # side length, in pixels, of the crops fed to the network
batch_size = 16         # samples per DataLoader batch
num_epochs = 15         # passes over the training data
num_classes = 24        # output width of the replacement classifier layer
feature_extract = True  # True: freeze the backbone and train only the new layer

In [19]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when feature extraction is requested.

    Args:
        model: module whose ``parameters()`` are iterated.
        feature_extracting: when truthy, ``requires_grad`` is set to ``False`` on
            all parameters; when falsy, the model is left untouched.

    Returns:
        None. The model is modified in place.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [20]:
def initialize_model(num_classes, feature_extract, use_pretrained=True):
    """Create a ResNet-50 whose final fully connected layer has ``num_classes`` outputs.

    Args:
        num_classes: number of output units of the new ``fc`` layer.
        feature_extract: forwarded to ``set_parameter_requires_grad``; when truthy
            the existing parameters are frozen before the new layer is attached.
        use_pretrained: forwarded to ``models.resnet50`` as ``pretrained``.

    Returns:
        A ``(model, input_size)`` tuple, where ``input_size`` is always 224.
    """
    network = models.resnet50(pretrained=use_pretrained)
    set_parameter_requires_grad(network, feature_extract)
    # The replacement head is built after the freeze step, so its freshly created
    # parameters keep the default requires_grad=True and remain trainable.
    network.fc = nn.Linear(network.fc.in_features, num_classes)
    return network, 224

In [21]:
class PostersDataset(Dataset):
    """Dataset yielding ``(transformed poster image, label vector)`` pairs.

    Args:
        paths: DataFrame with an ``image`` column holding image file paths and one
            column per entry of the module-level ``targets`` list.
        transform: callable applied to the RGB PIL image of each sample.
    """

    def __init__(self, paths, transform):
        self.images = paths.image.to_numpy()
        # Label columns are selected through the module-level `targets` list, so
        # the label vector follows that list's order.
        self.labels = paths[targets].to_numpy()
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(transform(image), label tensor)``."""
        # Open the file in a context manager so its handle is always released.
        # convert() returns a fully loaded copy, which stays valid after the
        # underlying file is closed and guarantees a 3-channel RGB image.
        with Image.open(self.images[idx]) as poster:
            image = poster.convert('RGB')
        label = self.labels[idx]
        return (self.transform(image), torch.from_numpy(label))

In [25]:
# Per-channel statistics of ImageNet, the dataset the pretrained ResNet weights
# were trained on. The backbone expects inputs normalised with these values;
# this matters most when it is frozen and cannot adapt to another input scale.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

data_transforms = {
    # Training: random crop/scale and horizontal flip as augmentation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
    ]),
    # Evaluation: deterministic resize followed by a centre crop.
    'test': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
    ]),
}

In [27]:
def my_collate_fn(data):
    """Return the batch as a tuple of its samples, without stacking them.

    Args:
        data: iterable of samples making up one batch.

    Returns:
        A tuple containing the samples in their original order.
    """
    return (*data,)

In [28]:
# Training loader: reshuffled every epoch, with the augmenting transforms.
train_dl = torch.utils.data.DataLoader(PostersDataset(train,
                                                      data_transforms['train']),
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=2,
                                       collate_fn=my_collate_fn)
# Evaluation loader: fixed order. Shuffling brings no benefit when no gradient
# step is taken, and a stable order keeps evaluation runs comparable.
test_dl = torch.utils.data.DataLoader(PostersDataset(test,
                                                     data_transforms['test']),
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=2,
                                      collate_fn=my_collate_fn)

In [34]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    """Train ``model`` and evaluate it after every epoch.

    Each epoch runs a ``'train'`` phase (with gradient steps) followed by a
    ``'val'`` phase (no gradients), printing the mean loss and the micro-averaged
    F1 score of each phase.

    Args:
        model: network returning one raw logit per label; already on ``device``.
        dataloaders: dict with ``'train'`` and ``'val'`` loaders. Each batch is an
            iterable of ``(image tensor, label tensor)`` samples, which are
            stacked here into batch tensors.
        criterion: loss called as ``criterion(outputs, labels)`` with float labels.
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, after training.
    """
    since = time.time()
    # Probability above which a label is predicted as present.
    threshold = 0.5

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            # Predictions and labels of the whole phase. F1 is computed once per
            # epoch, because the mean of per-batch F1 scores is not the F1 of the
            # full set and is noisy for small batches.
            epoch_preds, epoch_labels = [], []

            for data in tqdm(dataloaders[phase], total=len(dataloaders[phase])):
                inputs = torch.stack(tuple(t[0] for t in data)).to(device)
                labels = torch.stack(tuple(t[1] for t in data)).float().to(device)

                with torch.set_grad_enabled(is_train):
                    if is_train:
                        optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by batch size for the epoch mean.
                running_loss += loss.item() * inputs.size(0)
                # The model emits logits, so convert them to probabilities before
                # comparing against the probability threshold.
                probabilities = torch.sigmoid(outputs.detach())
                epoch_preds.append((probabilities > threshold).cpu().numpy().astype(int))
                epoch_labels.append(labels.detach().cpu().numpy().astype(int))

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            # sklearn's argument order is (y_true, y_pred).
            epoch_f1 = f1_score(np.concatenate(epoch_labels),
                                np.concatenate(epoch_preds),
                                average='micro')
            print('{} Loss: {:.4f} F1: {:.4f}'.format(phase, epoch_loss, epoch_f1))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return model

In [None]:
# Build the network and move it to the selected device. Note that this rebinds
# `input_size` to the value returned by initialize_model.
model_ft, input_size = initialize_model(num_classes, feature_extract, use_pretrained=True)
model_ft = model_ft.to(device)

# List every parameter that will receive gradients.
print("Params to learn:")
trainable_params = []
for name, param in model_ft.named_parameters():
    if param.requires_grad:
        trainable_params.append(param)
        print("\t", name)

# When feature extracting, the optimizer only receives the trainable parameters;
# otherwise it receives all parameters of the model.
params_to_update = trainable_params if feature_extract else model_ft.parameters()

# BCEWithLogitsLoss treats every output as an independent binary decision, which
# fits the multi-label setup and expects raw logits from the model.
criterion = nn.BCEWithLogitsLoss()
optimizer_ft = optim.AdamW(params_to_update, lr=0.01)

loaders = {'train': train_dl, 'val': test_dl}
model_ft = train_model(model_ft, loaders, criterion, optimizer_ft, num_epochs=num_epochs)