In [None]:
from transformers import ConvNextFeatureExtractor, ConvNextForImageClassification
import pandas as pd

In [None]:
# Genre label columns. The order here is the order of the per-film label
# vector and its length sets the size of the classification head.
CATEGORIES = [
    "action", "adventure", "animation", "biography",
    "comedy", "crime", "documentary", "drama",
    "family", "fantasy", "film_noir", "history",
    "horror", "music", "musical", "mystery",
    "romance", "sci_fi", "short", "sport",
    "superhero", "thriller", "war", "western",
]

In [None]:
# Quick look at the preprocessed annotations table.
# NOTE: the name `df` is rebound to a FilmDataset in a later cell.
df = pd.read_csv("data/preprocessed.csv")
df.head()

In [None]:
# Checkpoint identifier handed to both `from_pretrained` calls in this notebook
# (the image preprocessor and the classification model).
model_name_or_path = "facebook/convnext-tiny-224"

In [None]:
# Image preprocessor loaded from the same checkpoint as the model; it is read as
# a module-level global by FilmFeatureExtractor.__call__.
# NOTE(review): recent transformers releases appear to deprecate the
# *FeatureExtractor classes in favour of *ImageProcessor — confirm against the
# installed version.
feature_extractor = ConvNextFeatureExtractor.from_pretrained(model_name_or_path)

In [None]:
import torch
import torchvision.transforms as transforms
import os
import pandas as pd
from skimage import io
from torch.utils.data import (
    Dataset,
    DataLoader
)

class FilmFeatureExtractor:
    """Callable transform: preprocess a poster image and attach its labels.

    Image preprocessing is delegated to the module-level ``feature_extractor``.
    """

    def __call__(self, image, targets):
        """Return the preprocessor output for ``image`` with ``targets`` added.

        Args:
            image: Image passed straight through to ``feature_extractor``.
            targets: Label object stored unchanged under the ``"labels"`` key.

        Returns:
            The object returned by ``feature_extractor(image, return_tensors='pt')``
            with an extra ``"labels"`` entry.
        """
        encoded = feature_extractor(image, return_tensors='pt')
        encoded["labels"] = targets
        return encoded

class FilmDataset(Dataset):
    """Map-style dataset pairing film poster images with genre labels.

    Each row of the annotations CSV supplies a ``poster_path`` column (joined
    onto ``root_dir``) and one column per entry of the module-level
    ``CATEGORIES`` list.

    Args:
        csv_file: Path of the annotations CSV, read with ``pandas.read_csv``.
        root_dir: Directory prefix joined with each row's ``poster_path``.
        transform: Optional callable invoked as ``transform(image, labels)``;
            whatever it returns becomes the sample. When ``None`` the raw
            ``(image, labels)`` pair is returned instead.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        # Mirrors DataFrame.shape so callers can read the row count as
        # ``dataset.shape[0]``.
        self.shape = self.annotations.shape
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotated films."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the poster and label vector for row ``index``.

        Returns:
            ``transform(image, labels)`` when a transform was supplied,
            otherwise the tuple ``(image, labels)``. ``labels`` is a 1-D
            tensor with one entry per name in ``CATEGORIES``.
        """
        # Look the row up once and reuse it for both the path and the labels.
        row = self.annotations.iloc[index]
        image = io.imread(os.path.join(self.root_dir, row['poster_path']))

        # A single row of a mixed-dtype frame can come back as an object-dtype
        # Series; convert it to a numeric array explicitly instead of relying
        # on torch to iterate/index the pandas Series itself.
        labels = torch.tensor(pd.to_numeric(row[CATEGORIES]).to_numpy())

        # ``transform`` defaults to None, so it must not be called blindly.
        if self.transform is None:
            return image, labels
        return self.transform(image, labels)

# Dataset over every row of the preprocessed CSV. `root_dir` is empty, so each
# `poster_path` value is used as the image path unchanged.
poster_transform = FilmFeatureExtractor()
df = FilmDataset(
    csv_file="data/preprocessed.csv",
    root_dir="",
    transform=poster_transform,
)


In [None]:
# Smoke test: fetch and display the first transformed sample.
df[0]

## Train / eval / test split

In [None]:
# 40% train, 40% eval, and whatever is left over (about 20%) for test.
n_samples = df.shape[0]
train_size = int(0.4 * n_samples)
eval_size = int(0.4 * n_samples)
test_size = n_samples - train_size - eval_size

print(n_samples, train_size, eval_size, test_size)

In [None]:
# Seed the split explicitly so a fresh Restart & Run All puts the same films in
# the same train/eval/test subsets every time.
df_train, df_eval, df_test = torch.utils.data.random_split(
    df,
    [train_size, eval_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

## Model

In [None]:
# Load the pretrained ConvNeXt weights with a classification head sized to the
# genre list (one output per entry of CATEGORIES).
# ignore_mismatched_sizes=True lets loading proceed when checkpoint weights do
# not match the requested shapes — presumably the checkpoint's original head
# has a different number of classes; TODO confirm.
model = ConvNextForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(CATEGORIES),
    ignore_mismatched_sizes=True
)

In [None]:
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback

# Trainer outputs are written under ./data/img, and evaluation is scheduled by
# training step. Every other hyperparameter is left at the library default.
# NOTE(review): newer transformers releases appear to rename
# `evaluation_strategy` to `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./data/img", 
    evaluation_strategy="steps"
)

In [None]:
import numpy as np
from datasets import load_metric

# Accuracy metric object used by compute_metrics.
# NOTE(review): `datasets.load_metric` looks to be deprecated in recent
# `datasets` releases in favour of the separate `evaluate` package — confirm
# against the installed version.
metric = load_metric("accuracy")

In [None]:
def compute_metrics(eval_pred):
    """Compute evaluation accuracy from a ``(logits, labels)`` pair.

    Two label layouts are supported:

    * 1-D class indices: predictions are the arg-max over the last logit axis
      and are scored with the module-level ``metric``.
    * 2-D multi-hot rows (one column per genre): a genre counts as predicted
      when its logit is positive, which is the same as ``sigmoid(logit) > 0.5``.

    Args:
        eval_pred: Object that unpacks into ``(logits, labels)``.

    Returns:
        dict: ``{"accuracy": ...}`` for class-index labels. For multi-hot labels
        ``"accuracy"`` is the fraction of individual genre flags predicted
        correctly and ``"exact_match"`` is the fraction of samples whose whole
        genre vector is correct.
    """
    # The previous print(eval_pred) dumped every logit and label array on each
    # evaluation pass; it was debugging residue and is intentionally gone.
    logits, labels = eval_pred
    logits = np.asarray(logits)
    labels = np.asarray(labels)

    if labels.ndim > 1:
        predicted = logits > 0
        expected = labels > 0.5
        hits = predicted == expected
        return {
            "accuracy": float(hits.mean()),
            "exact_match": float(hits.all(axis=-1).mean()),
        }

    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [None]:
import torch

def collate_fn(batch):
    """Stack per-sample dicts into one batch dict for the Trainer.

    Args:
        batch: Sequence of samples, each with a ``'pixel_values'`` tensor that
            carries a leading batch axis of size 1 and a ``'labels'`` vector
            holding one flag per genre.

    Returns:
        dict: ``'pixel_values'`` stacked along a new batch axis, and ``'labels'``
        as a float tensor of shape ``(batch, num_labels)``.
    """
    # pixel_values come out of the preprocessor with a leading batch axis of
    # size 1; drop it before stacking.
    pixel_values = torch.stack([sample['pixel_values'][0] for sample in batch])

    # Labels have no such leading axis, so indexing them with [0] (as the pixel
    # values are) would keep only the first genre flag. Keep the whole vector;
    # reshape(-1) also tolerates a label stored as (1, num_labels).
    labels = torch.stack(
        [torch.as_tensor(sample['labels']).reshape(-1) for sample in batch]
    )

    return {
        'pixel_values': pixel_values,
        # Float targets are what a multi-label (BCE-with-logits) loss expects,
        # and a floating dtype is how Hugging Face classification heads pick
        # that loss when config.problem_type is left unset.
        'labels': labels.float(),
    }

In [None]:
from transformers import Trainer

# Wire the model, arguments, data splits, batching function and metric hook
# together. Only the train and eval subsets are handed to the Trainer; df_test
# is not passed in here.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=df_train,
    eval_dataset=df_eval,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the Trainer configured above.
trainer.train()