In [8]:
# load results
import numpy as np

resnet50_predictions = np.load("resnet50_predictions.npz")["arr_0"]
resnet50_predictions

array([[1.36235785e-02, 3.86725925e-02, 2.27730751e-01, ...,
        3.41898296e-03, 6.84319704e-04, 1.10790820e-03],
       [3.36953532e-03, 3.53256636e-03, 3.32771924e-05, ...,
        1.20849200e-01, 7.84340024e-04, 1.74556990e-04],
       [1.58269983e-02, 1.64419755e-01, 2.51321226e-01, ...,
        1.81481452e-03, 2.22116680e-04, 7.49232058e-05],
       ...,
       [8.61908402e-03, 1.65966637e-02, 1.53604103e-03, ...,
        2.48016603e-02, 8.57372710e-04, 2.44405412e-04],
       [2.48402860e-02, 1.86409745e-02, 4.12693829e-04, ...,
        3.87707800e-02, 1.65469036e-01, 1.59916766e-02],
       [1.52784004e-03, 1.10406789e-03, 1.11099752e-03, ...,
        4.59945342e-03, 6.80508616e-04, 3.68338136e-04]], dtype=float32)

In [9]:
import pandas as pd


from dataset import PosterDataset

df = pd.read_csv(PosterDataset.csv_file)
true_values = df.iloc[:, -20:-1].values
genres = df.columns[-20:-1]
genres = list(map(lambda x: x[1:], genres))
single_genres = df.genre_count == 1

In [10]:
from sklearn.model_selection import train_test_split

train_indices, test_indices = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)
test_mask = np.isin(np.arange(len(df)), test_indices)
single_genres &= test_mask

In [15]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, roc_auc_score

# calculate accuracy
single_prediction = (
    resnet50_predictions[single_genres]
    == resnet50_predictions.max(axis=1)[single_genres][:, None]
)
roc_auc_score(true_values[single_genres], single_prediction, average="macro"), f1_score(
    true_values[single_genres], single_prediction, average="macro"
), precision_score(
    true_values[single_genres], single_prediction, average="macro"
), accuracy_score(
    true_values[single_genres], single_prediction
),

(0.6263585338537645,
 0.299289987074939,
 0.33580036859436097,
 0.5455732366674859)

### 3 Single Label


In [12]:
three_columns = ["Action", "Romance", "Thriller"]
three_genres = np.isin(genres, three_columns)
three_genres_index_mask = (
    true_values[:, three_genres].sum(axis=1) >= 1
) & single_genres
three_genres_index_mask.sum()

3275

In [16]:
from sklearn.metrics import accuracy_score, f1_score

# calculate accuracy
three_single_prediction = resnet50_predictions[three_genres_index_mask,][
    :, three_genres
]
three_single_prediction_classes = (
    three_single_prediction == three_single_prediction.max(axis=1)[:, None]
)

roc_auc_score(
    true_values[three_genres_index_mask,][:, three_genres],
    three_single_prediction_classes,
    average="macro",
), f1_score(
    true_values[three_genres_index_mask,][:, three_genres],
    three_single_prediction_classes,
    average="macro",
), precision_score(
    true_values[three_genres_index_mask,][:, three_genres],
    three_single_prediction_classes,
    average="macro",
), accuracy_score(
    three_single_prediction_classes,
    true_values[three_genres_index_mask,][:, three_genres],
)

(0.7747479441120696,
 0.6966546602417862,
 0.7015623338455478,
 0.6964885496183206)

In [14]:
three_single_prediction_classes.sum(axis=0) / three_single_prediction.shape[0]

array([0.28305344, 0.38320611, 0.33374046])