In [8]:
# load results
import numpy as np

resnet18_predictions = np.load("resnet18_predictions.npz")["arr_0"]
resnet18_predictions

array([[5.9096091e-03, 1.8780015e-02, 1.3236778e-02, ..., 4.5237308e-03,
        1.4008738e-03, 2.3258946e-04],
       [2.4652281e-03, 1.8975829e-03, 1.5274537e-03, ..., 1.7141350e-02,
        2.9268896e-04, 8.4046675e-05],
       [2.8993608e-02, 7.7607580e-02, 4.1559402e-02, ..., 5.5505987e-03,
        5.9728534e-04, 9.5455231e-05],
       ...,
       [2.5258152e-02, 1.6007029e-02, 2.2484516e-03, ..., 2.0169454e-02,
        4.5092553e-03, 2.7794398e-05],
       [7.1792738e-03, 1.3719053e-02, 3.4775727e-03, ..., 1.8464502e-02,
        1.1106500e-02, 5.5939052e-03],
       [4.4155668e-04, 6.5601862e-04, 1.6811199e-04, ..., 7.2238903e-04,
        6.0856150e-04, 6.6429155e-04]], dtype=float32)

In [9]:
import pandas as pd


from dataset import PosterDataset

df = pd.read_csv(PosterDataset.csv_file)
true_values = df.iloc[:, -20:-1].values
genres = df.columns[-20:-1]
genres = list(map(lambda x: x[1:], genres))
single_genres = df.genre_count == 1

In [10]:
from sklearn.model_selection import train_test_split

train_indices, test_indices = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)
test_mask = np.isin(np.arange(len(df)), test_indices)
single_genres &= test_mask

In [15]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, roc_auc_score

# calculate accuracy
single_prediction = (
    resnet18_predictions[single_genres]
    == resnet18_predictions.max(axis=1)[single_genres][:, None]
)

roc_auc_score(true_values[single_genres], single_prediction, average="macro"), f1_score(
    true_values[single_genres], single_prediction, average="macro"
), precision_score(
    true_values[single_genres], single_prediction, average="macro"
), accuracy_score(
    true_values[single_genres], single_prediction
),

(0.6162750963581851,
 0.2814491567500852,
 0.34001448930279726,
 0.5364032931924305)

### 3 Single Label


In [12]:
three_columns = ["Action", "Romance", "Thriller"]
three_genres = np.isin(genres, three_columns)
three_genres_index_mask = (
    true_values[:, three_genres].sum(axis=1) >= 1
) & single_genres
three_genres_index_mask.sum()

3275

In [16]:
from sklearn.metrics import accuracy_score, f1_score

# calculate accuracy
three_single_prediction = resnet18_predictions[three_genres_index_mask,][
    :, three_genres
]
three_single_prediction_classes = (
    three_single_prediction == three_single_prediction.max(axis=1)[:, None]
)

roc_auc_score(
    true_values[three_genres_index_mask,][:, three_genres],
    three_single_prediction_classes,
    average="macro",
), f1_score(
    true_values[three_genres_index_mask,][:, three_genres],
    three_single_prediction_classes,
    average="macro",
), precision_score(
    true_values[three_genres_index_mask,][:, three_genres],
    three_single_prediction_classes,
    average="macro",
), accuracy_score(
    three_single_prediction_classes,
    true_values[three_genres_index_mask,][:, three_genres],
)

(0.7742326413464683,
 0.6976424788789392,
 0.6971367086670432,
 0.6970992366412214)

In [14]:
three_single_prediction_classes.sum(axis=0) / three_single_prediction.shape[0]

array([0.33374046, 0.33526718, 0.33099237])