# Validation

## Image model

In [31]:
from src.models.movie_genre_module import MovieGenreModule
from src.data.components.movie_genre_dataset import MovieGenreDataset
from torch.utils.data import DataLoader
import torch
from torchmetrics.classification import MultilabelF1Score
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd

# config
ckpt_path = "logs/train/runs/2023-12-24_11-26-55/checkpoints/epoch_021.ckpt"
threshold_decode = 0.3
device = 'cuda'
num_classes = 18
genre_file = 'data/ml1m/content/dataset/genres.txt'
excel_path =  "data/report/experiment_2_renet50_datav2.xlsx" # chang this
results_path = "data/inference_results/renet50_datav2.npz" # chang this
save_golder_label = None

# excel_path =  None
# results_path = None
# save_golder_label = None


# helper functions
def decode_label(output_probability):
    output_label = []
    for each_output in output_probability:
        output_label.append([0 if x < threshold_decode else 1 for x in each_output])
    return output_label

# components
model = MovieGenreModule.load_from_checkpoint(ckpt_path)
test_set = MovieGenreDataset(
    set='ensemble_test',
    data_file='data/ml1m/content/dataset/movies_test.dat',
    folder_img_path='data/ml1m/content/dataset/ml1m-images',
    genre_file=genre_file
)
test_loader = DataLoader(
    dataset=test_set,
    batch_size=64,
    num_workers=32,
    pin_memory=False,
    shuffle=False,
)

# compute
model.eval()
model.to(device)

golden_label = []
predict_label = []
predict_probs = []

f1 = MultilabelF1Score(num_labels=num_classes, threshold=threshold_decode, average='macro')
f1 = f1.to(device)
f1_all = 0

with torch.no_grad():
    for img_tensor, genre_tensor in test_loader:
        img_tensor = img_tensor.to(device)
        genre_tensor = genre_tensor.to(device)
        
        probs = model(img_tensor)
        preds = torch.tensor(decode_label(probs), device="cuda")
        
        predict_probs.extend(probs.cpu().numpy())
        predict_label.extend(preds.cpu().numpy())
        golden_label.extend(genre_tensor.cpu().numpy())
        
        f1_batch = f1(probs, genre_tensor)
        f1_all += f1_batch

print('F1 (from baseline notebooks): ', f1_all/len(test_loader))

# report
predict_probs = np.array(predict_probs)
predict_label = np.array(predict_label)
golden_label = np.array(golden_label)

with open(genre_file, 'r') as f:
    genre_all = f.readlines()
genre_all = [x.replace('\n', '') for x in genre_all]
print("\n Classification reports")
print(classification_report(golden_label, predict_label, target_names=genre_all))

# export
if excel_path is not None:
    report = classification_report(golden_label, predict_label, target_names=genre_all, output_dict=True)
    df = pd.DataFrame(report)
    df = df.transpose()
    df.to_excel(excel_path)

if results_path is not None:
    with open(results_path, 'wb') as f:
        np.save(f, predict_probs)

if save_golder_label is not None:
    with open(save_golder_label, 'wb') as f:
        np.save(f, golden_label)

/work/hpc/pad/PAD2003-multi-label-movie-genres/envs/lib/python3.8/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'net' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['net'])`.


F1 (from baseline notebooks):  tensor(0.1966, device='cuda:0')

 Classification reports
              precision    recall  f1-score   support

       Crime       0.15      0.10      0.12        31
    Thriller       0.30      0.32      0.31       106
     Fantasy       0.07      0.14      0.10         7
      Horror       0.36      0.21      0.27        75
      Sci-Fi       0.36      0.33      0.34        48
      Comedy       0.74      0.30      0.43       247
 Documentary       0.09      0.40      0.15        30
   Adventure       0.00      0.00      0.00        48
   Film-Noir       0.11      0.17      0.13         6
   Animation       0.32      0.67      0.43        21
     Romance       0.29      0.13      0.18        94
       Drama       0.65      0.26      0.37       309
     Western       0.25      0.29      0.27        14
     Musical       0.20      0.38      0.26        13
      Action       0.34      0.30      0.32        90
     Mystery       0.00      0.00      0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Hybrid model

In [1]:
from src.models.hybrid_movie_genre_module import HybridMovieGenreModule
from src.data.components.hybrid_movie_genre_dataset import HybridMovieGenreDataset
from torch.utils.data import DataLoader 
import torch
from torchmetrics.classification import MultilabelF1Score
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd

# config
ckpt_path = "logs/train/runs/2023-12-26_02-14-54/checkpoints/epoch_029.ckpt"
threshold_decode = 0.3
device = 'cuda'
num_classes = 18
genre_file = 'data/ml1m/content/dataset/genres.txt'
save_golder_label = None

# excel_path =  "data/report/experiment_4_hybrid.xlsx" # chang this
# results_path = "data/inference_results/hybrid.npz" # chang this

excel_path =  None
results_path = None

# helper functions
def decode_label(output_probability):
    output_label = []
    for each_output in output_probability:
        output_label.append([0 if x < threshold_decode else 1 for x in each_output])
    return output_label

# components 
model = HybridMovieGenreModule.load_from_checkpoint(ckpt_path)
test_set = HybridMovieGenreDataset(
    set='ensemble_test',
    data_file='data/ml1m/content/dataset/movies_test.dat',
    folder_img_path='data/ml1m/content/dataset/ml1m-images',
    genre_file=genre_file
)
test_loader = DataLoader(
    dataset=test_set,
    batch_size=64,
    num_workers=32,
    pin_memory=False,
    shuffle=False,
)

# compute
model.eval()
model.to(device)

golden_label = []
predict_label = []
predict_probs = []

f1 = MultilabelF1Score(num_labels=num_classes, threshold=threshold_decode, average='macro')
f1 = f1.to(device)
f1_all = 0

with torch.no_grad():
    for text_input_ids, text_attention_mask, image_input, genre_tensor in test_loader:
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        image_input = image_input.to(device)
        genre_tensor = genre_tensor.to(device)
        
        probs = model(text_input_ids, text_attention_mask, image_input)
        preds = torch.tensor(decode_label(probs), device="cuda")
        
        predict_probs.extend(probs.cpu().numpy())
        predict_label.extend(preds.cpu().numpy())
        golden_label.extend(genre_tensor.cpu().numpy())
        
        f1_batch = f1(probs, genre_tensor)
        f1_all += f1_batch

print('F1 (from baseline notebooks): ', f1_all/len(test_loader))

# report
predict_probs = np.array(predict_probs)
predict_label = np.array(predict_label)
golden_label = np.array(golden_label)

with open(genre_file, 'r') as f:
    genre_all = f.readlines()
genre_all = [x.replace('\n', '') for x in genre_all]
print("\n Classification reports")
print(classification_report(golden_label, predict_label, target_names=genre_all))

# export
if excel_path is not None:
    report = classification_report(golden_label, predict_label, target_names=genre_all, output_dict=True)
    df = pd.DataFrame(report)
    df = df.transpose()
    df.to_excel(excel_path)

if results_path is not None:
    with open(results_path, 'wb') as f:
        np.save(f, predict_probs)

if save_golder_label is not None:
    with open(save_golder_label, 'wb') as f:
        np.save(f, golden_label)

  from .autonotebook import tqdm as notebook_tqdm
/work/hpc/pad/PAD2003-multi-label-movie-genres/envs/lib/python3.8/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'net' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['net'])`.


F1 (from baseline notebooks):  tensor(0.2595, device='cuda:0')

 Classification reports
              precision    recall  f1-score   support

       Crime       0.10      0.19      0.13        31
    Thriller       0.36      0.38      0.37       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.66      0.64      0.65        75
      Sci-Fi       0.53      0.54      0.54        48
      Comedy       0.55      0.53      0.54       247
 Documentary       0.14      0.03      0.05        30
   Adventure       0.29      0.29      0.29        48
   Film-Noir       0.00      0.00      0.00         6
   Animation       0.57      0.19      0.29        21
     Romance       0.29      0.23      0.26        94
       Drama       0.46      0.84      0.59       309
     Western       0.12      0.07      0.09        14
     Musical       0.00      0.00      0.00        13
      Action       0.40      0.44      0.42        90
     Mystery       0.30      0.17      0.21    

  _warn_prf(average, modifier, msg_start, len(result))


## Hybrid v2

In [5]:
from src.models.hybrid_movie_genre_module_v2 import HybridMovieGenreModuleV2
from src.data.components.hybrid_movie_genre_dataset_v2 import HybridMovieGenreDatasetv2
from torch.utils.data import DataLoader 
import torch
from torchmetrics.classification import MultilabelF1Score
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd

# config
ckpt_path = "logs/train/runs/2023-12-26_17-11-33/checkpoints/epoch_029.ckpt"
threshold_decode = 0.3
device = 'cuda'
num_classes = 18
genre_file = 'data/ml1m/content/dataset/genres.txt'
save_golder_label = None

excel_path =  "data/report/experiment_4_hybrid_v2.xlsx" # chang this
results_path = "data/inference_results/hybrid_v2.npz" # chang this

# excel_path =  None
# results_path = None

# helper functions
def decode_label(output_probability):
    output_label = []
    for each_output in output_probability:
        output_label.append([0 if x < threshold_decode else 1 for x in each_output])
    return output_label

# components 
model = HybridMovieGenreModuleV2.load_from_checkpoint(ckpt_path)
test_set = HybridMovieGenreDatasetv2(
    set='ensemble_test',
    data_file='data/ml1m/content/dataset/movies_test.dat',
    user_file='data/ml1m/content/dataset/users.dat',
    rating_file='data/ml1m/content/dataset/ratings.dat',
    folder_img_path='data/ml1m/content/dataset/ml1m-images',
    genre_file=genre_file
)
test_loader = DataLoader(
    dataset=test_set,
    batch_size=64,
    num_workers=32,
    pin_memory=False,
    shuffle=False,
)

# compute
model.eval()
model.to(device)

golden_label = []
predict_label = []
predict_probs = []

f1 = MultilabelF1Score(num_labels=num_classes, threshold=threshold_decode, average='macro')
f1 = f1.to(device)
f1_all = 0

with torch.no_grad():
    for (ratings_general, ratings_age, ratings_occupation), text_input_ids, text_attention_mask, image_input, genre_tensor in test_loader:
        ratings_general = ratings_general.to(device)
        ratings_age = ratings_age.to(device)
        ratings_occupation = ratings_occupation.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        image_input = image_input.to(device)
        genre_tensor = genre_tensor.to(device)
        
        user_rating = (ratings_general, ratings_age, ratings_occupation)
        probs = model(user_rating, text_input_ids, text_attention_mask, image_input)
        preds = torch.tensor(decode_label(probs), device="cuda")
        
        predict_probs.extend(probs.cpu().numpy())
        predict_label.extend(preds.cpu().numpy())
        golden_label.extend(genre_tensor.cpu().numpy())
        
        f1_batch = f1(probs, genre_tensor)
        f1_all += f1_batch

print('F1 (from baseline notebooks): ', f1_all/len(test_loader))

# report
predict_probs = np.array(predict_probs)
predict_label = np.array(predict_label)
golden_label = np.array(golden_label)

with open(genre_file, 'r') as f:
    genre_all = f.readlines()
genre_all = [x.replace('\n', '') for x in genre_all]
print("\n Classification reports")
print(classification_report(golden_label, predict_label, target_names=genre_all))

# export
if excel_path is not None:
    report = classification_report(golden_label, predict_label, target_names=genre_all, output_dict=True)
    df = pd.DataFrame(report)
    df = df.transpose()
    df.to_excel(excel_path)

if results_path is not None:
    with open(results_path, 'wb') as f:
        np.save(f, predict_probs)

if save_golder_label is not None:
    with open(save_golder_label, 'wb') as f:
        np.save(f, golden_label)

/work/hpc/pad/PAD2003-multi-label-movie-genres/envs/lib/python3.8/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'net' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['net'])`.


F1 (from baseline notebooks):  tensor(0.2840, device='cuda:0')

 Classification reports
              precision    recall  f1-score   support

       Crime       0.10      0.19      0.13        31
    Thriller       0.36      0.55      0.44       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.66      0.64      0.65        75
      Sci-Fi       0.32      0.65      0.43        48
      Comedy       0.48      0.43      0.45       247
 Documentary       0.17      0.07      0.10        30
   Adventure       0.27      0.25      0.26        48
   Film-Noir       0.04      0.33      0.08         6
   Animation       0.23      0.57      0.33        21
     Romance       0.29      0.30      0.29        94
       Drama       0.55      0.46      0.50       309
     Western       0.15      0.14      0.15        14
     Musical       0.14      0.38      0.20        13
      Action       0.40      0.46      0.42        90
     Mystery       0.09      0.56      0.16    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Ensemble

In [7]:
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd

def decode_label(output_probability):
    output_label = []
    for each_output in output_probability:
        output_label.append([0 if x < 0.3 else 1 for x in each_output])
    return np.array(output_label)

genre_file = 'data/ml1m/content/dataset/genres.txt'
with open(genre_file, 'r') as f:
    genre_all = f.readlines()
genre_all = [x.replace('\n', '') for x in genre_all]

In [1]:
index_not_images = [
    21, 37, 60, 75, 76, 93, 101, 112494, 117, 124, 125, 129, 139, 141, 153,
    155, 161, 163, 167, 172548, 203, 205270, 213, 222, 234, 238, 240, 249, 254,
    256, 257, 267588, 283, 284, 290, 354479, 360292, 372, 374306, 390, 391710,
    403, 405722467, 408, 451195, 470, 471, 481, 484171, 501, 504, 508, 515, 516,
    522, 5452, 553, 561, 567, 572510, 576, 590650, 586, 596, 612, 633, 643, 661410346,
    678297, 681, 703, 724, 726, 729, 732164477, 743, 745302, 748, 753239, 760, 762, 767,
    772
]
print(len(index_not_images))

83


In [56]:
golden_label_results = "data/inference_results/golden_label.npz"
text_model_results = "data/inference_results/text_model.npz"

vgg19_datav2_results = "data/inference_results/vgg19_datav2.npz"
resnet50_datav2_results = "data/inference_results/renet50_datav2.npz"

oversampling_20ep_results = "data/inference_results/oversampling_20ep.npz"
preprocessing_base_20ep_results = "data/inference_results/preprocessing_base_20ep.npz"

hybrid_results = "data/inference_results/hybrid.npz"
hybrid_v2_results = "data/inference_results/hybrid_v2.npz"

with open(vgg19_datav2_results, "rb") as f:
    vgg19_datav2 = np.load(f)

with open(text_model_results, "rb") as f:
    text_model = np.load(f)
    
with open(resnet50_datav2_results, "rb") as f:
    resnet50_datav2 = np.load(f)

with open(hybrid_results, "rb") as f:
    hybrid = np.load(f)
    
with open(hybrid_v2_results, "rb") as f:
    hybrid_v2 = np.load(f)

with open(oversampling_20ep_results, "rb") as f:
    oversampling_20ep = np.load(f)

with open(preprocessing_base_20ep_results, "rb") as f:
    preprocessing_base_20ep = np.load(f)
    
with open(golden_label_results, "rb") as f:
    golden_label = np.load(f)

## Hard voting

In [63]:
vgg19_datav2_label = decode_label(vgg19_datav2)
resnet50_datav2_label = decode_label(resnet50_datav2)

oversampling_20ep_label = decode_label(oversampling_20ep)
preprocessing_base_20ep_label = decode_label(preprocessing_base_20ep)
text_model_label = decode_label(text_model)

hybrid_label = decode_label(hybrid)
hybrid_v2_label = decode_label(hybrid_v2)

In [79]:
print(classification_report(golden_label, hybrid_v2_label, target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.10      0.19      0.13        31
    Thriller       0.36      0.55      0.44       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.66      0.64      0.65        75
      Sci-Fi       0.32      0.65      0.43        48
      Comedy       0.48      0.43      0.45       247
 Documentary       0.17      0.07      0.10        30
   Adventure       0.27      0.25      0.26        48
   Film-Noir       0.04      0.33      0.08         6
   Animation       0.23      0.57      0.33        21
     Romance       0.29      0.30      0.29        94
       Drama       0.55      0.46      0.50       309
     Western       0.15      0.14      0.15        14
     Musical       0.14      0.38      0.20        13
      Action       0.40      0.46      0.42        90
     Mystery       0.09      0.56      0.16        18
         War       0.17      0.32      0.22        25
  Children's       0.57    

  _warn_prf(average, modifier, msg_start, len(result))


In [33]:
def hard_decode_label(output_probability, threshold):
    output_label = []
    for each_output in output_probability:
        output_label.append([0 if x < threshold else 1 for x in each_output])
    return np.array(output_label)

In [48]:
# hard voting 1
print(classification_report(golden_label, hard_decode_label(
    oversampling_20ep_label + preprocessing_base_20ep_label + resnet50_datav2_label + vgg19_datav2_label, 
    2
), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.11      0.10      0.10        31
    Thriller       0.47      0.41      0.43       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.75      0.59      0.66        75
      Sci-Fi       0.71      0.62      0.67        48
      Comedy       0.52      0.66      0.59       247
 Documentary       0.10      0.40      0.16        30
   Adventure       0.45      0.27      0.34        48
   Film-Noir       0.00      0.00      0.00         6
   Animation       0.65      0.71      0.68        21
     Romance       0.31      0.17      0.22        94
       Drama       0.50      0.72      0.59       309
     Western       0.38      0.36      0.37        14
     Musical       0.15      0.15      0.15        13
      Action       0.43      0.50      0.46        90
     Mystery       0.25      0.17      0.20        18
         War       0.29      0.08      0.12        25
  Children's       0.60    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [39]:
# hard voting 2
print(classification_report(golden_label, hard_decode_label(
    oversampling_20ep_label + preprocessing_base_20ep_label + resnet50_datav2_label + vgg19_datav2_label + hybrid_label + hybrid_v2_label, 
    3
), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.19      0.10      0.13        31
    Thriller       0.55      0.36      0.43       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.76      0.55      0.64        75
      Sci-Fi       0.74      0.58      0.65        48
      Comedy       0.56      0.59      0.57       247
 Documentary       0.10      0.40      0.16        30
   Adventure       0.53      0.21      0.30        48
   Film-Noir       0.00      0.00      0.00         6
   Animation       0.71      0.71      0.71        21
     Romance       0.40      0.13      0.19        94
       Drama       0.51      0.71      0.59       309
     Western       0.43      0.21      0.29        14
     Musical       0.18      0.15      0.17        13
      Action       0.51      0.40      0.45        90
     Mystery       0.38      0.17      0.23        18
         War       0.25      0.04      0.07        25
  Children's       0.72    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Soft voting

In [44]:
def soft_decode_label(output_probability, factor):
    output_probability = output_probability / factor
    output_label = []
    for each_output in output_probability:
        output_label.append([0 if x < 0.3 else 1 for x in each_output])
    return np.array(output_label)

In [72]:
print(classification_report(golden_label, soft_decode_label(
    oversampling_20ep + preprocessing_base_20ep + vgg19_datav2 + hybrid_v2, 
    4
), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.17      0.19      0.18        31
    Thriller       0.43      0.46      0.45       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.83      0.64      0.72        75
      Sci-Fi       0.58      0.71      0.64        48
      Comedy       0.48      0.65      0.55       247
 Documentary       0.25      0.10      0.14        30
   Adventure       0.38      0.35      0.37        48
   Film-Noir       0.00      0.00      0.00         6
   Animation       0.61      0.52      0.56        21
     Romance       0.33      0.32      0.32        94
       Drama       0.51      0.72      0.59       309
     Western       0.36      0.29      0.32        14
     Musical       0.19      0.23      0.21        13
      Action       0.38      0.48      0.42        90
     Mystery       0.29      0.28      0.29        18
         War       0.27      0.16      0.20        25
  Children's       0.58    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [57]:

# soft voting 2
predict_probs = (vgg19_datav2 + text_model + resnet50_datav2 + 2 * hybrid)/4.0
predict_label = decode_label(predict_probs)

print(classification_report(golden_label, predict_label, target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.13      0.16      0.14        31
    Thriller       0.40      0.42      0.41       106
     Fantasy       1.00      0.14      0.25         7
      Horror       0.65      0.64      0.64        75
      Sci-Fi       0.69      0.50      0.58        48
      Comedy       0.49      0.72      0.58       247
 Documentary       0.10      0.43      0.17        30
   Adventure       0.26      0.27      0.27        48
   Film-Noir       0.50      0.17      0.25         6
   Animation       0.52      0.76      0.62        21
     Romance       0.36      0.19      0.25        94
       Drama       0.47      0.83      0.60       309
     Western       0.33      0.14      0.20        14
     Musical       0.21      0.31      0.25        13
      Action       0.42      0.53      0.47        90
     Mystery       0.33      0.11      0.17        18
         War       0.40      0.16      0.23        25
  Children's       0.56    

  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
predict_probs_copy = predict_probs.copy()
predict_label_copy = predict_label.copy()
golden_label_copy = golden_label.astype(int)

Children = 17
Adventure = 7
Comedy = 5
Action = 14
SciFi = 4
Drama = 11
Fantasy = 2
Romance = 10

Children_s = 5
Adventure_s = 5
Comedy_s = 2
Action_s = 2
SciFi_s = 2
Drama_s = 1
Romance_s = 1

threshold_fantasy = 6

for i, predict in enumerate(predict_label_copy):
    fantasy_score = 0
    fantasy_score += Children_s if predict[Children] == 1 else 0
    fantasy_score += Adventure_s if predict[Adventure] == 1 else 0
    fantasy_score += Comedy_s if predict[Comedy] == 1 else 0
    fantasy_score += Action_s if predict[Action] == 1 else 0
    fantasy_score += SciFi_s if predict[SciFi] == 1 else 0
    fantasy_score += Drama_s if predict[Drama] == 1 else 0    
    fantasy_score += Romance_s if predict[Romance] == 1 else 0

    
    if fantasy_score >= threshold_fantasy:
        predict[Fantasy] = 1
#         if(golden_label[i][Fantasy] == 1 and predict[Fantasy] == 1):


                                                       
print(classification_report(golden_label, predict_label_copy, target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.13      0.16      0.14        31
    Thriller       0.40      0.42      0.41       106
     Fantasy       0.03      0.29      0.05         7
      Horror       0.65      0.64      0.64        75
      Sci-Fi       0.69      0.50      0.58        48
      Comedy       0.49      0.72      0.58       247
 Documentary       0.10      0.43      0.17        30
   Adventure       0.26      0.27      0.27        48
   Film-Noir       0.50      0.17      0.25         6
   Animation       0.52      0.76      0.62        21
     Romance       0.36      0.19      0.25        94
       Drama       0.47      0.83      0.60       309
     Western       0.33      0.14      0.20        14
     Musical       0.21      0.31      0.25        13
      Action       0.42      0.53      0.47        90
     Mystery       0.33      0.11      0.17        18
         War       0.40      0.16      0.23        25
  Children's       0.56    

  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
print(classification_report(golden_label, decode_label(text_model), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.07      0.13      0.09        31
    Thriller       0.34      0.30      0.32       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.84      0.56      0.67        75
      Sci-Fi       0.52      0.46      0.49        48
      Comedy       0.47      0.56      0.51       247
 Documentary       0.14      0.03      0.05        30
   Adventure       0.26      0.29      0.27        48
   Film-Noir       0.14      0.17      0.15         6
   Animation       0.29      0.19      0.23        21
     Romance       0.33      0.15      0.21        94
       Drama       0.51      0.57      0.54       309
     Western       0.21      0.21      0.21        14
     Musical       0.17      0.23      0.19        13
      Action       0.39      0.41      0.40        90
     Mystery       0.31      0.28      0.29        18
         War       0.26      0.20      0.23        25
  Children's       0.50    

  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
print(classification_report(golden_label, decode_label(vgg19_datav2), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.17      0.13      0.15        31
    Thriller       0.41      0.30      0.35       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.38      0.27      0.31        75
      Sci-Fi       0.40      0.17      0.24        48
      Comedy       0.44      0.80      0.57       247
 Documentary       0.10      0.40      0.16        30
   Adventure       0.11      0.02      0.04        48
   Film-Noir       0.00      0.00      0.00         6
   Animation       0.63      0.81      0.71        21
     Romance       0.17      0.05      0.08        94
       Drama       0.41      0.76      0.53       309
     Western       0.10      0.07      0.08        14
     Musical       0.00      0.00      0.00        13
      Action       0.31      0.42      0.36        90
     Mystery       0.00      0.00      0.00        18
         War       0.00      0.00      0.00        25
  Children's       0.67    

  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
print(classification_report(golden_label, decode_label(resnet50_datav2), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.15      0.10      0.12        31
    Thriller       0.30      0.32      0.31       106
     Fantasy       0.07      0.14      0.10         7
      Horror       0.36      0.21      0.27        75
      Sci-Fi       0.36      0.33      0.34        48
      Comedy       0.74      0.30      0.43       247
 Documentary       0.09      0.40      0.15        30
   Adventure       0.00      0.00      0.00        48
   Film-Noir       0.11      0.17      0.13         6
   Animation       0.32      0.67      0.43        21
     Romance       0.29      0.13      0.18        94
       Drama       0.65      0.26      0.37       309
     Western       0.25      0.29      0.27        14
     Musical       0.20      0.38      0.26        13
      Action       0.34      0.30      0.32        90
     Mystery       0.00      0.00      0.00        18
         War       0.08      0.04      0.05        25
  Children's       0.35    

  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
col = vgg19_datav2[:, 0]
col.shape

(777,)

## Custom

In [None]:
with open(vgg19_datav2_results, "rb") as f:
    vgg19_datav2 = np.load(f)

with open(text_model_results, "rb") as f:
    text_model = np.load(f)
    
with open(resnet50_datav2_results, "rb") as f:
    resnet50_datav2 = np.load(f)

with open(hybrid_results, "rb") as f:
    hybrid = np.load(f)
    
with open(hybrid_v2_results, "rb") as f:
    hybrid_v2 = np.load(f)

with open(oversampling_20ep_results, "rb") as f:
    oversampling_20ep = np.load(f)

with open(preprocessing_base_20ep_results, "rb") as f:
    preprocessing_base_20ep = np.load(f)
    
with open(golden_label_results, "rb") as f:
    golden_label = np.load(f)

In [80]:
images_models = (vgg19_datav2 + resnet50_datav2) / 2.0
text_models = (text_model + oversampling_20ep + preprocessing_base_20ep) / 3.0
hybrid_models = (hybrid + hybrid_v2) / 2.0

In [81]:
print(classification_report(golden_label, decode_label(images_models), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.18      0.06      0.10        31
    Thriller       0.36      0.29      0.32       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.54      0.25      0.35        75
      Sci-Fi       0.39      0.23      0.29        48
      Comedy       0.63      0.52      0.57       247
 Documentary       0.10      0.40      0.15        30
   Adventure       0.00      0.00      0.00        48
   Film-Noir       0.00      0.00      0.00         6
   Animation       0.47      0.71      0.57        21
     Romance       0.32      0.09      0.13        94
       Drama       0.50      0.51      0.50       309
     Western       0.25      0.14      0.18        14
     Musical       0.20      0.23      0.21        13
      Action       0.41      0.42      0.42        90
     Mystery       0.00      0.00      0.00        18
         War       0.25      0.04      0.07        25
  Children's       0.48    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [82]:
print(classification_report(golden_label, decode_label(text_models), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.12      0.23      0.16        31
    Thriller       0.40      0.40      0.40       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.82      0.65      0.73        75
      Sci-Fi       0.55      0.60      0.57        48
      Comedy       0.47      0.67      0.56       247
 Documentary       0.11      0.07      0.08        30
   Adventure       0.31      0.38      0.34        48
   Film-Noir       0.33      0.17      0.22         6
   Animation       0.56      0.24      0.33        21
     Romance       0.31      0.31      0.31        94
       Drama       0.50      0.71      0.59       309
     Western       0.16      0.21      0.18        14
     Musical       0.20      0.23      0.21        13
      Action       0.33      0.43      0.38        90
     Mystery       0.29      0.28      0.29        18
         War       0.14      0.12      0.13        25
  Children's       0.44    

  _warn_prf(average, modifier, msg_start, len(result))


In [83]:
print(classification_report(golden_label, decode_label(hybrid_models), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.15      0.23      0.18        31
    Thriller       0.39      0.51      0.44       106
     Fantasy       0.00      0.00      0.00         7
      Horror       0.69      0.65      0.67        75
      Sci-Fi       0.38      0.62      0.48        48
      Comedy       0.50      0.45      0.48       247
 Documentary       0.22      0.07      0.10        30
   Adventure       0.29      0.27      0.28        48
   Film-Noir       0.05      0.33      0.09         6
   Animation       0.33      0.67      0.44        21
     Romance       0.31      0.29      0.30        94
       Drama       0.56      0.49      0.52       309
     Western       0.18      0.14      0.16        14
     Musical       0.14      0.31      0.19        13
      Action       0.42      0.42      0.42        90
     Mystery       0.10      0.50      0.17        18
         War       0.20      0.28      0.23        25
  Children's       0.62    

  _warn_prf(average, modifier, msg_start, len(result))


In [128]:
images_models = (vgg19_datav2 + resnet50_datav2) / 2.0
text_models = (text_model + oversampling_20ep + preprocessing_base_20ep) / 3.0
hybrid_models = (hybrid + hybrid_v2) / 2.0

with open(genre_file, 'r') as f:
    genre_all = f.readlines()
genre_all = [x.replace('\n', '') for x in genre_all]

# ensemble_strategy = {
#     "text": ["Horror", "Sci-Fi", "Documentary", "Adventure", "Film-Noir", "Romance", "Western", "Musical", "Action", "War"],
#     "image": ["Crime", "Thriller", "Animation", "Mystery", "Children's"],
#     "both": ["Fantasy", "Comedy", "Drama"]
# }

ensemble_strategy = {
    "image": ["Comedy", "Musical"],
    "text": ["Horror", "Film-Noir", "Romance", "Drama", "Mystery"],
    "hybrid": ["Crime", "Thriller", "Action", "War", "Children's"],
    "only-hybrid": ["Fantasy", "Animation"],
    "only-vgg": ["Documentary"],
    "only-resnet": ["Western", "Musical"],
    "only-oversampling": ["Adventure"],
    "only-preprocessing": ["Sci-Fi"],
}

cols = []
for i in range(18):
    
    genre = genre_all[i]
    # print(i, genre)
    
    images_col = images_models[:, i]
    text_col = text_models[:, i]
    hybrid_col = hybrid_models[:, i]
    only_hybrid_col = hybrid[:, i]
    only_vgg_col = vgg19_datav2[:, i]
    only_resnet_col = resnet50_datav2[:, i]
    only_oversampling_col = oversampling_20ep[:, i]
    only_processing_col = preprocessing_base_20ep[:, i]
    
    if genre in ensemble_strategy["text"]:
        cols.append(text_col)
    elif genre in ensemble_strategy["image"]:
        cols.append(images_col)
    elif genre in ensemble_strategy["hybrid"]:
        cols.append(hybrid_col)
    elif genre in ensemble_strategy["only-hybrid"]:
        cols.append(only_hybrid_col)
    elif genre in ensemble_strategy["only-vgg"]:
        cols.append(only_vgg_col)
    elif genre in ensemble_strategy["only-resnet"]:
        cols.append(only_resnet_col)
    elif genre in ensemble_strategy["only-oversampling"]:
        cols.append(only_oversampling_col)
    elif genre in ensemble_strategy["only-preprocessing"]:
        cols.append(only_processing_col)
    else:
        print(genre)

# print(len(cols))
cols = np.array(cols)
# print(cols.shape)
cols = np.transpose(cols)

In [None]:
# vgg_19 documentary
# resnet_50 western musical
# oversampling adventure
# preprocessing scifi
# hybrid animation

# hard voting scifi

In [129]:
print(classification_report(golden_label, decode_label(cols), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.15      0.23      0.18        31
    Thriller       0.39      0.51      0.44       106
     Fantasy       1.00      0.14      0.25         7
      Horror       0.82      0.65      0.73        75
      Sci-Fi       0.59      0.62      0.61        48
      Comedy       0.63      0.52      0.57       247
 Documentary       0.10      0.40      0.16        30
   Adventure       0.29      0.46      0.35        48
   Film-Noir       0.33      0.17      0.22         6
   Animation       0.57      0.76      0.65        21
     Romance       0.31      0.31      0.31        94
       Drama       0.50      0.71      0.59       309
     Western       0.25      0.29      0.27        14
     Musical       0.20      0.23      0.21        13
      Action       0.42      0.42      0.42        90
     Mystery       0.29      0.28      0.29        18
         War       0.20      0.28      0.23        25
  Children's       0.62    

  _warn_prf(average, modifier, msg_start, len(result))


In [130]:
path = "data/ensemble_results/custom_v2.npz"

with open(path, 'wb') as f:
    np.save(f, cols)

with open(path, "rb") as f:
    cols = np.load(f)

In [131]:
print(classification_report(golden_label, decode_label(cols), target_names=genre_all))

              precision    recall  f1-score   support

       Crime       0.15      0.23      0.18        31
    Thriller       0.39      0.51      0.44       106
     Fantasy       1.00      0.14      0.25         7
      Horror       0.82      0.65      0.73        75
      Sci-Fi       0.59      0.62      0.61        48
      Comedy       0.63      0.52      0.57       247
 Documentary       0.10      0.40      0.16        30
   Adventure       0.29      0.46      0.35        48
   Film-Noir       0.33      0.17      0.22         6
   Animation       0.57      0.76      0.65        21
     Romance       0.31      0.31      0.31        94
       Drama       0.50      0.71      0.59       309
     Western       0.25      0.29      0.27        14
     Musical       0.20      0.23      0.21        13
      Action       0.42      0.42      0.42        90
     Mystery       0.29      0.28      0.29        18
         War       0.20      0.28      0.23        25
  Children's       0.62    

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Sci-fi, Comedy, Documentary, Film-Noir, Drama, Western, Mystery, Children's 