## Load configurations

In [1]:
from cfgs.content_classification_v1 import get_cfg

# Build the experiment configuration object and echo it so the settings used
# for this run are recorded in the cell output.
cfgs = get_cfg()
# The newline arguments pad the dump with blank lines above and below.
print('\n', cfgs, '\n')


 CLASSES: [{'Genre': {0: 'Comedy', 1: 'Drama', 2: 'Action', 3: 'Documentary', 4: 'Crime', 5: 'Animation', 6: 'Horror', 7: 'Adventure'}, 'Rating': {0: 'value'}, 'Year': {0: 'value'}}]
HYPERPARAMS:
  BATCH_SIZE_TR: 32
  BATCH_SIZE_VAL: 32
  EPOCHS: 25
  LR: 0.0001
  TEST_SPLIT: 0.8
  VAL_SPLIT: 0.8
IMAGE:
  RESOLUTION: (200, 150, 3)
MODEL:
  BACKBONE: InceptionV3
  STRUCTURE: [{0: {'type': 'Genre', 'outNeurons': 8, 'outActivation': 'sigmoid', 'loss': 'categorical_crossentropy', 'weight': 1, 'metric': 'accuracy'}, 1: {'type': 'Rating', 'outNeurons': 1, 'outActivation': 'linear', 'loss': 'mse', 'weight': 1, 'metric': 'mae'}, 2: {'type': 'Year', 'outNeurons': 1, 'outActivation': 'linear', 'loss': 'mse', 'weight': 1, 'metric': 'mae'}}] 



In [2]:
# Label metadata taken from the config; its 'Genre' entry maps class id -> genre name.
dataset_dict = cfgs.CLASSES[0]
# Reverse lookup (genre name -> class id), stored next to the original mapping.
dataset_dict['Genre_alias'] = {
    genre_name: class_id for class_id, genre_name in dataset_dict['Genre'].items()
}

## Load data and initialize generator

In [3]:
import pandas as pd
from data_processing.data_generator import DataGenerator

In [4]:
# Load the pickled DataFrame of samples from an absolute, machine-specific path.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling, so
# only load this file from a trusted source.
df = pd.read_pickle("/home/robotics/content-classification/data_processing/balanced_data.pkl")

In [5]:
# Wrap the DataFrame in the project's DataGenerator, handing it the label
# metadata, the two split settings and the image resolution from the config.
# NOTE(review): TEST_SPLIT and VAL_SPLIT are both 0.8 in the printed config;
# how DataGenerator interprets them is not visible here — confirm in
# data_processing.data_generator.
data_generator = DataGenerator(df, dataset_dict, cfgs.HYPERPARAMS.TEST_SPLIT, cfgs.HYPERPARAMS.VAL_SPLIT, cfgs.IMAGE.RESOLUTION)
# Preview the first rows of the frame held by the generator.
data_generator.df.head()

Unnamed: 0,Index,Name,Year,Genre,Rating,Poster_path
0,tt0110901,Project Shadowchaser II,1995,2,4.0,/home/robotics/Documents/data/Posters/1995/tt0...
1,tt0365265,Ginger Snaps Back: The Beginning,2004,1,5.9,/home/robotics/Documents/data/Posters/2004/tt0...
2,tt0244992,Barrela: Escola de Crimes,1990,1,6.5,/home/robotics/Documents/data/Posters/1990/tt0...
3,tt1648188,Sharunas Bartas: An Army of One,2010,3,6.5,/home/robotics/Documents/data/Posters/2010/tt1...
4,tt1935737,Aquatic Language,2002,5,6.3,/home/robotics/Documents/data/Posters/2002/tt1...


In [8]:
# Obtain the index sets for the train, validation and test subsets.
# NOTE(review): whether the split is shuffled or seeded is decided inside
# split_dataset — confirm if reproducible splits are required.
train_idx, valid_idx, test_idx = data_generator.split_dataset() 

## Build and compile model

In [42]:
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow import keras
from model.architecture import MultiOutputModel

In [10]:
# Output-head definitions (Genre, Rating, Year) come from the config.
structure = cfgs.MODEL.STRUCTURE[0]
# RESOLUTION is a 3-tuple, so it is unpacked into three positional arguments
# ahead of the head structure.
content_model = MultiOutputModel(*cfgs.IMAGE.RESOLUTION, structure)
# Take the backbone name from the config rather than repeating the literal
# here, so the printed configuration and the built model cannot drift apart.
content_model.build_model(cfgs.MODEL.BACKBONE)
# keras.utils.plot_model(content_model.model, show_shapes=True)

In [11]:
# The optimizer is handed over as the SGD class itself, not an instance, so no
# learning rate is configured in this cell.
# NOTE(review): presumably compile_model instantiates and configures it
# (cfgs.HYPERPARAMS.LR is not passed here) — confirm in model.architecture.
opt = SGD
# opt = Adam(lr=cfgs.HYPERPARAMS.LR, decay=cfgs.HYPERPARAMS.LR/cfgs.HYPERPARAMS.EPOCHS)
content_model.compile_model(opt)
# Print the layer-by-layer summary of the compiled network.
content_model.model.summary()

Model: "content_classification"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 200, 150, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 99, 74, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 99, 74, 32)   96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 99, 74, 32)   0           batch_normalization[0][0]        
_____________________________________________________________________________

## Train model

In [12]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.backend import clear_session
import tensorflow as tf

In [13]:
# Create a TF1-compat session configured with allow_growth (GPU memory is
# claimed on demand rather than all at once) and register it with the compat
# Keras backend.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

# GPU check - CUDA 11.1 with cuDNN 8.05 on 460.39 - GTX 1060 6GB
print(tf.test.is_built_with_cuda())
print(tf.config.list_physical_devices('GPU'))

# Reset Keras' global state.
# NOTE(review): this runs after the model was built and compiled in earlier
# cells, and immediately after set_session above — confirm this ordering is
# intended and that the memory-growth setting still takes effect.
clear_session()

True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [14]:
# Batch generators for training and validation, each using its configured batch size.
# NOTE(review): the third positional argument is True here and False for the
# test generator; presumably a training/looping flag — confirm in
# DataGenerator.generate_images.
tr_generator = data_generator.generate_images(train_idx, cfgs.HYPERPARAMS.BATCH_SIZE_TR, True)
val_generator = data_generator.generate_images(valid_idx, cfgs.HYPERPARAMS.BATCH_SIZE_VAL, True)

In [15]:
import datetime

%load_ext tensorboard
# Timestamped directory so every training run writes to its own TensorBoard folder.
log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Only TensorBoard logging is active; histogram_freq=0 turns histogram computation off.
callbacks = [keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0)]
# Optional per-epoch checkpointing, currently disabled:
# keras.callbacks.ModelCheckpoint('./checkpoints', monitor='val_loss', verbose=0, 
#                                              save_best_only=False, save_weights_only=False, 
#                                              mode='auto', save_freq='epoch')

In [16]:
# Whole batches per epoch for each split; the floor division leaves out any
# trailing partial batch.
train_steps = len(train_idx) // cfgs.HYPERPARAMS.BATCH_SIZE_TR
val_steps = len(valid_idx) // cfgs.HYPERPARAMS.BATCH_SIZE_VAL

# Train from the generators and keep the returned History object for later inspection.
history = content_model.model.fit(
    tr_generator,
    steps_per_epoch=train_steps,
    epochs=cfgs.HYPERPARAMS.EPOCHS,
    callbacks=callbacks,
    validation_data=val_generator,
    validation_steps=val_steps,
)

Epoch 1/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 2/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 3/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 4/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 5/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 6/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 7/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 8/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 9/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 10/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 11/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 12/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 13/25
* * * * * Image could not be loaded - skipping * * * * *


Epoch 14/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 15/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 16/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 17/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 18/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 19/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 20/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 21/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 22/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 23/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 24/25
* * * * * Image could not be loaded - skipping * * * * *
Epoch 25/25
* * * * * Image could not be loaded - skipping * * * * *


In [17]:
# Open the TensorBoard UI inline, pointed at the parent "logs" directory that
# contains the timestamped run folders.
%tensorboard --logdir logs

In [18]:
# Persist the trained model under "Model_v1", relative to the notebook's
# working directory.
content_model.model.save("Model_v1")

INFO:tensorflow:Assets written to: Model_v1/assets


## Test set

In [30]:
import numpy as np

In [31]:
test_batch_size = 128
test_generator = data_generator.generate_images(test_idx, test_batch_size, False)

# Number of whole batches to predict on; test samples beyond the last full
# batch are not covered by the floor division.
test_steps = len(test_idx) // test_batch_size

# predict() returns one array per output head, unpacked here as genre, rating, year.
genre_pred, rating_pred, year_pred = content_model.model.predict(
    test_generator,
    steps=test_steps,
)

In [52]:
# Peek at one raw year prediction; it is still on the scaled range that the
# evaluation cell multiplies by data_generator.max_year.
year_pred[3]

array([0.9598407], dtype=float32)

In [34]:
# Re-create the generator so this pass starts from the first test sample; the
# earlier instance was already advanced by predict().
test_generator = data_generator.generate_images(test_idx, test_batch_size, False)

# Walk the test set once, collecting the inputs and the ground truth of every head.
# NOTE: `images` keeps every test image in memory and is not used in this cell;
# drop it if nothing later needs it.
images, genres_true, ratings_true, years_true = [], [], [], []
for test_batch in test_generator:
    # Each batch is indexed as (inputs, labels); labels are ordered genre, rating, year.
    image = test_batch[0]
    labels = test_batch[1]

    images.extend(image)
    genres_true.extend(labels[0])
    ratings_true.extend(labels[1])
    years_true.extend(labels[2])

genres_true = np.array(genres_true)
ratings_true = np.array(ratings_true)
years_true = np.array(years_true)

# Predictions and ground truth come from two separate passes over the
# generator. Fail here with a clear message if they do not line up, instead of
# inside a metric call later on.
assert len(genres_true) == len(genre_pred), (
    "ground truth / prediction length mismatch: {} labels vs {} predictions".format(
        len(genres_true), len(genre_pred)
    )
)

# Genre: convert the per-class score vectors into class ids.
genres_true = genres_true.argmax(axis=-1)
genres_pred = genre_pred.argmax(axis=-1)

# Rating and year: scale the values back up by the generator's stored maxima.
ratings_true = ratings_true * data_generator.max_rating
ratings_pred = rating_pred * data_generator.max_rating

years_true = years_true * data_generator.max_year
years_pred = year_pred * data_generator.max_year

In [35]:
from sklearn.metrics import classification_report

In [37]:
# Pass the class ids explicitly together with their names: without `labels`,
# scikit-learn pairs target_names with whichever ids happen to occur in the
# data, which mislabels rows (or raises) as soon as one genre is missing.
genre_ids = sorted(dataset_dict['Genre'])
genre_names = [dataset_dict['Genre'][genre_id] for genre_id in genre_ids]
cr_genre = classification_report(
    genres_true,
    genres_pred,
    labels=genre_ids,
    target_names=genre_names,
)
print(cr_genre)

              precision    recall  f1-score   support

      Comedy       0.25      0.49      0.33       406
       Drama       0.23      0.23      0.23       406
      Action       0.24      0.38      0.29       397
 Documentary       0.28      0.14      0.19       402
       Crime       0.29      0.10      0.15       433
   Animation       0.46      0.57      0.51       365
      Horror       0.30      0.39      0.34       384
   Adventure       0.34      0.03      0.06       407

    accuracy                           0.29      3200
   macro avg       0.30      0.29      0.26      3200
weighted avg       0.30      0.29      0.26      3200



In [40]:
from sklearn.metrics import r2_score
# Coefficient of determination for the rating head, computed on the rescaled values.
ratings_r2 = r2_score(ratings_true, ratings_pred)
print('R2 score for ratings: ', ratings_r2)

R2 score for ratings:  -0.0008662322947896595
