## Load configurations

In [1]:
from cfgs.content_classification_v2 import get_cfg

# Fetch the experiment configuration object from the project's config module.
cfgs = get_cfg()
# Echo the whole configuration (padded with blank lines) so the settings used
# for this run are recorded in the notebook output.
print('\n', cfgs, '\n')


 CLASSES: [{'Genre': {0: 'Comedy', 1: 'Action', 2: 'Documentary', 3: 'Crime', 4: 'Animation', 5: 'Horror'}, 'Rating': {0: 'value'}, 'Year': {0: 'value'}}]
HYPERPARAMS:
  BATCH_SIZE_TR: 64
  BATCH_SIZE_VAL: 64
  EPOCHS: 30
  LR: 0.0001
  TEST_SPLIT: 0.9
  VAL_SPLIT: 0.9
IMAGE:
  RESOLUTION: (200, 150, 3)
MODEL:
  BACKBONE: InceptionV3
  STRUCTURE: [{0: {'type': 'Genre', 'outNeurons': 6, 'outActivation': 'sigmoid', 'loss': 'categorical_crossentropy', 'weight': 1, 'metric': 'accuracy'}, 1: {'type': 'Rating', 'outNeurons': 1, 'outActivation': 'linear', 'loss': 'mse', 'weight': 1, 'metric': 'mae'}, 2: {'type': 'Year', 'outNeurons': 1, 'outActivation': 'linear', 'loss': 'mse', 'weight': 1, 'metric': 'mae'}}] 



In [2]:
# Label metadata: the first (only visible) entry of the configured CLASSES list.
# This is a reference, so the key added below is also stored on cfgs.CLASSES[0].
dataset_dict = cfgs.CLASSES[0]

# Reverse lookup (genre name -> class id) derived from the id -> name mapping.
dataset_dict['Genre_alias'] = {
    genre_name: genre_id
    for genre_id, genre_name in dataset_dict['Genre'].items()
}

## Load data and initialize generator

In [3]:
import pandas as pd
from data_processing.data_generator import DataGenerator
from sklearn.preprocessing import StandardScaler

In [4]:
# NOTE(review): this is an absolute, machine-specific path, and unpickling can
# execute arbitrary code -- only load this file from a trusted source.
balanced_data_path = "/home/robotics/content-classification/data_processing/balanced_data.pkl"
df = pd.read_pickle(balanced_data_path)

In [5]:
# Wrap the dataframe in the project's generator, passing the label metadata,
# the configured test/validation split values and the target image resolution.
data_generator = DataGenerator(
    df,
    dataset_dict,
    cfgs.HYPERPARAMS.TEST_SPLIT,
    cfgs.HYPERPARAMS.VAL_SPLIT,
    cfgs.IMAGE.RESOLUTION,
)

# Preview the first rows of the frame held by the generator.
data_generator.df.head()

Unnamed: 0,Index,Name,Year,Genre,Rating,Poster_path
0,tt0077838,The Last Waltz,1978,2,8.2,/home/robotics/Documents/data/Posters/1978/tt0...
1,tt0218080,Agent Red,2000,1,3.5,/home/robotics/Documents/data/Posters/2000/tt0...
2,tt0378889,"Tom, Tom, the Piper's Son",1969,0,6.2,/home/robotics/Documents/data/Posters/1969/tt0...
3,tt1183696,Second Skin,2009,2,6.6,/home/robotics/Documents/data/Posters/2009/tt1...
4,tt0068549,Essene,1972,2,7.1,/home/robotics/Documents/data/Posters/1972/tt0...


In [6]:
# Ask the generator for its three index partitions, then name each one.
split_indices = data_generator.split_dataset()
train_idx, valid_idx, test_idx = split_indices

## Build and compile model

In [7]:
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
from model.architecture import MultiOutputModel

In [8]:
# Per-head output specification taken from the configuration (first entry of
# MODEL.STRUCTURE).
structure = cfgs.MODEL.STRUCTURE[0]

# The resolution tuple is unpacked into the leading positional arguments.
content_model = MultiOutputModel(*cfgs.IMAGE.RESOLUTION, structure)

# Read the backbone name from the configuration instead of repeating the
# literal "InceptionV3" here, so the config stays the single source of truth.
content_model.build_model(cfgs.MODEL.BACKBONE)

# NOTE(review): the printed configuration pairs a 'sigmoid' output activation
# with 'categorical_crossentropy' for the Genre head -- confirm that is intended
# for a single-label problem.
# keras.utils.plot_model(content_model.model, show_shapes=True)

In [9]:
# Adam optimizer driven by the configured learning rate.
# `learning_rate` replaces the deprecated `lr` keyword alias.
# NOTE(review): `decay` is kept as in the original (LR / EPOCHS); newer Keras
# optimizer implementations may no longer accept it -- confirm against the
# installed TensorFlow version.
opt = Adam(
    learning_rate=cfgs.HYPERPARAMS.LR,
    decay=cfgs.HYPERPARAMS.LR / cfgs.HYPERPARAMS.EPOCHS,
)

# Compile through the project wrapper, then print the layer-by-layer summary.
content_model.compile_model(opt)
content_model.model.summary()

Model: "content_classification"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 200, 150, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 99, 74, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 99, 74, 32)   96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 99, 74, 32)   0           batch_normalization[0][0]        
_____________________________________________________________________________

## Train model

In [10]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.backend import clear_session
import tensorflow as tf

In [11]:
# Create a TF1-compat session with `allow_growth` enabled on its GPU options
# and register it as the session used by the Keras backend.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

# GPU check - CUDA 11.1 with cuDNN 8.05 on 460.39 - GTX 1060 6GB
# Prints whether this TensorFlow build has CUDA support and which physical GPUs
# are visible.
print(tf.test.is_built_with_cuda())
print(tf.config.list_physical_devices('GPU'))

# NOTE(review): this runs after the model was built and compiled in earlier
# cells, and right after the session above was registered -- confirm that
# resetting Keras' global state here does not discard anything training needs.
clear_session()

True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [12]:
# Batch generators for the training and validation partitions.
# NOTE(review): the trailing positional `True` is presumably a "training mode"
# flag (the test generator passes False) -- confirm against DataGenerator.
tr_generator = data_generator.generate_images(
    train_idx,
    cfgs.HYPERPARAMS.BATCH_SIZE_TR,
    True,
)
val_generator = data_generator.generate_images(
    valid_idx,
    cfgs.HYPERPARAMS.BATCH_SIZE_VAL,
    True,
)

In [13]:
import datetime

%load_ext tensorboard
log_dir = log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

callbacks = [keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0)]
# keras.callbacks.ModelCheckpoint('./checkpoints', monitor='val_loss', verbose=0, 
#                                              save_best_only=False, save_weights_only=False, 
#                                              mode='auto', save_freq='epoch')

In [14]:
%%time
history = content_model.model.fit(tr_generator, 
                    steps_per_epoch=len(train_idx)//cfgs.HYPERPARAMS.BATCH_SIZE_TR,
                    epochs=cfgs.HYPERPARAMS.EPOCHS,
                    callbacks=callbacks,
                    validation_data=val_generator,
                    validation_steps=len(valid_idx)//cfgs.HYPERPARAMS.BATCH_SIZE_VAL)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30


Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CPU times: user 14min 6s, sys: 1min 45s, total: 15min 52s
Wall time: 11min 17s


## TensorBoard

In [15]:
# Open TensorBoard inline on the `logs` directory, the parent of the
# timestamped per-run folders written by the TensorBoard callback.
%tensorboard --logdir logs

In [21]:
# Persist the trained Keras model under the relative path "Model_v5"
# (the cell output reports assets written to Model_v5/assets).
content_model.model.save("Model_v5")

INFO:tensorflow:Assets written to: Model_v5/assets


## Test set

In [16]:
import numpy as np

In [17]:
# NOTE(review): BATCH_SIZE_TEST does not appear in the configuration printed at
# the top of this notebook -- confirm it is defined in the config module.
t_gen = data_generator.generate_images(test_idx, cfgs.HYPERPARAMS.BATCH_SIZE_TEST, False)

# Floor division: only whole batches of the test partition are requested.
test_steps = len(test_idx) // cfgs.HYPERPARAMS.BATCH_SIZE_TEST

# The model has three heads; predict() returns one array per head.
genre_inf, rating_inf, year_inf = content_model.model.predict(t_gen, steps=test_steps)

# Fixed: the original read `genre_pred` / `rating_pred` / `year_pred`, names
# this cell never defines (NameError on a fresh kernel). Use the arrays that
# predict() actually returned.
# Genre: index of the highest-scoring class per sample.
genres_pred = genre_inf.argmax(axis=-1)
# Rating / year: multiply by the generator's stored maxima.
# NOTE(review): presumably this undoes a divide-by-max normalisation of the
# targets -- confirm against DataGenerator.
ratings_pred = rating_inf * data_generator.max_rating
years_pred = year_inf * data_generator.max_year


* * * * * Image could not be loaded - skipping * * * * *


In [18]:
# Ground-truth labels for the test partition, used by the metric cells below.
# Fixed: the original called `generate_truth(self, ...)` as a free function
# with an undefined `self`, which fails on a fresh kernel.
# NOTE(review): assumes `generate_truth` is a DataGenerator method taking
# (indices, batch_size) -- confirm against data_processing.data_generator.
genres_true, ratings_true, years_true = data_generator.generate_truth(
    test_idx,
    cfgs.HYPERPARAMS.BATCH_SIZE_TEST,
)


* * * * * Image could not be loaded - skipping * * * * *


In [19]:
from sklearn.metrics import classification_report

In [20]:
# Display names for the report rows: the keys of the name -> id alias map,
# in their insertion order.
genre_names = list(dataset_dict['Genre_alias'])

# Per-genre precision / recall / F1 on the test predictions.
cr_genre = classification_report(
    genres_true,
    genres_pred,
    target_names=genre_names,
)
print(cr_genre)

              precision    recall  f1-score   support

      Comedy       0.46      0.34      0.39       187
      Action       0.36      0.38      0.37       189
 Documentary       0.43      0.44      0.43       208
       Crime       0.37      0.41      0.39       186
   Animation       0.67      0.62      0.65       192
      Horror       0.43      0.51      0.47       190

    accuracy                           0.45      1152
   macro avg       0.46      0.45      0.45      1152
weighted avg       0.46      0.45      0.45      1152



In [23]:
from sklearn.metrics import r2_score

# Coefficient of determination for the rating head on the test predictions.
rating_r2 = r2_score(ratings_true, ratings_pred)
print('R2 score for ratings: ', rating_r2)

R2 score for ratings:  -0.240832386616592
