In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
# import tensorflow as tf
import plotly.express as px
import pandas as pd
import numpy as np
import json
import os

In [3]:
!cp drive/MyDrive/DP/1m/training/nn_model.py .

import nn_model

In [4]:
checkpoint_model_filepath = '/content/drive/MyDrive/DP/1m/training/checkpoints/'

project_path = '/content/drive/MyDrive/DP/'

training_data_path = '/content/drive/MyDrive/DP/1m/training/data/'

config_path = '/content/drive/MyDrive/DP/config/nn_config.json'

wandb_config_path = '/content/drive/MyDrive/DP/config/wandb_config.json'

wandb_log = True
wandb_run_name = 'arch0 - no lr, no dp'
wandb_group_name = 'First fittings'

wandb_entity = 'xbarantomik'
wandb_project_name = 'DP'

In [5]:
try:
  with open(wandb_config_path, 'r') as f:
    config = json.load(f)

  # wandb_log = bool(config["wandb_log"])
  if config["wandb_log"] == 'false':
    wandb_log = False
  else:
    wandb_log = True
  wandb_run_name = config["wandb_run_name"]
  wandb_group_name = config["wandb_group_name"]
  wandb_entity = config["wandb_entity"]
  wandb_project_name = config["wandb_project_name"]
  # new_model = bool(config["new_model"])         # If True, train a new model. If False, load a pretrained model
  if config["new_model"] == 'false':
    new_model = False
  else:
    new_model = True
  run_name_before = config["run_name_before"]   # Model to be loaded
  # eval_only = bool(config["eval_only"]) # If True, load a pretrained model and perform test and eval
  if config["eval_only"] == 'false':
    eval_only = False
  else:
    eval_only = True

  print('wandb_log:', wandb_log)
  print('wandb_run_name:', wandb_run_name)
  print('wandb_group_name:', wandb_group_name)
  print('wandb_entity:', wandb_entity)
  print('wandb_project_name:', wandb_project_name)
  print('new_model:', new_model)
  print('run_name_before:', run_name_before)
  print('eval_only:', eval_only)

except OSError as e:
  print('\nERROR loading wandb_config file')
  print(e)
  quit()

wandb_log: True
wandb_run_name: arch1_lr0.01_1
wandb_group_name: Dense
wandb_entity: xbarantomik
wandb_project_name: DP
new_model: True
run_name_before: -
eval_only: False


In [6]:
try:
  with open(config_path, 'r') as f:
    config = json.load(f)

  epochs = int(config['epochs'])
  batch_size = int(config['batch_size'])
  patience = int(config['patience'])
  learning_rate = float(config['learning_rate'])
  architecture = int(config['architecture'])

  print('epochs:', epochs)
  print('batch_size:', batch_size)
  print('patience:', patience)
  print('learning_rate:', learning_rate)
  print('architecture:', architecture)

except OSError as e:
  print('\nERROR loading nn_config file')
  print(e)
  quit()

epochs: 400
batch_size: 256
patience: 45
learning_rate: 0.01
architecture: 1


#### Load training data

In [7]:
try:
  train_data = np.load(training_data_path + 'train_data.npy')
  val_data = np.load(training_data_path + 'val_data.npy')
  test_data = np.load(training_data_path + 'test_data.npy')
  train_target_ratings = np.load(training_data_path + 'train_target_ratings.npy')
  val_target_ratings = np.load(training_data_path + 'val_target_ratings.npy')
  test_target_ratings = np.load(training_data_path + 'test_target_ratings.npy')

except OSError as e:
  print('\nERROR loading training data')
  print(e)
  quit()

In [8]:
if wandb_log and 'COLAB_GPU' in os.environ:
  !pip install wandb
  # !wandb login --relogin



In [9]:
if wandb_log:
  import wandb
  from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

  wandb.login()

  wandb.init(
    config=config,
    entity=wandb_entity,
    project=wandb_project_name,
    name=wandb_run_name,
    group=wandb_group_name
  )

[34m[1mwandb[0m: Currently logged in as: [33mxbarantomik[0m. Use [1m`wandb login --relogin`[0m to force relogin


#### NN MODEL

In [10]:
early_stopping = EarlyStopping(patience=patience, monitor='val_loss')

In [11]:
# lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#     learning_rate,
#     decay_steps=75,
#     decay_rate=0.96,
#     staircase=True)

In [12]:
# scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

checkpoint_filepath = checkpoint_model_filepath + f'{wandb_run_name}_checkpoint.h5'
checkpoint_callback = ModelCheckpoint(filepath=checkpoint_filepath, monitor='val_loss', save_best_only=True, mode='min')

if wandb_log:
  # callbacks = [early_stopping, checkpoint_callback, OutputCallback(), WandbMetricsLogger()]
  # callbacks = [early_stopping, scheduler, WandbMetricsLogger()]
  callbacks = [early_stopping, checkpoint_callback, WandbMetricsLogger()]
  # callbacks = [early_stopping, WandbMetricsLogger()]
else:
  # callbacks = [early_stopping, checkpoint_callback, OutputCallback()]
  # callbacks = [early_stopping, scheduler]
  callbacks = [early_stopping, checkpoint_callback]
  # callbacks = [early_stopping]

#### Create model

In [13]:
# time_steps = 1
# train_data_reshaped = np.reshape(train_data, (train_data.shape[0], time_steps, train_data.shape[1]))

# train_data_reshaped.shape[1:]

# # my_model = nn_model.create_model(architecture=architecture, train_data_shape=train_data.shape[1])
# my_model = nn_model.create_model(architecture=architecture, train_data_shape=train_data_reshaped.shape[1:])

# if my_model != None:
#   my_model.summary()
# else:
#   print('\nERROR loading NN model')
#   quit()

In [14]:
if new_model:
  my_model = nn_model.create_model(architecture=architecture, lr=learning_rate, train_data_shape=train_data.shape[1])
  print('\nCreated a new model')
else:
  my_model = load_model(checkpoint_model_filepath + run_name_before + '_checkpoint.h5')
  print('\nLoaded a model')

if my_model != None:
  my_model.summary()
else:
  print('\nERROR loading NN model')
  quit()


Created a new model
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               3328      
                                                                 
 activation (Activation)     (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                16448     
                                                                 
 activation_1 (Activation)   (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 19841 (77.50 KB)
Trainable params: 19841 (77.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

## Model fitting

In [15]:
if not eval_only:
  print('Training model')
  trained_model = my_model.fit(train_data, train_target_ratings, batch_size, epochs, validation_data=(val_data, val_target_ratings), callbacks=callbacks, verbose=1)
else:
  print('Only evaluating model')

Training model
Epoch 1/400
Epoch 2/400


  saving_api.save_model(


Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78/400
Epoch 79/400
Epoch 

#### After fitting

In [16]:
# fig_loss = px.line({
#   'Train': trained_model.history['loss'],
#   'Val': trained_model.history['val_loss']
#   },
#   title='Train vs Val Loss')
# fig_loss.show()

In [17]:
# fig_mae = px.line({
#   'Train': trained_model.history['root_mean_squared_error'],
#   'Val': trained_model.history['val_root_mean_squared_error']
#   },
#   title='Train vs Val RootMSE')
# fig_mae.show()

In [18]:
# fig_mae = px.line({
#   'Train': trained_model.history['mean_absolute_error'],
#   'Val': trained_model.history['val_mean_absolute_error']
#   },
#   title='Train vs Val MAE')
# fig_mae.show()

In [19]:
# # Validation data values in the last epoch
# print("number of epochs:", trained_model.epoch[-1] + 1)
# print("val_loss:", trained_model.history['val_loss'][-1])
# print("val_root_mean_squared_error:", trained_model.history['val_root_mean_squared_error'][-1])
# print("val_mean_absolute_error:", trained_model.history['val_mean_absolute_error'][-1])

In [20]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
test_loss, test_mean_squared_error, test_mean_absolute_error = my_model.evaluate(test_data, test_target_ratings)
print("test_loss:", test_loss)
print("test_root_mean_squared_error:", test_mean_squared_error)
print("test_mean_absolute_error:", test_mean_absolute_error)
print('\n')

print("generate predictions for 10 samples")
predictions = my_model.predict(test_data)
test_y_comparison = pd.DataFrame({'test_y_true': test_target_ratings, 'test_y_pred': [x for x in predictions]})

print(test_y_comparison.head(10))

Evaluate on test data
test_loss: 1.0383657217025757
test_root_mean_squared_error: 1.0190023183822632
test_mean_absolute_error: 0.8162111043930054


generate predictions for 10 samples
   test_y_true  test_y_pred
0            4  [3.0720105]
1            3  [3.1425924]
2            5  [3.5446558]
3            5  [3.0315166]
4            4  [3.7019558]
5            4   [4.050746]
6            4    [4.26002]
7            3  [3.8434272]
8            1   [3.158271]
9            5   [4.575457]


In [21]:
if wandb_log:
  wandb.finish()

0,1
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▆▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/mean_absolute_error,█▆▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/root_mean_squared_error,█▆▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/val_loss,█▆▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁
epoch/val_mean_absolute_error,█▆▅▄▃▃▃▂▃▂▃▂▂▂▃▂▂▂▂▁▃▂▃▂▂▂▂▂▂▂▁▂▁▂▁▁▂▁▁▁
epoch/val_root_mean_squared_error,█▆▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch/epoch,399.0
epoch/learning_rate,0.001
epoch/loss,0.99756
epoch/mean_absolute_error,0.79892
epoch/root_mean_squared_error,0.99878
epoch/val_loss,1.03446
epoch/val_mean_absolute_error,0.81444
epoch/val_root_mean_squared_error,1.01708
