# IMDB dataset. Experiment 4

This fourth experiment consist in evaluate the performance of a neural network with one layer on the IMDB classification reviews problem. We have selected `MlFlow` to track the performance variation an assess the models.

In this case, the activation function is `tanh`.

Then, we import the necessary libraries.

In [3]:
# imports

import os

import pandas as pd
import tensorflow as tf
import numpy as np
import mlflow

from mlflow.tracking import MlflowClient
from tensorflow import keras
from keras import layers

2024-04-29 13:10:45.800089: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-29 13:10:45.868805: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-29 13:10:46.107723: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# import local modules
import sys
sys.path.append('/mnt/0A2AAC152AABFBB7/sideProjects/deepLearning')
from deeplearning.deep_utils import imdb_model_setup_one_layer

Import a previous report with the same NN architecture.

In [6]:
PATH_TO_RUNS = "/mnt/0A2AAC152AABFBB7/sideProjects/deepLearning/mlflow_runs"

old_runs = pd.read_csv(
    os.path.join(PATH_TO_RUNS, "imdb_runs_2.csv")
)

From this point we do not add more comments since we follow the path determined in Chollet's book.

In [7]:
# Loading the IMDB dataset

(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.imdb.load_data(num_words=10000)

In [8]:
# Encoding the integer sequences via multi-hot encoding

def vectorize_sequences(sequences, dimension=10000):

    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        for j in sequence:
            results[i, j] = 1.
    return results

x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

y_train = np.asarray(train_labels).astype("float32")
y_test = np.asarray(test_labels).astype("float32")

In [9]:
# Setting aside a validation set

x_val = x_train[:10000]
partial_x_train = x_train[10000:]

y_val = y_train[:10000]
partial_y_train = y_train[10000:]

In [10]:
# Initialize client and experiment

EXPERIMENT_NAME = "deepLearning_IMDB_dataset"
client = MlflowClient()
mlflow.set_experiment(EXPERIMENT_NAME)
exp = client.get_experiment_by_name(EXPERIMENT_NAME)

We create a list with the number of units in the layer. It would be interesting to try with more units but due to Collab limitation we can only experiment with three possible sizes.

In [11]:
units_2_hidden = [8, 16, 32]

Here we train the three NN and store their performances with the help of `MlFlow`.

In [12]:
count = 0
for units in units_2_hidden:

    model, history = imdb_model_setup_one_layer(
        units_layer=units,
        activ_func="tanh",
        X_train=partial_x_train,
        y_train=partial_y_train,
        X_val=x_val,
        y_val=y_val
    )

    hist_dict = history.history
    test_loss, test_accuracy = model.evaluate(x_test, y_test)


    with mlflow.start_run():
        mlflow.set_tag("model", "Base_tanh_{}".format(count))
        mlflow.log_param("units_1st_layer", units)
        mlflow.log_param("hidden_activation", 'tanh')
        mlflow.log_param("epochs", 4)
        mlflow.log_param("batch_size", 512)
        mlflow.log_metric("accuracy", test_accuracy)
        mlflow.log_metric("loss", test_loss)
    count+=1

    del hist_dict
    del history
    del model

Epoch 1/4


2024-04-29 13:11:20.186480: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 600000000 exceeds 10% of free system memory.


[1m28/30[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 8ms/step - accuracy: 0.7042 - loss: 0.5991

2024-04-29 13:11:21.959140: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 400000000 exceeds 10% of free system memory.


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - accuracy: 0.7119 - loss: 0.5932 - val_accuracy: 0.8501 - val_loss: 0.4503
Epoch 2/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8830 - loss: 0.4021 - val_accuracy: 0.8747 - val_loss: 0.3734
Epoch 3/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9094 - loss: 0.3255 - val_accuracy: 0.8804 - val_loss: 0.3327
Epoch 4/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9208 - loss: 0.2720 - val_accuracy: 0.8803 - val_loss: 0.3161


2024-04-29 13:11:26.029538: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1000000000 exceeds 10% of free system memory.


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 901us/step - accuracy: 0.8734 - loss: 0.3292
Epoch 1/4


2024-04-29 13:11:28.189806: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 600000000 exceeds 10% of free system memory.


[1m28/30[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 8ms/step - accuracy: 0.6938 - loss: 0.5958

2024-04-29 13:11:29.622946: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 400000000 exceeds 10% of free system memory.


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.7033 - loss: 0.5882 - val_accuracy: 0.8337 - val_loss: 0.4322
Epoch 2/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8893 - loss: 0.3639 - val_accuracy: 0.8763 - val_loss: 0.3412
Epoch 3/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9149 - loss: 0.2795 - val_accuracy: 0.8872 - val_loss: 0.3018
Epoch 4/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9258 - loss: 0.2352 - val_accuracy: 0.8787 - val_loss: 0.2997
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8716 - loss: 0.3135
Epoch 1/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.7093 - loss: 0.5737 - val_accuracy: 0.8400 - val_loss: 0.4005
Epoch 2/4
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.887

In [13]:
new_runs =mlflow.search_runs()
new_runs[['experiment_id', 'status', 'metrics.loss', 'metrics.accuracy', 'params.units_1st_layer', "params.hidden_activation"]]

Unnamed: 0,experiment_id,status,metrics.loss,metrics.accuracy,params.units_1st_layer,params.hidden_activation
0,926124755463801135,FINISHED,0.291257,0.88064,32,tanh
1,926124755463801135,FINISHED,0.314445,0.8716,16,tanh
2,926124755463801135,FINISHED,0.329476,0.87284,8,tanh
3,926124755463801135,FINISHED,0.288918,0.88404,32,relu
4,926124755463801135,FINISHED,0.29791,0.88084,16,relu
5,926124755463801135,FINISHED,0.304674,0.8828,8,relu
6,926124755463801135,FINISHED,0.338135,0.87084,64,tanh
7,926124755463801135,FINISHED,0.309813,0.8784,32,tanh
8,926124755463801135,FINISHED,0.287834,0.88152,16,tanh
9,926124755463801135,FINISHED,0.385586,0.84884,64,relu


In [14]:
runs = pd.concat([old_runs, new_runs], axis=0)
runs

Unnamed: 0.1,Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy,metrics.loss,params.batch_size,params.epochs,params.units_1st_layer,params.hidden_activation,params.units_2nd_layer,tags.mlflow.source.name,tags.mlflow.runName,tags.mlflow.user,tags.model,tags.mlflow.source.type
0,0.0,b3288fe1ad6e4c269fdda3ec7897bdae,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 10:47:21.145000+00:00,2024-04-29 10:47:21.169000+00:00,0.88404,0.288918,512,4,32,relu,,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,inquisitive-loon-101,luisggon,Base_tanh_2,LOCAL
1,1.0,913cf68f86e443adae0b87269c2d7897,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 10:47:15.278000+00:00,2024-04-29 10:47:15.303000+00:00,0.88084,0.29791,512,4,16,relu,,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,spiffy-shrimp-14,luisggon,Base_tanh_1,LOCAL
2,2.0,d1c1615ba1a44a92a80dd6e88b0b1af4,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 10:47:09.139000+00:00,2024-04-29 10:47:09.164000+00:00,0.8828,0.304674,512,4,8,relu,,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,unruly-gull-578,luisggon,Base_tanh_0,LOCAL
3,3.0,2143f77529164b8ca07fca057f2f5a08,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 10:29:48.952000+00:00,2024-04-29 10:29:49.003000+00:00,0.87084,0.338135,512,4,64,tanh,64.0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,likeable-deer-618,luisggon,Base_tanh_2,LOCAL
4,4.0,fd6c45e187074f2680867d44a7fd7738,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 10:29:40.953000+00:00,2024-04-29 10:29:40.979000+00:00,0.8784,0.309813,512,4,32,tanh,32.0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,sassy-colt-222,luisggon,Base_tanh_1,LOCAL
5,5.0,86caf32d83274909b20505b6d3e74afe,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 10:29:34.243000+00:00,2024-04-29 10:29:34.275000+00:00,0.88152,0.287834,512,4,16,tanh,16.0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,shivering-mouse-746,luisggon,Base_tanh_0,LOCAL
6,6.0,e1eee968bf3d4c169386b71c68da99c2,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 09:47:10.132000+00:00,2024-04-29 09:47:10.159000+00:00,0.84884,0.385586,512,4,64,relu,64.0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,upset-snake-27,luisggon,Base_relu_2,LOCAL
7,7.0,acccca3ed4d24086bf23482cb15cc9d8,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 09:47:02.683000+00:00,2024-04-29 09:47:02.711000+00:00,0.87392,0.313741,512,4,32,relu,32.0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,receptive-midge-765,luisggon,Base_relu_1,LOCAL
8,8.0,be8220fcea6547c4b537882eed3e31ee,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 09:46:56.301000+00:00,2024-04-29 09:46:56.329000+00:00,0.8806,0.294502,512,4,16,relu,16.0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,colorful-skink-172,luisggon,Base_relu_0,LOCAL
0,,b1f52b1ed1b04b7fb03c3b76ca4f7dec,926124755463801135,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 11:11:40.334000+00:00,2024-04-29 11:11:40.360000+00:00,0.88064,0.291257,512,4,32,tanh,,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,upbeat-whale-254,luisggon,Base_tanh_2,LOCAL


In [15]:
runs.to_csv(
    os.path.join(PATH_TO_RUNS, "imdb_runs_2.csv")
)