# Experimenting with Deep Learning and Reuters dataset

# Reuters dataset. Experiment 1

This first experiment consist in evaluate the performance of a NN with two hidden layers on the Reuters classification problem. We have selected `MlFlow` to track the performance variation an assess the models.

In [1]:
# imports
import os

import tensorflow as tf
import numpy as np
import mlflow

from mlflow.tracking import MlflowClient;

2024-04-29 15:38:18.311015: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-29 15:38:18.314778: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-29 15:38:18.361542: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# import local modules
import sys
sys.path.append('/mnt/0A2AAC152AABFBB7/sideProjects/deepLearning')
from deeplearning.deep_utils import reuters_model_three_layers

In [3]:
# Loading the Reuters dataset

(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.reuters.load_data(num_words=10000);

In [4]:
# Encoding the integer sequences via multi-hot encoding

def vectorize_sequences(sequences, dimension=10000):

    results = np.zeros((len(sequences), dimension))

    for i, sequence in enumerate(sequences):
        for j in sequence:
            results[i, j] = 1.
    return results

# vectorize training and test data

x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

# vectorize training and test labels

y_train = tf.keras.utils.to_categorical(train_labels)
y_test = tf.keras.utils.to_categorical(test_labels)

# Setting aside a validation set

x_val = x_train[:1000]
partial_x_train = x_train[1000:]
y_val = y_train[:1000]
partial_y_train = y_train[1000:]

In [5]:
# Initialize client and experiment

EXPERIMENT_NAME = "deepLearning_reuters_dataset"
client = MlflowClient()
mlflow.set_experiment(EXPERIMENT_NAME)
exp = client.get_experiment_by_name(EXPERIMENT_NAME)

2024/04/29 15:38:22 INFO mlflow.tracking.fluent: Experiment with name 'deepLearning_reuters_dataset' does not exist. Creating a new experiment.


In [6]:
layer_units = [
    [64, 64, 46], [64, 32, 46], [128, 128, 46], [32, 32, 46], [8, 8, 46]
]

In [7]:
for units in layer_units:

    model, history = reuters_model_three_layers(
        units=units,
        activ_func="relu",
        X_train=partial_x_train,
        y_train=partial_y_train,
        X_val=x_val,
        y_val=y_val
    )

    hist_dict = history.history
    test_loss, test_accuracy = model.evaluate(x_test, y_test)

    with mlflow.start_run():
        mlflow.set_tag("model", "Base_0")
        mlflow.log_param("units_1st_layer", units[0])
        mlflow.log_param("units_2nd_layer", units[1])
        mlflow.log_param("hidden_activation", 'relu')
        mlflow.log_param("epochs", 9)
        mlflow.log_param("batch_size", 512)
        mlflow.log_metric("accuracy", test_accuracy)
        mlflow.log_metric("loss", test_loss)

Epoch 1/9
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.3127 - loss: 3.2601 - val_accuracy: 0.6090 - val_loss: 1.9225
Epoch 2/9
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.6501 - loss: 1.6862 - val_accuracy: 0.6800 - val_loss: 1.4107
Epoch 3/9
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.7312 - loss: 1.2228 - val_accuracy: 0.7320 - val_loss: 1.2044
Epoch 4/9
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7926 - loss: 0.9740 - val_accuracy: 0.7620 - val_loss: 1.0962
Epoch 5/9
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8191 - loss: 0.8399 - val_accuracy: 0.7880 - val_loss: 1.0133
Epoch 6/9
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8503 - loss: 0.6977 - val_accuracy: 0.7900 - val_loss: 0.9649
Epoch 7/9
[1m16/16[0m [32m━━━━━━━━━━━

In [8]:
runs = mlflow.search_runs()
PATH_TO_RUNS = "/mnt/0A2AAC152AABFBB7/sideProjects/deepLearning/mlflow_runs"
runs.to_csv(
    os.path.join(PATH_TO_RUNS, "reuters_runs_1.csv")
)
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.loss,metrics.accuracy,params.epochs,params.hidden_activation,params.batch_size,params.units_1st_layer,params.units_2nd_layer,tags.mlflow.runName,tags.mlflow.user,tags.model,tags.mlflow.source.name,tags.mlflow.source.type
0,f8cadb40d1834860acedc8ad0d104d85,852056537141572214,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 13:38:43.407000+00:00,2024-04-29 13:38:43.436000+00:00,1.59456,0.617097,9,relu,512,64,64,bright-wren-878,luisggon,Base_0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,LOCAL
1,3c317361783042b6bfdd4134e9a14703,852056537141572214,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 13:38:40.224000+00:00,2024-04-29 13:38:40.273000+00:00,1.111658,0.747996,9,relu,512,64,64,sincere-pig-226,luisggon,Base_0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,LOCAL
2,4a0d15ba59e64a50bbcd1a09a14e53b4,852056537141572214,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 13:38:36.748000+00:00,2024-04-29 13:38:36.780000+00:00,0.944035,0.794301,9,relu,512,64,64,valuable-roo-908,luisggon,Base_0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,LOCAL
3,372f37aac0674b8fad9ffe3c291e0f7d,852056537141572214,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 13:38:31.164000+00:00,2024-04-29 13:38:31.191000+00:00,1.000223,0.768923,9,relu,512,64,64,wise-smelt-437,luisggon,Base_0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,LOCAL
4,48805b853d714a85b449d11e7361052a,852056537141572214,FINISHED,file:///mnt/0A2AAC152AABFBB7/sideProjects/deep...,2024-04-29 13:38:26.850000+00:00,2024-04-29 13:38:26.877000+00:00,0.984594,0.771594,9,relu,512,64,64,puzzled-seal-143,luisggon,Base_0,/mnt/0A2AAC152AABFBB7/sideProjects/deepLearnin...,LOCAL
