# Experimenting with Deep Learning and Reuters dataset

# Reuters dataset. Experiment 2

This experiment consists of evaluating the performance of a NN with three hidden layers on the Reuters classification problem. We have selected `MLflow` to track the performance variation and assess the models.

In [None]:
# imports
import os

import pandas as pd
import tensorflow as tf
import numpy as np
import mlflow

from mlflow.tracking import MlflowClient
from tensorflow import keras
from keras import layers

In [None]:
# import local modules
import sys
sys.path.append('/mnt/0A2AAC152AABFBB7/sideProjects/deepLearning')
from deeplearning.deep_utils import reuters_model_four_layers

Mounted at /content/drive


In [None]:
# Load the Reuters dataset through the Keras dataset loader.
# The call returns a (train, test) pair; each half is unpacked into (data, labels).
# num_words=10000 is the same value as the default `dimension` of
# vectorize_sequences, which multi-hot encodes these sequences.
# NOTE(review): presumably num_words restricts the vocabulary to the 10,000 most
# frequent words — confirm against the Keras docs.

(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.reuters.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


In [None]:
# Encoding the integer sequences via multi-hot encoding

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of integer index sequences.

    Args:
        sequences: Sized iterable of integer index sequences (one per sample).
        dimension: Width of each encoded row; every index must be < dimension.

    Returns:
        A float64 array of shape (len(sequences), dimension) in which
        results[i, j] is 1.0 when j occurs in sequences[i] and 0.0 otherwise.

    Raises:
        IndexError: If an index falls outside the valid range for `dimension`.
    """
    results = np.zeros((len(sequences), dimension))

    for i, sequence in enumerate(sequences):
        # One fancy-indexed assignment per row replaces the per-element Python
        # loop; repeated indices simply set the same cell to 1 again.
        results[i, list(sequence)] = 1.
    return results

# Reuters has 46 topic classes (matching the 46-unit output layer used further down).
# Passing the class count explicitly keeps the train and test label matrices the
# same width even if one split happens not to contain the highest class id;
# otherwise to_categorical infers the width from each array separately.
NUM_CLASSES = 46
# Number of leading training samples held out for validation.
VAL_SIZE = 1000

# vectorize training and test data

x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

# one-hot encode training and test labels

y_train = tf.keras.utils.to_categorical(train_labels, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(test_labels, num_classes=NUM_CLASSES)

# Setting aside a validation set: the first VAL_SIZE samples validate,
# the remainder is used for fitting.

x_val = x_train[:VAL_SIZE]
partial_x_train = x_train[VAL_SIZE:]
y_val = y_train[:VAL_SIZE]
partial_y_train = y_train[VAL_SIZE:]

In [None]:
# MLflow setup: activate the experiment first, then look up its metadata via a client

EXPERIMENT_NAME = "deepLearning_reuters_dataset"
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

client = MlflowClient()
exp = client.get_experiment_by_name(EXPERIMENT_NAME)

In [None]:
# Units per layer for each configuration to try: three hidden layers followed by
# the 46-unit output layer. Every entry needs four values because the training
# loop logs units[0], units[1] and units[2] as the hidden-layer sizes.
layer_units = [
    [64, 64, 64, 46],
    [32, 32, 32, 46],
    [128, 128, 128, 46],  # was [128, 128, 46]; the recorded run has a 128-unit 3rd layer
    [8, 8, 8, 46],
]

In [None]:
# Settings shared by the model call and the MLflow params, so the logged values
# cannot drift from what is actually used.
HIDDEN_ACTIVATION = "relu"
# NOTE(review): epochs and batch size are not passed to reuters_model_four_layers
# here; presumably they mirror defaults inside that helper — confirm.
EPOCHS = 9
BATCH_SIZE = 512

for units in layer_units:

    # Train one model for this layer configuration, validating on the held-out split.
    model, history = reuters_model_four_layers(
        units=units,
        activ_func=HIDDEN_ACTIVATION,
        X_train=partial_x_train,
        y_train=partial_y_train,
        X_val=x_val,
        y_val=y_val
    )

    hist_dict = history.history
    # Score the trained model on the untouched test split.
    test_loss, test_accuracy = model.evaluate(x_test, y_test)

    # Record the configuration and the test scores as one MLflow run.
    with mlflow.start_run():
        mlflow.set_tag("model", "Base_0")
        mlflow.log_params({
            "units_1st_layer": units[0],
            "units_2nd_layer": units[1],
            "units_3rd_layer": units[2],
            "hidden_activation": HIDDEN_ACTIVATION,
            "epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
        })
        mlflow.log_metrics({
            "accuracy": test_accuracy,
            "loss": test_loss,
        })

In [None]:
# Collect the logged runs into a DataFrame and persist them as CSV.
runs = mlflow.search_runs()
PATH_TO_RUNS = "/mnt/0A2AAC152AABFBB7/sideProjects/deepLearning/mlflow_runs"
# Create the target directory first: to_csv does not create missing parent
# directories and would fail on a fresh checkout or machine.
os.makedirs(PATH_TO_RUNS, exist_ok=True)
runs.to_csv(
    os.path.join(PATH_TO_RUNS, "reuters_runs_1.csv")
)
# Display the runs table once as the cell output.
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy,metrics.loss,params.units_1st_layer,params.batch_size,params.units_3rd_layer,params.hidden_activation,params.epochs,params.units_2nd_layer,tags.mlflow.runName,tags.mlflow.source.name,tags.mlflow.source.type,tags.model,tags.mlflow.user
0,453b3d4366ea44619e961b19a65d1495,0,FINISHED,file:///content/mlruns/0/453b3d4366ea44619e961...,2024-02-22 15:44:35.220000+00:00,2024-02-22 15:44:35.239000+00:00,0.777827,1.06651,128,512,128,relu,9,128,flawless-wren-237,/usr/local/lib/python3.10/dist-packages/colab_...,LOCAL,Base_0,root
1,46294537475541b5aadea7e89c08bd53,0,FINISHED,file:///content/mlruns/0/46294537475541b5aadea...,2024-02-22 15:44:11.927000+00:00,2024-02-22 15:44:11.945000+00:00,0.580142,1.818586,8,512,8,relu,9,8,wise-wasp-944,/usr/local/lib/python3.10/dist-packages/colab_...,LOCAL,Base_0,root
2,f7cf0147555341749f1d9107724c4c05,0,FINISHED,file:///content/mlruns/0/f7cf0147555341749f1d9...,2024-02-22 15:44:05.142000+00:00,2024-02-22 15:44:05.160000+00:00,0.739982,1.126988,32,512,32,relu,9,32,fortunate-hound-332,/usr/local/lib/python3.10/dist-packages/colab_...,LOCAL,Base_0,root
3,8cb9549dd83645cf8e71e2d7a2874e63,0,FINISHED,file:///content/mlruns/0/8cb9549dd83645cf8e71e...,2024-02-22 15:43:52.375000+00:00,2024-02-22 15:43:52.394000+00:00,0.769368,1.096069,64,512,64,relu,9,64,wistful-trout-870,/usr/local/lib/python3.10/dist-packages/colab_...,LOCAL,Base_0,root


The addition of a third layer does not improve the results obtained with only two layers.