In [1]:
from tqdm.notebook import tqdm
import tensorflow as tf
from tensorflow import keras
import threading
from threading import Thread
from multiprocessing.dummy import Pool as ThreadPool

# Load the Fashion-MNIST splits, then convert the integer class labels of both
# splits into one-hot (categorical) matrices.
train_split, test_split = keras.datasets.fashion_mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split
y_train, y_test = (keras.utils.to_categorical(labels) for labels in (y_train, y_test))

def config_single_gpu(gpu):
    """Make a single GPU visible to TensorFlow and enable memory growth on it.

    Args:
        gpu: Either an integer index into
            ``tf.config.list_physical_devices("GPU")`` or a physical-device
            object that the ``tf.config`` API accepts directly.

    Raises:
        IndexError: If ``gpu`` is an integer that does not index one of the
            GPUs TensorFlow can see.
    """
    if isinstance(gpu, int):
        available = tf.config.list_physical_devices("GPU")
        try:
            gpu = available[gpu]
        except IndexError:
            # Same exception type as a plain list lookup, but with a message
            # that tells the user how many GPUs are actually available.
            raise IndexError(
                f"GPU index {gpu} is out of range: "
                f"TensorFlow sees {len(available)} GPU(s)"
            ) from None
    tf.config.experimental.set_memory_growth(gpu, True)
    tf.config.experimental.set_visible_devices(gpu, 'GPU')

class AttrDict(dict):
    """Dictionary whose items can also be read and written as attributes.

    ``d.key`` returns ``d["key"]`` and ``d.key = value`` stores
    ``d["key"] = value``.
    """

    def __getattr__(self, name):
        # __getattr__ is only called when normal attribute lookup fails. It
        # must raise AttributeError (not KeyError) so that hasattr(), getattr()
        # with a default and protocols that probe optional attributes (such as
        # copy.deepcopy) keep working when the key is absent.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    __setattr__ = dict.__setitem__
        
class Simulation:
    """Repeatedly train a dense tanh classifier and average its validation accuracy.

    Attributes are set from the constructor arguments:
        lr: Learning rate given to the Adam optimizer.
        layers_params: Iterable of objects exposing ``.n``, the number of units
            of each hidden layer, in order.
    """

    def __init__(self, lr, layers_params):
        self.lr = lr
        self.layers_params = layers_params

    def get_model(self):
        """Build and compile a new model for 28x28 inputs and 10 output classes.

        Returns:
            A compiled ``tf.keras`` Sequential model: Flatten, one tanh Dense
            layer per entry of ``layers_params``, then a 10-way softmax layer.
        """
        layers = [tf.keras.layers.Flatten(input_shape=(28, 28))]
        layers.extend(
            tf.keras.layers.Dense(
                params.n,
                kernel_initializer="random_normal",
                bias_initializer="random_normal",
                activation="tanh",
            )
            for params in self.layers_params
        )
        layers.append(
            tf.keras.layers.Dense(
                10,
                kernel_initializer="random_normal",
                bias_initializer="random_normal",
                activation="softmax",
            )
        )
        model = tf.keras.models.Sequential(layers)
        model.compile(
            loss="categorical_crossentropy",
            # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.lr),
            metrics=["accuracy"],
        )
        return model

    def run(self, train_ds, test_ds, runs, epochs, batch_size, output=None):
        """Train ``runs`` fresh models in a thread pool and average their accuracy.

        Args:
            train_ds: Training data passed to ``model.fit``.
            test_ds: Data passed to ``model.fit`` as ``validation_data``.
            runs: Number of independent models to train.
            epochs: Number of epochs for each training.
            batch_size: Forwarded to ``model.fit`` only when ``train_ds`` is not
                a ``tf.data.Dataset``; a dataset already yields batches and the
                Keras documentation says not to pass ``batch_size`` with it.
            output: Optional list to which the mean accuracy is appended. A new
                list is used when omitted, so calls never share state.

        Returns:
            The mean, over all runs, of the last-epoch validation accuracy.
        """
        if output is None:
            output = []

        fit_kwargs = {}
        if not isinstance(train_ds, tf.data.Dataset):
            fit_kwargs["batch_size"] = batch_size

        def fit_worker(*args):
            # The run index supplied by ``map`` is not needed.
            model = self.get_model()
            fit_feedback = model.fit(
                train_ds,
                validation_data=test_ds,
                epochs=epochs,
                verbose=0,
                **fit_kwargs,
            )
            return fit_feedback.history["val_accuracy"][-1]

        with ThreadPool() as p:
            acc = p.map(fit_worker, range(runs))

        out = tf.reduce_mean(acc).numpy()
        output.append(out)
        return out

class ThreadSimulations:
    """Run several ``Simulation`` objects concurrently, one thread per simulation.

    Attributes are set from the constructor arguments:
        lr: Learning rate shared by every simulation.
        layers_params: One hidden-layer specification per simulation.
        simulations: The ``Simulation`` instances built from ``layers_params``.
    """

    def __init__(self, lr, layers_params):
        self.lr = lr
        self.layers_params = layers_params
        self.simulations = [Simulation(lr, lp) for lp in self.layers_params]

    def run(self, train_ds, test_ds, runs, epochs, batch_size, mean=True):
        """Run every simulation in its own thread and collect the results.

        Args:
            train_ds, test_ds, runs, epochs, batch_size: Forwarded unchanged to
                each simulation's ``run`` method.
            mean: When true, reduce each simulation's output list to its mean;
                otherwise return the raw output lists.

        Returns:
            A list with one entry per simulation, in the order of
            ``self.simulations``.

        Raises:
            Exception: The first exception raised inside a worker thread, if
                any. It is re-raised here instead of being lost in the thread.
        """
        outputs = [[] for _ in self.simulations]
        errors = []

        def guarded_run(sim, out):
            # An exception in a thread would otherwise only be printed and the
            # matching output list would stay empty, yielding a silent NaN mean.
            try:
                sim.run(train_ds, test_ds, runs, epochs, batch_size, out)
            except Exception as exc:
                errors.append(exc)

        threads = [
            Thread(target=guarded_run, args=(sim, out))
            for sim, out in zip(self.simulations, outputs)
        ]

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        if errors:
            raise errors[0]

        if mean:
            return [tf.reduce_mean(out).numpy() for out in outputs]
        return outputs

Question 1 : Hidden units
---

Reuse the neural network you defined in question 4 of the previous task (one hidden layer with *tanh* activation and a learning rate of $10^{-5}$). Let's study the impact of the number of units in the hidden layer.

Build a model with $10$ units in the hidden layer, one with $100$ units, and another one with $1000$ units. Which one performs best?

Perform $10$ distinct runs (training + testing) for each model and average the results. Use $100$ epochs to fit each model.

Don't change anything in your network besides the number of hidden units.

Report the mean test accuracies of the three models using the format: *test_acc_10*, *test_acc_100*, *test_acc_1000*

In [2]:
# Settings for question 1: a single hidden layer whose width varies.
N_RUNS = 10
N_EPOCHS = 100
BATCH_SIZE = 32
n_hiddens = [10, 100, 1000]

# Expose only the GPU at index 1 to TensorFlow.
config_single_gpu(1)

# Cached, batched input pipelines. A prefetch size of -1 lets tf.data choose
# the buffer size automatically.
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .cache()
    .batch(BATCH_SIZE)
    .prefetch(-1)
)
test_ds = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .cache()
    .batch(BATCH_SIZE)
    .prefetch(-1)
)

# One single-layer specification per hidden width.
layers_params = [[AttrDict(n=width)] for width in n_hiddens]

single_layer_simu = ThreadSimulations(1e-5, layers_params)
means = single_layer_simu.run(train_ds, test_ds, N_RUNS, N_EPOCHS, BATCH_SIZE)

print(
    "test_acc_10, test_acc_100, test_acc_1000 :: ",
    ", ".join(f"{value:.3f}" for value in means),
)

test_acc_10, test_acc_100, test_acc_1000 ::  0.743, 0.839, 0.867


Question 2 : Adding layers
---

In [53]:
from itertools import product, combinations

# Settings for question 2: every combination of hidden widths for networks
# with one, two or three hidden layers.
N_RUNS = 10
N_EPOCHS = 100
BATCH_SIZE = 32
n_hiddens = [10, 100, 1000]
n_layers = [1, 2, 3]

layers_params = []
for n_layer in n_layers:
    # All ordered width tuples of length n_layer (repetition allowed).
    combi = list(product(n_hiddens, repeat=n_layer))
    for params in combi:
        layers_params.append(
            [AttrDict(n=n) for n in params]
        )

multi_layer_simu = ThreadSimulations(1e-5, layers_params)
# ThreadSimulations.run takes the batched datasets (train_ds, test_ds) built for
# question 1, not the raw arrays; the previous call passed undefined names
# (x_train, ...) and one positional argument too many.
means = multi_layer_simu.run(train_ds, test_ds, N_RUNS, N_EPOCHS, BATCH_SIZE)

In [52]:
# Find the configuration with the highest mean accuracy and print the number
# of units of each of its hidden layers.
best_mean_idx = tf.argmax(means).numpy()
best_layers = layers_params[best_mean_idx]
print("Best params for layers :: ", *(layer.n for layer in best_layers))

Best params for layers :: 
 1000 1000


In [2]:
# Print the notebook's current working directory (IPython shell escape).
!pwd

/home/romaingrx/Nextcloud/EPL/Q8/Cours/LINGI2262 - Machine Learning/LINGI2262-Projects/Assignment 4 - Deep Learning
