In [2]:
import sys
import json
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm.auto import tqdm
from tqdm.keras import TqdmCallback
sys.path.append("../")
from src.Evaluation import plot_model_history
from src.ModelBuilder import get_MLP, get_MCDCNN, get_Time_CNN, get_FCN, get_Encoder, get_Resnet
from src.LoadData import get_all_datasets_test_train_np_arrays
from src.Helpers import append_to_csv
from src.Helpers import append_to_csv, get_confusion_matrix_for_model_and_data, visualize_confusion_matrix

2023-01-24 11:31:04.598461: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-24 11:31:07.322476: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-01-24 11:31:08.598749: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/bwhpc/common/devel/cuda/11.8/lib64
2023-01-24 11:31:08.598821: W tensorflow/compil

In [3]:
# Relative path of the dataset directory that is handed to the dataset loader.
path_to_datasets = "../datasets"

In [13]:
# Load the train/test arrays through the project loader.
# NOTE(review): the training loop consumes this as a mapping of
# dataset name -> {"train_data": (x, y), "test_data": (x, y)} — confirm against src.LoadData.
datasets_test_train_data = get_all_datasets_test_train_np_arrays(path_to_datasets)

In [14]:
# Every architecture is written once as a (display name, builder) pair; the two
# positionally matched lists consumed by the training loop are derived from
# these pairs.
models_names, models_getter = map(list, zip(
    ("MLP", get_MLP),
    ("MCDCNN", get_MCDCNN),
    ("Time_CNN", get_Time_CNN),
    ("FCN", get_FCN),
    ("Encoder", get_Encoder),
    ("Resnet", get_Resnet),
))

In [8]:
# tf.debugging.set_log_device_placement(True)
# devices = tf.config.list_logical_devices('GPU') # Use this to run training just on GPUs
# List every logical device TensorFlow reports (no device-type filter is passed).
devices = tf.config.list_logical_devices()
# Build a MirroredStrategy over those devices.
# NOTE(review): the only visible use of `strategy` is the commented-out
# `with strategy.scope():` line in the training cell — confirm whether it is still needed.
strategy = tf.distribute.MirroredStrategy(devices)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


2023-01-24 11:31:35.948908: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/bwhpc/common/devel/cuda/11.8/lib64
2023-01-24 11:31:35.948938: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-01-24 11:31:35.948955: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (uc2n368.localdomain): /proc/driver/nvidia/version does not exist
2023-01-24 11:31:36.019995: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate co

In [9]:
# Display the logical devices that were passed to the distribution strategy.
devices

[LogicalDevice(name='/device:CPU:0', device_type='CPU')]

In [10]:
# CSV file that receives one appended row per (dataset, model) training run.
path_persist_results = "./training_res.csv"
csv_path = Path(path_persist_results)

# Create the file on first use; an already existing file is left untouched.
if not csv_path.exists():
    csv_path.touch()

# Write the header row only when the file is still empty. The size is taken from
# the file metadata, so the stored rows (which include long JSON strings) are
# never read and decoded just to find out whether anything has been written.
if csv_path.stat().st_size == 0:
    columns = ["dataset_name", "model_name", "test_loss", "test_acc", "confusion_matrix", "history"]
    append_to_csv(path_persist_results, columns)

In [11]:
# Training hyperparameters shared by every dataset/model combination.
batch_size = 10          # passed to model.fit as batch_size
validation_split = 0.2   # passed to model.fit as validation_split
epochs = 30              # passed to model.fit as epochs

In [12]:
# with strategy.scope():
# Train every model on every dataset, evaluate it on the test split and append
# one result row per (dataset, model) pair to the results CSV.

# The builder and name lists are matched by position; fail loudly instead of
# letting zip() silently drop the unmatched tail if they ever differ in length.
assert len(models_getter) == len(models_names), "models_getter and models_names differ in length"
# The pairing does not depend on the dataset, so build it once.
model_specs = list(zip(models_getter, models_names))

for ds_name, ds_data in tqdm(datasets_test_train_data.items(), unit='dataset'):
    print("Dataset name: ", ds_name)
    x_test, y_test = ds_data["test_data"]
    x_train, y_train = ds_data["train_data"]

    # Model dimensions come from the training split: the second axis of x_train
    # is the input size, the number of distinct training labels the output size.
    input_size = x_train.shape[1]
    output_size = len(np.unique(y_train))

    for get_model, model_name in tqdm(model_specs, unit='model', desc=f'Train on "{ds_name}"'):
        print("Model name: ", model_name)
        # Many models are created in this loop; reset Keras' global state first so
        # the state kept for previously built models does not pile up in memory.
        tf.keras.backend.clear_session()
        model = get_model(input_size, output_size)
        model.compile(optimizer='SGD',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        # Keras' own progress output is disabled (verbose=0); progress is shown
        # through the tqdm callback instead.
        history = model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=[TqdmCallback(verbose=0, desc=model_name)],
            verbose=0,
        )
        test_loss, test_acc = model.evaluate(x_test, y_test)

        confusion_matrix = get_confusion_matrix_for_model_and_data(model, x_test, y_test)
        #visualize_confusion_matrix(confusion_matrix, model_name, ds_name)

        # The confusion matrix and the per-epoch history are stored as JSON strings
        # so that each fits into a single CSV cell. The order matches the header row
        # written when the results file was initialised.
        row = [ds_name,
               model_name,
               test_loss,
               test_acc,
               json.dumps(confusion_matrix.tolist()),
               json.dumps(history.history)]
        append_to_csv(path_persist_results, row)

  0%|          | 0/2 [00:00<?, ?dataset/s]

Dataset name:  dodger_loop_day


Train on "dodger_loop_day":   0%|          | 0/1 [00:00<?, ?model/s]

Model name:  Resnet


Resnet: 0epoch [00:00, ?epoch/s]

Dataset name:  arrow_head


Train on "arrow_head":   0%|          | 0/1 [00:00<?, ?model/s]

Model name:  Resnet


Resnet: 0epoch [00:00, ?epoch/s]



array([[286,   0,  14],
       [ 11, 287,   0],
       [158,   0, 144]])

array([[286,   0,  14],
       [ 11, 287,   0],
       [158,   0, 144]])

# Example: load training results and display evaluation results

In [None]:
# Read the persisted results CSV back into a DataFrame.
df_training_res = pd.read_csv(path_persist_results)

In [None]:
# Display the loaded results table.
df_training_res

In [None]:
# Take the "history" cell of the row labelled 0, i.e. the JSON string stored
# for that training run.
history = df_training_res.loc[0, "history"]

In [None]:
# Decode the JSON-encoded history string and hand it to the plotting helper.
# NOTE(review): `epochs` is the current config value, not necessarily the number of
# epochs the stored run was trained for — confirm they match before trusting the plot.
plot_model_history(json.loads(history), epochs=epochs)