In [None]:
import os, sys

# Colab-only bootstrap: a working directory of "/content" is used as the signal
# that the notebook is running on Google Colab. Nothing happens elsewhere.
if os.path.abspath(".") == "/content":  # google colab
    # Install the dependencies into the kernel's environment.
    # NOTE(review): sktime, mlflow, GitPython and databricks-cli carry no version
    # pin, unlike the keras/tensorflow packages — consider pinning them too.
    %pip install sktime keras==2.8.0 tensorflow==2.8.2 keras-nlp==0.4.0 mlflow GitPython tensorflow_addons==0.17.1 databricks-cli  --quiet
    from google.colab import drive
    drive.mount("/content/drive")

    # Work from the project's src directory on the mounted Drive
    # (presumably so the project-local imports in the next cell resolve).
    os.chdir("/content/drive/My Drive/master-thesis/src")

In [None]:
import sys, os
from models import Encoder_model
from preprocessing import ConstantLengthDataGenerator, plot
from mlflow_logging import MlFlowLogging
from reading import ConcatenatedDataset
import numpy as np
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import sklearn
import shutil
from sklearn.model_selection import train_test_split
import mlflow
import tensorflow as tf
import logging

In [None]:
# Make "Encoder" the active MLflow experiment for everything logged below.
mlflow.set_experiment("Encoder")

In [None]:
# Set the root logger's level to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Project helper used at the end of the notebook to build the logged figures and summaries.
mlflow_logging = MlFlowLogging()
# Enable MLflow's automatic logging for TensorFlow/Keras training.
mlflow.tensorflow.autolog()

In [None]:
# In the visible cells `category` is only used to name the checkpoint directory;
# the data that is actually loaded is selected by the `dataset` argument.
# NOTE(review): category is "MOTION" while the dataset read is "ECG200" —
# confirm that the two are meant to differ.
category = "MOTION"
X, y = ConcatenatedDataset().read_dataset(dataset="ECG200")

In [None]:
# One-hot encode the labels: they are reshaped to a single column, encoded,
# and the encoder's output is converted to a dense array with toarray().
# Caution: `y` is overwritten with its encoded form, so re-running this cell
# would feed the already-encoded array back into the encoder. Restart the
# kernel and run from the top instead of re-running this cell.
y_encoder = sklearn.preprocessing.OneHotEncoder(categories="auto")
y = y_encoder.fit_transform(y.reshape(-1, 1)).toarray()
# Record the shape of the encoded labels as an MLflow parameter.
mlflow.log_param("y.shape", y.shape)
# Show both shapes as the cell output.
y.shape, X.shape

In [None]:
# Hold out 25% of the samples for validation, stratified on the labels.
# A fixed random_state keeps the partition identical across kernel restarts,
# so validation metrics from different runs are comparable.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

# Model

In [None]:
# Sizes shared by the model definition and the data generators.
batch_size = 256
input_length = 256
# One output unit per column of the one-hot encoded labels.
number_of_classes = y.shape[1]

# Checkpoint location, keyed by category; created up front if missing.
output_directory = f"./data/models/encoder/category={category}"
os.makedirs(output_directory, exist_ok=True)

In [None]:
# Functional Keras model: inputs of shape (input_length, 1) are passed through
# the project's encoder network, whose output becomes the model output.
input_layer = keras.layers.Input(shape=(input_length, 1))
fcn_model = Encoder_model(number_of_classes=number_of_classes)(input_layer)
model = keras.models.Model(inputs=input_layer, outputs=fcn_model)

# Learning rate starts at 1e-4 and decays by a factor of 0.5 per 1e6 optimizer steps.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=int(1e6),
    decay_rate=.5,
)

# Checkpoint into the output directory, keeping only the best validation accuracy.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=output_directory,
        monitor="val_accuracy",
        save_best_only=True,
    )
]

model.compile(
    optimizer=keras.optimizers.Adam(lr_schedule),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Arguments common to both generators: minimum and maximum length are both
# pinned to `input_length`, and the generators are handed mlflow.log_params
# as their logging hook.
kwargs = dict(
    min_length=input_length,
    max_length=input_length,
    logging_call=mlflow.log_params,
)

# Training generator: augmentation and cutting each with probability 0.2,
# padding with probability 0.5.
data_generator_train = ConstantLengthDataGenerator(
    X_train, y_train,
    batch_size=batch_size,
    augmentation_probability=0.2,
    cutting_probability=0.2,
    padding_probability=0.5,
    **kwargs,
)

# Validation generator: augmentation and cutting switched off (probability 0),
# padding probability 1, and a batch size equal to the validation set size.
data_generator_val = ConstantLengthDataGenerator(
    X_val, y_val,
    batch_size=len(y_val),
    augmentation_probability=0,
    cutting_probability=0,
    padding_probability=1,
    **kwargs,
)

# Draw a single batch once and reuse it as the fixed validation data.
validation_data = next(data_generator_val)

In [None]:
# Train for 5 epochs on the training generator, validating against the
# pre-drawn validation batch and running the checkpoint callback.
history = model.fit(
    data_generator_train,
    epochs=5,
    validation_data=validation_data,
    callbacks=callbacks,
)

# Logging the results

In [None]:
# Turn the Keras training history into a text summary plus loss/accuracy figures.
summary, loss, acc = mlflow_logging.log_history(history.history)
# The summary is stored as a .txt artifact, so it goes through log_text;
# log_figure is for figure objects and cannot write a .txt file.
# NOTE(review): assumes `summary` is a string, as with the confusion-matrix
# summary logged later — confirm against MlFlowLogging.log_history.
mlflow.log_text(summary, "summary.txt")
mlflow.log_figure(loss, "loss.png")
mlflow.log_figure(acc, "acc.png")

In [None]:
# Evaluate the model on the fixed validation batch; the helper's two return
# values are stored as a figure artifact and a text artifact respectively.
conf_matrix, conf_summary = mlflow_logging.log_confusion_matrix(
    *validation_data,
    classifier=model,
    y_encoder=y_encoder,
)
mlflow.log_figure(conf_matrix, "conf_matrix.png")
mlflow.log_text(conf_summary, "conf_summary.txt")

In [None]:
# Draw one more batch from the training generator and let the project helper
# render example samples from it.
examples = mlflow_logging.log_example_data(*next(data_generator_train), encoder=y_encoder)
# mlflow exposes no `log` function; the artifact is a .png, so it is stored
# with log_figure.
# NOTE(review): assumes `examples` is a figure object — confirm against
# MlFlowLogging.log_example_data.
mlflow.log_figure(examples, "example_data.png")

In [None]:
# Close the active MLflow run so later logging does not attach to it.
mlflow.end_run()