In [1]:
import os, sys

if os.path.abspath(".") == "/content":  # google colab
    %pip install sktime keras==2.8.0 tensorflow==2.8.2 keras-nlp==0.4.0 mlflow==2.1.1 GitPython tensorflow_addons --user 1> /dev/null
    from git import Repo, GitCommandError
    import mlflow
    import shutil
    from google.colab import drive

    drive.mount("/content/drive")
    try:
        secret = open("/content/drive/My Drive/master-thesis/gh-secret.txt").read()
        Repo.clone_from(
            f"https://colab:{secret}@github.com/PaulinaPacyna/master-thesis.git",
            "/content/master-thesis",
        )
    except GitCommandError as e:
        print(e)

    os.src

    data_dir = "/content/drive/My Drive/master-thesis"
    curdir = "/content/master-thesis/src"
    tracking_uri = f"file://{data_dir}/mlflow"
else:
    curdir = "."
    data_dir = "./data"
    tracking_uri = None

In [2]:
import sys, os
from models import Encoder_model
from preprocessing import ConstantLengthDataGenerator, plot
from mlflow_logging import MlFlowLogging
from reading import ConcatenatedDataset
import numpy as np
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import sklearn
import shutil
from sklearn.model_selection import train_test_split
import mlflow
import tensorflow as tf
import logging

In [3]:
# Configure logging and MLflow tracking for this notebook.
# `logging.getLogger()` with no name returns the root logger; INFO level makes
# the data-loading messages visible in the cell outputs.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Project helper used at the end of the notebook to log the confusion matrix,
# training history and example data.
mlflow_logging = MlFlowLogging()
# `tracking_uri` comes from the environment cell (None outside Colab).
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment("Encoder")
# Turn on MLflow's TensorFlow autologging before the model is trained.
mlflow.tensorflow.autolog()

In [4]:
# `category` is only used to name the model output directory further down.
# NOTE(review): the category is "MOTION" while the dataset loaded below is
# "ECG200" -- confirm that this label is the intended one for this dataset.
category = "MOTION"

# Load a single dataset; `X` holds the series and `y` the class labels.
X, y = ConcatenatedDataset().read_dataset(dataset="ECG200")

INFO:root:Loading only one dataset: ECG200
INFO:root:Loading only one dataset: ECG200


In [5]:
# One-hot encode the class labels. The fitted `y_encoder` is kept because the
# result-logging cell passes it to the MLflow helpers.
y_encoder = sklearn.preprocessing.OneHotEncoder(categories="auto")
# NOTE: this rebinds `y` from raw labels to the dense one-hot matrix, so the
# cell is not idempotent -- re-run the loading cell before re-running this one.
y = y_encoder.fit_transform(y.reshape(-1, 1)).toarray()
mlflow.log_param("y.shape", y.shape)
# Display both shapes as the cell output.
y.shape, X.shape

((200, 2), (200,))

In [6]:
# Stratified 75/25 train/validation split. A fixed seed makes the split (and
# therefore the reported validation metrics) reproducible across
# Restart & Run All.
SPLIT_SEED = 42
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=SPLIT_SEED
)

# Model

In [7]:
# Fixed training settings: mini-batch size and the constant series length
# that the data generators produce and the network input expects.
batch_size = 256
input_length = 256

# Number of output classes = number of columns of the one-hot target matrix.
number_of_classes = y.shape[1]

# Directory for model checkpoints; created up front so saving cannot fail on
# a missing path.
output_directory = f"{data_dir}/data/models/encoder/category={category}"
os.makedirs(output_directory, exist_ok=True)

In [8]:
# Build the classifier: a single-channel series of `input_length` steps is
# fed through the project's encoder architecture.
input_layer = keras.layers.Input(shape=(input_length, 1))
fcn_model = Encoder_model(number_of_classes=number_of_classes)(input_layer)
model = keras.models.Model(inputs=input_layer, outputs=fcn_model)

# With decay_rate=1 this schedule keeps the learning rate constant at 1e-4;
# it is kept as a schedule so a real decay can be switched on by changing
# the rate.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=3,
    decay_rate=1,
)

# Keep only the checkpoint with the best validation accuracy.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=output_directory,
        monitor="val_accuracy",
        save_best_only=True,
    ),
]

model.compile(
    optimizer=keras.optimizers.Adam(lr_schedule),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [9]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 1)]     0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 256, 128)     768         ['input_1[0][0]']                
                                                                                                  
 instance_normalization (Instan  (None, 256, 128)    256         ['conv1d[0][0]']                 
 ceNormalization)                                                                                 
                                                                                                  
 p_re_lu (PReLU)                (None, 256, 128)     128         ['instance_normalization[0][0

In [10]:
# Settings shared by both generators: min == max length, so every series comes
# out at exactly `input_length` steps, and generator parameters are reported
# through `mlflow.log_params`.
kwargs = dict(
    min_length=input_length,
    max_length=input_length,
    logging_call=mlflow.log_params,
)

# Training generator: random augmentation and cutting are enabled.
data_generator_train = ConstantLengthDataGenerator(
    X_train,
    y_train,
    batch_size=batch_size,
    augmentation_probability=0.2,
    cutting_probability=0.2,
    padding_probability=0.5,
    **kwargs,
)

# Validation generator: augmentation and cutting are switched off, and the
# batch size equals the validation set size.
data_generator_val = ConstantLengthDataGenerator(
    X_val,
    y_val,
    batch_size=len(y_val),
    augmentation_probability=0,
    cutting_probability=0,
    padding_probability=1,
    **kwargs,
)

# Draw one batch from the validation generator and reuse it as the fixed
# validation data for training and for the result logging.
validation_data = next(data_generator_val)

In [11]:
# Train on batches from the training generator and evaluate on the fixed
# validation batch after each epoch. `history` is kept for the MLflow logging
# cell below.
history = model.fit(
    data_generator_train,
    epochs=2,
    validation_data=validation_data,
    callbacks=callbacks,
)

Epoch 1/2



INFO:tensorflow:Assets written to: ./data/data/models/encoder\category=MOTION\assets


INFO:tensorflow:Assets written to: ./data/data/models/encoder\category=MOTION\assets


Epoch 2/2




INFO:tensorflow:Assets written to: C:\Users\pacyn\AppData\Local\Temp\tmpcmmx2u47\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\pacyn\AppData\Local\Temp\tmpcmmx2u47\model\data\model\assets


# Logging the results

In [None]:
# Log evaluation artefacts through the project's MLflow helper.
# `validation_data` is unpacked into the helper's leading positional arguments.
# NOTE(review): the encoder is passed as `y_encoder=` here but as `encoder=`
# in `log_example_data` below -- confirm both match the MlFlowLogging
# signatures.
mlflow_logging.log_confusion_matrix(*validation_data, classifier=model, y_encoder=y_encoder)
mlflow_logging.log_history(
    history.history, 
)
# `next(...)` draws a fresh batch from the training generator for the examples.
mlflow_logging.log_example_data(*next(data_generator_train), encoder=y_encoder)

In [23]:
# Close the active MLflow run so a later execution starts a new one.
mlflow.end_run()