In [None]:
import os
import pandas as pd
import numpy as np
import h5py
import dask.array as da
from datetime import datetime
import logging

from utils.DataPreparation import prepare_data
from utils.DataPreparation import scale_data
from utils.DataGenerator import Generator

from Models.CDBLSTM import CDBLSTM

import tensorflow as tf
tf.version.VERSION

In [None]:
# Data directory
h5_path = "./prepared_samples.h5" # SET CORRECT PATH TO PREPARED TRAINING SAMPLES

# Model directory: checkpoints and log files of this run are written below it.
project_dir = "Full_Model/" # Choose where to save the trained model

# Create the project folder together with its checkpoint sub-folder.
# exist_ok=True keeps the cell safe to re-run and removes the check-then-create
# race of calling os.path.exists() before os.mkdir(); makedirs also creates any
# missing parent folders, so a nested project_dir works as well.
os.makedirs(project_dir + "/Model_Checkpoint", exist_ok=True)

## Read Data

In [None]:
# Open the prepared-samples HDF5 file read-only. The handle is kept open on purpose:
# the dask arrays below are backed by its datasets (presumably read on demand).
h5_file = h5py.File(h5_path, 'r')

# Training split: time series, metadata and targets, each wrapped as a dask array
# with chunks=100000.
x_train, x_train_meta, y_train = (
    da.from_array(h5_file[dataset_name], chunks=100000)
    for dataset_name in ('x_train_timeseries', 'x_train_metadata', 'y_train')
)

# Validation split, wrapped the same way.
x_val, x_val_meta, y_val = (
    da.from_array(h5_file[dataset_name], chunks=100000)
    for dataset_name in ('x_val_timeseries', 'x_val_metadata', 'y_val')
)

## Prepare

In [None]:
# Trim each split to a whole number of batches (batch size 512): the remainder
# that would form an incomplete last batch is cut off.
train_data_amount = y_train.shape[0] - y_train.shape[0] % 512
val_data_amount = y_val.shape[0] - y_val.shape[0] % 512

# Report the usable sample count and the resulting number of full batches.
print(f"training samples: {train_data_amount}, batches: {train_data_amount // 512}")
print(f"validation samples: {val_data_amount}, batches: {val_data_amount // 512}")

In [None]:
# Data generators feeding model.fit. Each one receives, in this order, the time
# series, the metadata and the targets, all cut to the whole-batch sample count.

# Training generator; it additionally gets a log file inside the project folder.
generator_train = Generator(
    *(part[:train_data_amount] for part in (x_train, x_train_meta, y_train)),
    batch_size=512,
    logfile=f'{project_dir}/datagenerator.log',
)

# Validation generator (no log file).
generator_val = Generator(
    *(part[:val_data_amount] for part in (x_val, x_val_meta, y_val)),
    batch_size=512,
)

# Show the length of both generators as a quick sanity check.
print(len(generator_train), len(generator_val))

In [None]:
# Instantiate the CDBLSTM network. batch_size is the same 512 that is used for
# trimming the data and for the data generators.
model = CDBLSTM(
    classes=2,
    features=1,
    metafeatures=3,
    window_size=30,
    batch_size=512,
)

In [None]:
# apply early stopping, checkpointing and logging

class LogEndOfTraining(tf.keras.callbacks.Callback):
    """Keras callback that appends a timestamped completion line to a text file.

    Args:
        filename: Path of the file that is opened in append mode when training ends.
    """

    def __init__(self, filename):
        # Run the base-class initializer first so that the attributes Keras sets up
        # on every callback also exist on this instance.
        super().__init__()
        self.filename = filename

    def on_train_end(self, logs=None):
        """Append "<current datetime>  Training completed." to ``self.filename``.

        Args:
            logs: Passed in by Keras; not used here.
        """
        with open(self.filename, 'a') as f:
            f.write(str(datetime.now()) + "  Training completed.\n")

# Callbacks passed to model.fit:
#   - EarlyStopping:    watches val_loss (patience 5, min_delta 1e-5) and restores
#                       the best weights when it stops the run.
#   - ModelCheckpoint:  saves the full model to <project_dir>/Model_Checkpoint.
#   - CSVLogger:        appends the per-epoch log to training_log.csv.
#   - LogEndOfTraining: appends a completion line to end_of_training.log.
# NOTE(review): with save_best_only=False the checkpoint is not restricted to the
# best epoch, so it may differ from the weights EarlyStopping restores - confirm
# this is intended.
cb = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=5,
        min_delta=0.00001,
        verbose=1,
        restore_best_weights=True,
    ),
    tf.keras.callbacks.ModelCheckpoint(
        f"{project_dir}/Model_Checkpoint",
        monitor='val_loss',
        save_best_only=False,
        save_weights_only=False,
        verbose=1,
    ),
    tf.keras.callbacks.CSVLogger(f"{project_dir}/training_log.csv", append=True),
    LogEndOfTraining(f"{project_dir}/end_of_training.log"),
]

## Run Model Training

In [None]:
# Train the model. A failure is recorded in <project_dir>/error.log instead of
# being raised, so an unattended run leaves a trace of what went wrong.
try:
    model.fit(generator_train,
              validation_data=generator_val,
              epochs=100, callbacks=cb, shuffle=True,
              workers=3, use_multiprocessing=True)
except Exception as e:
    # Use a dedicated logger with its own file handler rather than
    # logging.basicConfig(): basicConfig() silently does nothing when the root
    # logger already has handlers (which can be the case once other libraries
    # have set up logging), and the error would then never reach error.log.
    error_logger = logging.getLogger("training_errors")
    error_logger.setLevel(logging.INFO)
    if not error_logger.handlers:  # do not stack duplicate handlers on cell re-runs
        error_handler = logging.FileHandler(project_dir + "/error.log")
        error_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        error_logger.addHandler(error_handler)
    # .exception() logs at ERROR level and appends the full traceback.
    error_logger.exception(f"Training failed: {e}")