In [None]:
import os
import pandas as pd
import numpy as np
import h5py
import dask.array as da
from datetime import datetime
import logging

import sys
sys.path.append('../')

from utils.DataPreparation import prepare_data
from utils.DataPreparation import scale_data
from utils.DataGenerator import Generator

import tensorflow as tf
# Last expression of the cell: displays the installed TensorFlow version as cell output
tf.version.VERSION

In [None]:
# Data directory
hdf5_file = "../Data/datasets.h5" # SET CORRECT PATH TO DATASET FILE

# Base model directory
base_model = '../Models/Coddora' # SET CORRECT PATH TO CODDORA MODEL

# Resulting model directory
project_dir = '../Models/CoddoraRW/'  # Choose where to save the retrained model

# Create the output folder and its checkpoint sub-folder.
# makedirs also creates missing parent folders (os.mkdir would fail if '../Models' is absent),
# and exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(os.path.join(project_dir, 'Model_Checkpoint'), exist_ok=True)

In [None]:
# Load the Home dataset from the HDF5 store and report how many distinct days it contains
k = 'Home' # key for the Home dataset
df = pd.read_hdf(hdf5_file, key=k)
distinct_days = df['Day'].unique()
print("{} ({} days)".format(k, len(distinct_days)))

## Define Metadata

In [None]:
# Room metadata per dataset key: volume, infiltration and number of occupants.
# The per-room key order (volume, infiltration, occupants) is kept exactly as before,
# because code that reads the values positionally depends on it.
metadata = {
    name: {'volume': volume, 'infiltration': infiltration, 'occupants': occupants}
    for name, volume, infiltration, occupants in [
        ("Office_A", 77.5, 0.0001, 2),
        ("Home", 40, 0.0001, 2),
        ("Stjelja", 73.5, 0.0001, 12),
    ]
}

def scale_meta_features(meta_features):
    '''
    Scale the room meta features with scale_data, each against its own fixed domain.

    The input sequence is NOT modified; a new list is returned. (Previously the argument
    was overwritten in place, so scaling the same list twice silently double-scaled it.)
    NOTE(review): scale_data is imported from utils.DataPreparation; it is presumably a
    min-max scaling onto the given domain -- confirm there.

    :param meta_features: feature sequence [volume, infiltration, maxOccupants]
    :return: new list with the first three entries scaled, in the same order
    '''
    # (min_domain, max_domain) per feature position: volume, infiltration, maxOccupants
    domains = (
        (9.6, 400),
        (0.000085, 0.00085),
        (1, 12),
    )
    scaled = list(meta_features)  # work on a copy so the caller's data stays untouched
    for position, (lower, upper) in enumerate(domains):
        scaled[position] = scale_data(scaled[position], min_domain=lower, max_domain=upper)
    return scaled

## Sample Data

In [None]:
def sample_data(k, days, add_metadata=False, window_size=30):
    '''
    Read one dataset and split it by day into training and validation samples.

    The first `days` distinct values of the `Day` column (in order of appearance) form the
    training set, all remaining days the validation set. Both parts are passed through
    prepare_data using the `CO2` and `Occupancy` columns.

    :param k: key of the dataset in the HDF5 file; also the key into `metadata`
              when add_metadata is True
    :param days: number of leading days used for training
    :param add_metadata: if True, attach the scaled room metadata of dataset `k` to every sample
    :param window_size: window size forwarded to prepare_data (default 30, the value that
                        used to be hard-coded)
    :return: dict with keys 'x_train', 'y_train', 'x_val', 'y_val'. With add_metadata the
             x entries are two-element lists [x, x_meta]; otherwise they are x alone.
    '''
    df = pd.read_hdf(hdf5_file, k)

    # Build the training-day mask once and use it (and its negation) for both partitions
    is_train_day = df.Day.isin(df.Day.unique()[:days])
    training_data = df[is_train_day]
    val_data = df[~is_train_day]
    print(len(training_data), len(val_data))

    x_train, y_train = prepare_data(training_data['CO2'].values, training_data['Occupancy'].values,
                                    window_size=window_size, normalize='CO2', verbose=0)
    x_val, y_val = prepare_data(val_data['CO2'].values, val_data['Occupancy'].values,
                                window_size=window_size, normalize='CO2', verbose=0)

    if add_metadata:
        # Select the features by name so the order expected by scale_meta_features
        # (volume, infiltration, occupants) does not depend on dict insertion order.
        room = metadata[k]
        meta_features = scale_meta_features(
            [room['volume'], room['infiltration'], room['occupants']])
        # One identical metadata row per sample; np.tile keeps the result 2-D
        # (n_samples, n_features) even when a split contains no samples.
        meta_row = np.asarray(meta_features)
        x_train_meta = np.tile(meta_row, (len(x_train), 1))
        x_val_meta = np.tile(meta_row, (len(x_val), 1))
        print("train", np.shape(x_train), np.shape(x_train_meta), np.shape(y_train))
        print("val ", np.shape(x_val), np.shape(x_val_meta), np.shape(y_val))
        data = {'x_train': [x_train, x_train_meta],
                'y_train': y_train,
                'x_val': [x_val, x_val_meta],
                'y_val': y_val}
    else:
        print("train", np.shape(x_train), np.shape(y_train))
        print("val ", np.shape(x_val), np.shape(y_val))
        data = {'x_train': x_train, 'y_train': y_train, 'x_val': x_val, 'y_val': y_val}

    return data

In [None]:
# Home dataset: first 45 days for training, the remaining days (5) for validation;
# room metadata is attached to every sample.
data = sample_data(k='Home', days=45, add_metadata=True)

## Prepare

In [None]:
# Only complete batches are used: round both sample counts down to a multiple of the batch size (128)
batch_size = 128

train_batches = data['y_train'].shape[0] // batch_size
val_batches = data['y_val'].shape[0] // batch_size

train_data_amount = train_batches * batch_size
val_data_amount = val_batches * batch_size

print("training samples: {}, batches: {}".format(train_data_amount, train_batches))
print("validation samples: {}, batches: {}".format(val_data_amount, val_batches))

In [None]:
# data['x_train'] and data['x_val'] are two-element lists [x, x_meta].
# Pick the array FIRST, then truncate its samples. Slicing the list first
# (data['x_train'][:n][0]) only slices the 2-element list and returns the array
# untruncated, leaving x and y with different sample counts.
x_train      = data['x_train'][0][:train_data_amount]
x_train_meta = data['x_train'][1][:train_data_amount]
y_train      = data['y_train'][:train_data_amount]

x_val        = data['x_val'][0][:val_data_amount]
x_val_meta   = data['x_val'][1][:val_data_amount]
y_val        = data['y_val'][:val_data_amount]

# Inputs and targets must line up sample by sample before they are handed to model.fit
assert len(x_train) == len(x_train_meta) == len(y_train), "training inputs/targets differ in length"
assert len(x_val) == len(x_val_meta) == len(y_val), "validation inputs/targets differ in length"

print("training:",   np.shape(x_train), np.shape(x_train_meta), np.shape(y_train))
print("validation:", np.shape(x_val), np.shape(x_val_meta), np.shape(y_val))

In [None]:
# apply early stopping, checkpointing and logging

class LogEndOfTraining(tf.keras.callbacks.Callback):
    '''
    Keras callback that appends a timestamped "Training completed." line to a log file
    when training ends.

    :param filename: path of the log file; it is opened in append mode
    '''
    def __init__(self, filename):
        # Initialise the Keras Callback base class before adding our own attribute;
        # skipping this leaves the base-class state unset.
        super().__init__()
        self.filename = filename

    def on_train_end(self, logs=None):
        '''Append the completion timestamp. `logs` belongs to the Keras hook signature and is unused.'''
        with open(self.filename, 'a') as f:
            f.write(str(datetime.now()) + "  Training completed.\n")

# Stop once val_loss has not improved by at least min_delta for 5 epochs and restore the best weights
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min',
    patience=5, min_delta=0.00001,
    verbose=1, restore_best_weights=True)

# Keep only the best full model (not just the weights) according to val_loss
best_model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=project_dir + "/Model_Checkpoint",
    monitor='val_loss', save_best_only=True, save_weights_only=False, verbose=1)

# Append per-epoch metrics to a CSV file
csv_logger = tf.keras.callbacks.CSVLogger(filename=project_dir + "/training_log.csv", append=True)

# Note the end of training in a separate log file
end_of_training_log = LogEndOfTraining(filename=project_dir + "/end_of_training.log")

cb = [early_stopping, best_model_checkpoint, csv_logger, end_of_training_log]

In [None]:
# Load the pretrained base model from `base_model`; it is retrained further below
model = tf.keras.models.load_model(filepath=base_model)

In [None]:
# Fix TensorFlow's global random seed before training
seed_value = 0
tf.random.set_seed(seed=seed_value)

## Run Model Training

In [None]:
# Failures are written to error.log (with traceback) AND re-raised, so a failed run
# stops the notebook visibly instead of looking like a normal completion.
try:
    model.fit([x_train, x_train_meta], y_train,
              validation_data=([x_val, x_val_meta], y_val),
              epochs=100, batch_size=128, callbacks=cb, shuffle=True)
except Exception as e:
    # logging.basicConfig() does nothing once the root logger already has handlers,
    # so the error is written through a dedicated logger with its own file handler.
    error_logger = logging.getLogger("training_errors")
    if not error_logger.handlers:  # do not stack handlers when the cell is re-run
        error_handler = logging.FileHandler(project_dir + "/error.log")
        error_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        error_logger.addHandler(error_handler)
    # exception() logs at ERROR level and appends the full traceback
    error_logger.exception(f"Training failed: {e}")
    raise