In [None]:
import os
import pandas as pd
import numpy as np
import h5py

import sys
sys.path.append('../')

from Models.CDBLSTM import CDBLSTM

from utils.DataPreparation import prepare_data
from utils.DataPreparation import scale_data
from utils.Evaluation import evaluate
from utils.Experiments import Settings, Experiment, read_all

import tensorflow as tf
tf.version.VERSION

In [None]:
# Data directory
hdf5_file = "../Data/datasets.h5" # SET CORRECT PATH TO DATASET FILE

# Coddora model directory
coddora_dir = '../Models/Coddora'

# CoddoraRW model directory
coddoraRW_dir = '../Models/CoddoraRW'

# Results
result_path = './Evaluation_Results/' # Choose where to save the evaluation results
# Create the folder if it does not exist yet. makedirs(exist_ok=True) is safe to
# re-run, avoids the check-then-create race and also creates missing parent folders.
# Later cells build paths with `result_path + "<name>"`, so keep the trailing slash.
os.makedirs(result_path, exist_ok=True)

In [None]:
# Datasets for evaluation
# (note that the meeting room dataset by Stjelja et al. could not be published)

# For every evaluation dataset, report how many distinct values its `Day` column holds.
for k in ('Office_A', 'Stjelja'):
    df = pd.read_hdf(hdf5_file, key=k)
    n_days = len(df['Day'].unique())
    print(f"{k} ({n_days} days)")

## Define Metadata

In [None]:
# Room metadata per dataset key (the keys match the dataset keys of the hdf5 file).
# Each entry holds the room volume, the infiltration rate and the number of
# occupants; units are not stated in this notebook - TODO confirm.
# NOTE: sample_data() converts the inner values to a list, so the inner key order
# (volume, infiltration, occupants) must be kept.
metadata = {
    "Office_A": dict(volume=77.5, infiltration=0.0001, occupants=2),
    "Home":     dict(volume=40,   infiltration=0.0001, occupants=2),
    "Stjelja":  dict(volume=73.5, infiltration=0.0001, occupants=12),
}

def scale_meta_features(meta_features):
    '''
    Scales the three room metadata features with `scale_data`, each with its own domain.

    The passed sequence is NOT modified; a new list is returned. (Writing the scaled
    values back into the input would alter the caller's data and would silently
    truncate them if the input were an integer numpy array.)

    :param meta_features: feature sequence [volume, infiltration, maxOccupants]
    :return: new list in which the first three entries are scaled; any further
             entries are copied unchanged
    '''
    # (min_domain, max_domain) per feature position: volume, infiltration, maxOccupants.
    # Presumably the value ranges the pretrained models were trained on - TODO confirm.
    domains = ((9.6, 400), (0.000085, 0.00085), (1, 12))

    scaled = list(meta_features)  # copy, so the caller's sequence stays untouched
    for position, (low, high) in enumerate(domains):
        scaled[position] = scale_data(scaled[position], min_domain=low, max_domain=high)
    return scaled

## Sample Data Function

In [None]:
def _to_sequences(frame):
    '''Converts the CO2 / Occupancy columns of `frame` into inputs and labels via `prepare_data`.'''
    return prepare_data(frame['CO2'].values, frame['Occupancy'].values,
                        window_size=30, normalize='CO2', verbose=0)


def _scaled_meta_rows(k, n_rows):
    '''Returns an array with `n_rows` rows, each holding the scaled metadata features of dataset `k`.'''
    # Look the features up by name: the order expected by scale_meta_features
    # ([volume, infiltration, maxOccupants]) must not depend on how the
    # metadata dict happens to be written.
    room = metadata[k]
    meta_features = scale_meta_features([room['volume'], room['infiltration'], room['occupants']])
    return np.tile(np.asarray(meta_features), (n_rows, 1))


def sample_data(k, days, add_metadata=False):
    '''
    Prepares the data sequences and applies a training and test split.
    The passed number of days is used for training, all remaining days for testing.
    Days are taken in the order in which they first appear in the dataset.

    :param k:    key of the dataset in the hdf5 file
    :param days: number of days to be included in the training set, or 'all' to use all data for training
    :param add_metadata: boolean whether metadata shall be added to the samples (or time sequences only)
    :return: dict with 'x_train' and 'y_train' and, unless days == 'all', also 'x_test' and 'y_test'.
             With add_metadata=True every x entry is a list [sequences, metadata rows].
    '''
    df = pd.read_hdf(hdf5_file, k)

    # Only compare against 'all' when days really is a string; comparing a numpy
    # integer with a string can trigger numpy's elementwise-comparison warning.
    if isinstance(days, str) and days == 'all':
        splits = [('train', 'train', df)]
    else:
        in_training = df.Day.isin(df.Day.unique()[:days])
        training_data, test_data = df[in_training], df[~in_training]
        print(len(training_data), len(test_data))
        # second entry is the label used in the shape report below
        splits = [('train', 'train', training_data), ('test', 'test ', test_data)]

    data = {}
    for split, label, frame in splits:
        x, y = _to_sequences(frame)
        if add_metadata:
            x_meta = _scaled_meta_rows(k, len(x))
            print(label, np.shape(x), np.shape(x_meta), np.shape(y))
            data['x_' + split] = [x, x_meta]
        else:
            print(label, np.shape(x), np.shape(y))
            data['x_' + split] = x
        data['y_' + split] = y

    return data

## Office Evaluation

In [None]:
project_path = result_path + "Evaluation_Office"
k = "Office_A" # key for office room dataset

# exist_ok=True makes the cell safe to re-run without a separate existence check
os.makedirs(project_path, exist_ok=True)

# Training-set sizes: each integer until 20, then intervals of 5.
# (An extended run up to 75 days used np.arange(45, 80, 5) instead.)
for days in np.append(np.arange(1, 21), np.arange(25, 45, 5)):

    # Results go to <project_path>/experiments_<days>d/<dataset key>;
    # makedirs creates the intermediate experiments_<days>d folder as well.
    subproject = project_path + "/experiments_{}d".format(days) + "/{}".format(k)
    os.makedirs(subproject, exist_ok=True)

    settings = Settings(subproject, CDBLSTM, trials=20,
                        window_size=30, epochs=200, batch_size=128,
                        classes=2, features=1, metafeatures=3,
                        validation_split=0.2,
                        verbose=1, initial_seed=0)

    # Baseline without transfer and without metadata

    print("Run experiment for training on target data only...")
    data = sample_data(k, days, add_metadata=False)
    Experiment(data, settings, save_as='targetOnly', pretrained_model_path=None).run()

    # Fine-Tuning Coddora (samples include the room metadata)

    data = sample_data(k, days, add_metadata=True)

    print("Run experiment using the pretrained base model Coddora...")
    Experiment(data, settings, save_as='Coddora',
               pretrained_model_path=coddora_dir).run()

    # Fine-Tuning CoddoraRW (reuses the metadata-augmented samples from above)

    print("Run experiment using the real-wolrd retrained base model CddoraRW")
    Experiment(data, settings, save_as='CoddoraRW',
               pretrained_model_path=coddoraRW_dir).run()
    

## Meeting Room Evaluation

In [None]:
project_path = result_path + "Evaluation_Meeting_Room"
k = "Stjelja" # key for meeting rooms dataset

# exist_ok=True makes the cell safe to re-run without a separate existence check
os.makedirs(project_path, exist_ok=True)

# Training-set sizes: 1 to 20 days
for days in np.arange(1, 21):

    # Results go to <project_path>/experiments_<days>d/<dataset key>;
    # makedirs creates the intermediate experiments_<days>d folder as well.
    subproject = project_path + "/experiments_{}d".format(days) + "/{}".format(k)
    os.makedirs(subproject, exist_ok=True)

    settings = Settings(subproject, CDBLSTM, trials=20,
                        window_size=30, epochs=200, batch_size=128,
                        classes=2, features=1, metafeatures=3,
                        validation_split=0.2,
                        verbose=1, initial_seed=0)

    # Baseline without transfer and without metadata

    print("Run experiment for training on target data only...")
    data = sample_data(k, days, add_metadata=False)
    Experiment(data, settings, save_as='targetOnly', pretrained_model_path=None).run()

    # Fine-Tuning Coddora (samples include the room metadata)

    data = sample_data(k, days, add_metadata=True)

    print("Run experiment using the pretrained base model Coddora...")
    Experiment(data, settings, save_as='Coddora',
               pretrained_model_path=coddora_dir).run()

    # Fine-Tuning CoddoraRW (reuses the metadata-augmented samples from above)

    print("Run experiment using the real-wolrd retrained base model CddoraRW")
    Experiment(data, settings, save_as='CoddoraRW',
               pretrained_model_path=coddoraRW_dir).run()
    

## Transfer Learning Baseline

Using the Home dataset (50 days) for pretraining before fine-tuning with scarce target data

#### 1. Office Room

In [None]:
# Globals read by run_transfer_experiment() at call time.
project_path = result_path + "Evaluation_Office"
key = "Office_A"            # target dataset used for fine-tuning
pretraining_key = "Home"    # source dataset used for pretraining
# Where the pretraining run (save_as='Pretrained_Home_20', executed for days == 1)
# is expected to leave its models - folder layout assumed from Experiment, TODO confirm.
pretrained_model_iterations_path = project_path + \
               "/experiments_1d/Office_A/Pretrained_Home_20/models"

# exist_ok=True makes the cell safe to re-run without a separate existence check
os.makedirs(project_path, exist_ok=True)

def run_transfer_experiment(days):
    '''
    Runs the transfer-learning baseline for one training-set size:
    pretraining on the source dataset (only when days == 1), then fine-tuning
    on `days` days of the target dataset.

    NOTE: reads the notebook globals `project_path`, `key`, `pretraining_key` and
    `pretrained_model_iterations_path` at call time, so these must be set for the
    intended room before calling.

    :param days: number of target-dataset days used for fine-tuning
    '''
    print("Days:", days)

    # Results go to <project_path>/experiments_<days>d/<dataset key>;
    # makedirs creates the intermediate folder too and is safe to re-run.
    subproject = project_path + "/experiments_{}d".format(days) + "/{}".format(key)
    os.makedirs(subproject, exist_ok=True)

    settings = Settings(subproject, CDBLSTM, trials=20,
                        window_size=30, epochs=200, batch_size=128,
                        classes=2, features=1, metafeatures=3,
                        validation_split=0.2,
                        verbose=1, initial_seed=0)

    # Pretraining

    if days == 1:   # pretrained models are reused for fine-tuning with increased data amounts

        data_pre = sample_data(pretraining_key, 'all', add_metadata=False)
        print("pretraining...")
        settings.save_models = True   # keep the pretrained models for the fine-tuning runs
        Experiment(data_pre, settings, save_as='Pretrained_Home_20').run()

    # Fine-Tuning

    data = sample_data(key, days, add_metadata=False)
    print("fine-tuning...")
    settings.save_models = False
    Experiment(data, settings, save_as='FineTuned_Home_20',
               pretrained_model_iterations_path=pretrained_model_iterations_path).run()
    
# Training-set sizes: every day count from 1 to 20, then 25 to 40 in intervals of 5
for days in np.concatenate((np.arange(1, 21), np.arange(25, 45, 5))):
    run_transfer_experiment(days)

#### 2. Meeting Room

In [None]:
# Re-point the globals that run_transfer_experiment() (defined in the office cell
# above) reads at call time to the meeting room dataset.
project_path = result_path + "Evaluation_Meeting_Room"
key = "Stjelja"
pretraining_key = "Home" 
# run_transfer_experiment() stores the pretraining run with save_as='Pretrained_Home_20',
# so the models to fine-tune from are looked up under that folder name (the former
# ".../Pretrained_Home/models" is never written by this notebook).
# Folder layout assumed to match the office cell - TODO confirm against Experiment.
pretrained_model_iterations_path = project_path + \
               "/experiments_1d/Stjelja/Pretrained_Home_20/models"

# exist_ok=True makes the cell safe to re-run without a separate existence check
os.makedirs(project_path, exist_ok=True)

# Training-set sizes: 1 to 20 days
for days in np.arange(1, 21):
    run_transfer_experiment(days)