In [1]:
import os
import pandas as pd
import numpy as np
import datetime

from utils.Logger import Logger
from utils.Experiments import Data, DataDomainwise, Settings, TrainOnce, PretrainingFinetuning, DomainAdversarialLearning
from utils.DataPreparation import prepare_data
from utils.Evaluation import evaluate

from Models.CDBLSTM import CDBLSTM
from Models.DACDBLSTM import DACDBLSTM

from sklearn.model_selection import train_test_split
import tensorflow as tf
tf.version.VERSION

'2.6.0'

In [4]:
# Root folder that receives the results of every experiment in this notebook.
project_dir = "../../3_Results/Experiments_1day_20/"
# makedirs(..., exist_ok=True) replaces the check-then-create expression: it has no
# race between the existence check and the creation, and it also creates missing
# parent folders instead of raising FileNotFoundError.
os.makedirs(project_dir, exist_ok=True)

In [5]:
# Redirect log output to a file inside the results folder; the recorded output
# shows the target as <root>/<experiment folder>/<log file name>.
logger = Logger(
    "../../3_Results",
    "Experiments_1day_20",
    "logfile_with_selected_candanedo_data_S2.log",
)
logger.activate_logging()

Logging to ../../3_Results/Experiments_1day_20/logfile_with_selected_candanedo_data_S2.log


In [6]:
# HDF5 store from which every dataset in this notebook is read by key.
hdf5_file = "../../1_Data/datasets.h5"

# Target domains: each entry is used as a key into the HDF5 store.
target_dataset_names = [
    'Office_A',
    'Office_B',
    'Home',
    'Stjelja',
]

# Source domain label: only used to name the result folders and to skip
# identical source/target pairs; the source arrays are built in a later cell.
# NOTE(review): this label says "S1" while the log file name says "S2" - confirm
# which selection this notebook is meant to represent.
source_dataset_names = ['Candanedo_Selected_S1']

# Note that the Stjelja dataset was not published along with this script and needs to be removed to run it.

#### Select only reliable sections of the Candanedo data

In [11]:
df = pd.read_hdf(hdf5_file, 'Candanedo')

# Whole days that are dropped because of unexplainable behavior.
excluded_days = ['2015-02-12', '2015-02-16', '2015-02-17']

# (day, start, end): intervals dropped because CO2 decays slowly despite occupancy.
# Rows of `day` with start <= Time <= end are removed (both bounds inclusive).
excluded_intervals = [
    ('2015-02-03', datetime.time(11, 23), datetime.time(13, 10)),
    ('2015-02-03', datetime.time(16, 50), datetime.time(18, 13)),
    ('2015-02-05', datetime.time(16, 49), datetime.time(18,  5)),
    ('2015-02-06', datetime.time(12, 35), datetime.time(12, 54)),
    ('2015-02-06', datetime.time(16, 59), datetime.time(18,  7)),
    ('2015-02-09', datetime.time(12,  4), datetime.time(13, 12)),
    ('2015-02-09', datetime.time(17, 24), datetime.time(18,  4)),
    ('2015-02-13', datetime.time(11, 21), datetime.time(13,  1)),
    ('2015-02-13', datetime.time(17, 10), datetime.time(18,  6)),
]

# The filters are applied one after another, in the same order and with the same
# predicates as the former copy-pasted statements.
for day in excluded_days:
    df = df[df.Day != day]

for day, start, end in excluded_intervals:
    df = df[(df.Day != day) | (df.Time < start) | (df.Time > end)]

# prepare data
# No train/test split here (splitAt=None); per the recorded output the result is
# normalized to [0, 1] and has shape (samples, 30, 1).
# NOTE(review): window_size is repeated as a literal because the shared
# `window_size` setting is only defined in a later cell - keep both in sync.
x, y = prepare_data(df['CO2'].values, df['Occupancy'].values, splitAt=None, window_size=30)
print(np.shape(x))
print(np.shape(y))

no train-test split applied
train: (6546, 30, 1) (6546, 1)
6546 training samples
data normalized to range [0, 1]
train: (6528, 30, 1) (6528, 1)
(6528, 30, 1)
(6528, 1)


#### Use 80% for source training, 20% for source validation

In [12]:
# Chronological (unshuffled) split of the source samples at a fixed index.
# NOTE(review): with the 6528 samples shown in the recorded output, index 4275
# yields 4275 / 2253 samples (about 65% / 35%), not the 80% / 20% stated in the
# heading of this section - confirm which of the two is intended.
src_split_idx = 4275

x_src_train, x_src_val = x[:src_split_idx], x[src_split_idx:]
y_src_train, y_src_val = y[:src_split_idx], y[src_split_idx:]

print("x_src_train:", np.shape(x_src_train))
print("y_src_train:", np.shape(y_src_train))
print("x_src_val:", np.shape(x_src_val))
print("y_src_val:", np.shape(y_src_val))

x_src_train: (4275, 30, 1)
y_src_train: (4275, 1)
x_src_val: (2253, 30, 1)
y_src_val: (2253, 1)


#### Experiment Settings

In [13]:
# number of days from target data used for training
training_days = 1

# length of each input sample passed to the model
window_size = 30

# number of repetitions for each experiment
trials = 20

# maximum number of epochs if early stopping does not occur
epochs = 1000

# seed value of first trial; seeds are then incremented by one with each trial
initial_seed = 0

## Run Experiments

In [None]:
def _make_settings(path, model):
    """Build the Settings shared by all experiments; only output path and model class vary."""
    return Settings(path, model, trials=trials, epochs=epochs, verbose=2, initial_seed=initial_seed)


def run_experiments():
    """Run all transfer-learning experiments for every (target, source) pair.

    For each pair the following experiments are executed, in this order:
    target-only training, domain-adversarial learning (domain classifier at
    position 1 and 2), pretraining & fine-tuning (target data only, then target
    plus an equally sized slice of source data) and layer freezing (CNN, then
    CNN+BLSTM).

    The function takes no arguments and returns None. It reads the notebook
    globals ``target_dataset_names``, ``source_dataset_names``, ``project_dir``,
    ``hdf5_file``, ``training_days``, ``window_size``, ``trials``, ``epochs``,
    ``initial_seed`` and the source arrays ``x_src_train``, ``y_src_train``,
    ``x_src_val``, ``y_src_val``.

    Note that ``source`` is only used to name the result folder and to skip
    identical pairs; the source samples always come from the ``*_src_*`` globals.
    """
    # First split point passed to prepare_data; the second one is 1440 samples later.
    # Presumably 1440 = minutes per day at a 1-minute sampling rate, i.e. train on
    # the first `training_days` days and validate on the following day - TODO confirm.
    train_end = training_days*1440

    for target in target_dataset_names:
        for source in source_dataset_names:
            if target == source:
                continue

            # Preparation
            subproject = source + "->" + target
            project_subdir = project_dir + subproject + "/"
            # exist_ok avoids the check-then-create race; makedirs also creates a
            # missing project_dir instead of failing.
            os.makedirs(project_subdir, exist_ok=True)
            print(subproject)

            dataset_tar = pd.read_hdf(hdf5_file, target)

            # Chronological split (shuffle=False): the last 20% become the test set.
            x_train_raw, x_test_raw, y_train_raw, y_test_raw = train_test_split(
                dataset_tar['CO2'].values,
                dataset_tar['Occupancy'].values,
                test_size=0.2, shuffle=False)
            # For testing, we only use the last 20% of each dataset, which were held out during hyperparameter tuning
            print("Preparing test data:") # Test data for all transfer methods
            x_test, y_test = prepare_data(x_test_raw, y_test_raw, window_size=window_size)

            # Target Only Training
            print("Target Only Training...")
            # The third pair returned by prepare_data is not used here.
            x_train, y_train, x_val, y_val, _, _ = prepare_data(
                x_train_raw, y_train_raw,
                splitAt=[train_end, train_end+1440],
                window_size=window_size)

            data = Data(x_train, y_train, x_val, y_val, x_test, y_test)
            settings = _make_settings(project_subdir + "targetOnly", CDBLSTM)
            TrainOnce(data, settings).run()

            # Domain-Adversarial Learning
            print("Domain-Adversarial Learning (Domain Classifier Position 1)...")
            # Domain label 0 = target sample, 1 = source sample (column vectors).
            domain_labels = np.concatenate((np.zeros(len(y_train)).reshape(-1, 1),
                                            np.ones(len(y_src_train)).reshape(-1, 1)), axis=0)
            # Two outputs per sample: (occupancy label, domain label).
            y_train_DA = (np.concatenate((y_train, y_src_train), axis=0), domain_labels)
            x_train_DA = np.concatenate((x_train, x_src_train), axis=0)
            # Validation uses target samples only, hence all domain labels are 0.
            y_val_DA   = (y_val, np.zeros(len(y_val)).reshape(-1, 1))
            x_val_DA   = x_val
            print(np.shape(y_train_DA), np.shape(x_train_DA))
            print(np.shape(y_val_DA), np.shape(x_val_DA))
            data = Data(x_train_DA, y_train_DA, x_val_DA, y_val_DA, x_test, y_test)
            settings = _make_settings(project_subdir + "DA_Pos1", DACDBLSTM)
            DomainAdversarialLearning(data, settings, domain_clf_position=1, save_as='DA_Pos1').run()

            print("Domain-Adversarial Learning (Domain Classifier Position 2)...")
            # Same domain-adversarial data as for position 1.
            settings = _make_settings(project_subdir + "DA_Pos2", DACDBLSTM)
            DomainAdversarialLearning(data, settings, domain_clf_position=2, save_as='DA_Pos2').run()

            # Pretraining & Fine-Tuning
            print("Pretraining & Fine-Tuning...")
            # NOTE(review): same arguments as the prepare_data call for target-only
            # training above; kept as a separate call to preserve behavior and log output.
            x_tar_train, y_tar_train, x_tar_val, y_tar_val, _, _ = prepare_data(
                x_train_raw, y_train_raw,
                splitAt=[train_end, train_end+1440],
                window_size=window_size)
            data = DataDomainwise(x_tar_train, y_tar_train, x_tar_val, y_tar_val,
                                  x_src_train, y_src_train, x_src_val, y_src_val, x_test, y_test)
            settings = _make_settings(project_subdir + "pretrainingFineTuning", CDBLSTM)
            PretrainingFinetuning(data, settings).run()

            print("Pretraining & Fine-Tuning (with source and target data)...")
            # Fine-tune on the target samples plus an equally sized leading slice of
            # the source training samples.
            x_fine_tune = np.append(x_tar_train, x_src_train[:len(x_tar_train)], axis=0)
            y_fine_tune = np.append(y_tar_train, y_src_train[:len(y_tar_train)], axis=0)
            print("Fine-Tune on:", np.shape(x_fine_tune), np.shape(y_fine_tune))
            data_plus = DataDomainwise(x_fine_tune, y_fine_tune, x_tar_val, y_tar_val,
                                       x_src_train, y_src_train, x_src_val, y_src_val, x_test, y_test)
            settings = _make_settings(project_subdir + "pretrainingFineTuning_src_and_target", CDBLSTM)
            PretrainingFinetuning(data_plus, settings,
                                  save_as = ["pretrainingFineTuning_src_tar_pre",
                                             "pretrainingFineTuning_src_tar_fine"]).run()

            # Layer-Freezing
            # Both runs use `data` (fine-tuning on target samples only), not `data_plus`.
            print("Layer Freezing (freeze CNN)...")
            settings = _make_settings(project_subdir + "layerFreezing_CNN", CDBLSTM)
            PretrainingFinetuning(data, settings, freeze_first_n_layers=4, # 4 = freeze CNN
                                  save_as = ["layerFreezing_CNN_pre",
                                             "layerFreezing_CNN"]).run()

            print("Layer Freezing (freeze CNN+BLSTM)...")
            settings = _make_settings(project_subdir + "layerFreezing_CNN_BLSTM", CDBLSTM)
            PretrainingFinetuning(data, settings, freeze_first_n_layers=7, # 7 = freeze CNN+BLSTM
                                  save_as = ["layerFreezing_CNN_BLSTM_pre",
                                             "layerFreezing_CNN_BLSTM"]).run()

# Execute the full experiment grid (all target/source pairs, all transfer methods),
# then print a marker so the end of the run is visible in the output.
run_experiments()
print("done")