In [1]:
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from typing import List
import json
import time
from statistics import mean
import numpy as np
import random
random.seed(42)

rootpath = ".."

import transfer_logic.transfer_LD_weightinit as transfer_LD_weightinit
import transfer_logic.transfer_autoLSTM as transfer_autoLSTM

degree_sign = u'\N{DEGREE SIGN}'

2023-11-25 04:03:43.289100: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
result_filename = f"{rootpath}/results/result_data/transfer_results.json"

with open(result_filename, "r") as f:
    results_data = json.load(f)

ft_percents = results_data["options"]["ft_percents"]
shuffle_seeds = results_data["options"]["seeds"]
transfer_methods = ["weight_initialization_LSTMDense", "weight_initialization_AutoLSTM"]

In [3]:
def save_results(from_key, to_key, transfer_method, ft_percent, shuffle_seed, rmse=None, mae=None, mae_sd=None, training_time=None, datapoints=None):
    result_filename = f"{rootpath}/results/result_data/transfer_results.json"
    with open(result_filename, "r") as f:
        results_data = json.load(f)

    ft_percents = results_data["options"]["ft_percents"]
    shuffle_seeds = results_data["options"]["seeds"]

    if not to_key in results_data[transfer_method][from_key]:
        results_data[transfer_method][from_key][to_key] = {f"ft{int(x*100)}" : {
                    "rmse": [None]*len(shuffle_seeds),
                    "mae": [None]*len(shuffle_seeds),
                    "mae_sd": [None]*len(shuffle_seeds),
                    "training_time": [None]*len(shuffle_seeds),
                    "datapoints": 0
                } for x in ft_percents}
    
    saving_index = shuffle_seeds.index(shuffle_seed)
    results_data[transfer_method][from_key][to_key][f"ft{int(ft_percent*100)}"]["rmse"][saving_index] = rmse
    results_data[transfer_method][from_key][to_key][f"ft{int(ft_percent*100)}"]["mae"][saving_index] = mae
    results_data[transfer_method][from_key][to_key][f"ft{int(ft_percent*100)}"]["mae_sd"][saving_index] = mae_sd
    results_data[transfer_method][from_key][to_key][f"ft{int(ft_percent*100)}"]["training_time"][saving_index] = training_time
    results_data[transfer_method][from_key][to_key][f"ft{int(ft_percent*100)}"]["datapoints"] = datapoints
    
    with open(result_filename, "w") as f:
        json.dump(results_data, f, indent=4)

In [4]:
def plot_multiseed_error_pir(ft_percents: List[float], from_building: str, from_tower: int, from_season: str, to_building: str, to_tower: int, to_season: str = None, skip_existing_plot: bool = True):
    """
    Performs the following steps
    1. Performs transfer learning by finetuning entire model
    2. Performs transfer learning by finetuning only the output layer
    3. Plots mean absolute errors for test datasets picked with 5 different random seeds for both transfer learning methods
    4. Compares errors from transfer with error in the base model of the tower being transferred to
    5. Plots performance improvement ratio
    """
    to_features = ['FlowEvap', 'PerHumidity', 'TempAmbient', 'TempCondIn',
       'TempCondOut', 'TempEvapIn', 'TempEvapOut', 'TempWetBulb',
       'PerFreqConP', 'Tonnage','DayOfWeek', 'HourOfDay', 'PerFreqFan']
    to_target = 'EnergyConsumption'

    for ft_percent in ft_percents:
            for s, seed in enumerate(shuffle_seeds):
                from_key=f"{from_building}{from_tower}_{from_season}"
                to_key=f"{to_building}{to_tower}_{to_season}"

                # perform each transfer learning method
                if skip_existing_plot:
                    exists = results_data.get(transfer_methods[0], None).get(from_key, {}).get(to_key, None).get(f"ft{int(ft_percent*100)}", None).get("mae", None)
                    if exists and None not in exists:
                        print(f"skipping {from_key} to {to_key} for seed={seed} and ft={ft_percent}")
                        continue

                # finetuning all layers
                rmse1, _, mae1, tt, dp = transfer_LD_weightinit.transfer_LD_weightinit(from_building_name=from_building, from_tower_number=from_tower, from_season=from_season, to_building_name=to_building, to_tower_number=to_tower, to_features=to_features, to_target=to_target, to_season=to_season, finetuning_percentage=ft_percent, display_results=True, use_delta=True, shuffle_seed=seed)
                save_results(from_key, to_key, transfer_method=transfer_methods[0], ft_percent=ft_percent, shuffle_seed=seed, rmse=rmse1, mae=mae1, training_time=tt, datapoints=dp)

                # using lstm autoencoder model and finetuning only lstm layer
                rmse2, _, mae2, tt, dp = transfer_autoLSTM.transfer_autoLSTM(from_building_name=from_building, from_tower_number=from_tower, from_season=from_season, to_building_name=to_building, to_tower_number=to_tower, to_features=to_features, to_target=to_target, to_season=to_season, finetuning_percentage=ft_percent, display_results=True, use_delta=True, shuffle_seed=seed)
                save_results(from_key, to_key, transfer_method=transfer_methods[1], ft_percent=ft_percent, shuffle_seed=seed, rmse=rmse2, mae=mae2, training_time=tt, datapoints=dp)

In [None]:
def clear_result(ft_percents: List[float], from_building: str, from_tower: int, from_season: str, to_building: str, to_tower: int, to_season: str = None):
    """
    CAREFUL NOT TO CALL THIS BY MISTAKE
    """

    for ft_percent in ft_percents:
            for s, seed in enumerate(shuffle_seeds):
                save_results(from_key=f"{from_building}{from_tower}_{from_season}", to_key=f"{to_building}{to_tower}_{to_season}", transfer_method=transfer_methods[0], ft_percent=ft_percent, shuffle_seed=seed, rmse=None, mae=None, training_time=None, datapoints=None)
                save_results(from_key=f"{from_building}{from_tower}_{from_season}", to_key=f"{to_building}{to_tower}_{to_season}", transfer_method=transfer_methods[1], ft_percent=ft_percent, shuffle_seed=seed, rmse=None, mae=None, training_time=None, datapoints=None)

In [None]:
# plot_multiseed_error_pir(ft_percents=ft_percents, from_building="ESB", from_tower=1, from_season="summer", to_building="ESB", to_tower=2, to_season="summer", skip_existing_plot=False)

# Intra-season (Summer)

In [5]:
# create all combinations of intraseason transfers for Kissam and ESB
buildings = ["ESB", "Kissam"]
towers = [1, 2]
seasons = ["summer", "fall"]
intraseason_combinations = [((b1,t1,s1),(b2,t2,s2)) for s1 in seasons for t1 in towers for b1 in buildings for s2 in seasons for t2 in towers for b2 in buildings if (b1,t1,s1)!=(b2,t2,s2) and s1==s2]
intraseason_combinations = list(set(intraseason_combinations)) # making sure there are no duplicates

# # TEMPORARY ADDITION FIXME
# intraseason_combinations = random.sample(intraseason_combinations, 5)
print(intraseason_combinations)

[(('ESB', 2, 'fall'), ('Kissam', 1, 'fall')), (('ESB', 1, 'fall'), ('ESB', 2, 'fall')), (('Kissam', 1, 'summer'), ('ESB', 1, 'summer')), (('ESB', 2, 'summer'), ('ESB', 1, 'summer')), (('Kissam', 2, 'fall'), ('ESB', 2, 'fall')), (('ESB', 1, 'summer'), ('Kissam', 2, 'summer')), (('Kissam', 2, 'fall'), ('ESB', 1, 'fall')), (('Kissam', 1, 'fall'), ('Kissam', 2, 'fall')), (('ESB', 2, 'fall'), ('ESB', 1, 'fall')), (('Kissam', 1, 'summer'), ('Kissam', 2, 'summer')), (('ESB', 2, 'summer'), ('Kissam', 2, 'summer')), (('ESB', 1, 'summer'), ('ESB', 2, 'summer')), (('Kissam', 2, 'summer'), ('ESB', 2, 'summer')), (('ESB', 1, 'summer'), ('Kissam', 1, 'summer')), (('Kissam', 2, 'summer'), ('Kissam', 1, 'summer')), (('ESB', 1, 'fall'), ('Kissam', 2, 'fall')), (('Kissam', 1, 'fall'), ('ESB', 2, 'fall')), (('Kissam', 2, 'summer'), ('ESB', 1, 'summer')), (('ESB', 1, 'fall'), ('Kissam', 1, 'fall')), (('Kissam', 2, 'fall'), ('Kissam', 1, 'fall')), (('Kissam', 1, 'summer'), ('ESB', 2, 'summer')), (('ESB', 2

In [7]:
for c in intraseason_combinations:
    b1 = c[0][0]
    t1 = c[0][1]
    s1 = c[0][2]
    b2 = c[1][0]
    t2 = c[1][1]
    s2 = c[1][2]
    print(c)
    plot_multiseed_error_pir(ft_percents=ft_percents, from_building=b1, from_tower=t1, from_season=s1, to_building=b2, to_tower=t2, to_season=s2, skip_existing_plot = False)

(('ESB', 2, 'fall'), ('Kissam', 1, 'fall'))
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64


2023-11-25 04:03:58.662932: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


RMSE: 57.426
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 97.470
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 57.426
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 97.470
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 57.426
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 97.470
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 57.426
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64

# Inter-season (Summer to Fall, Fall to Summer)

### Kissam and ESB

In [8]:
# create all combinations of interseason transfers between summer and fall for Kissam and ESB
buildings = ["ESB", "Kissam"]
towers = [1, 2]
seasons = ["summer", "fall"]
interseason_combinations = [((b1,t1,s1),(b2,t2,s2)) for s1 in seasons for t1 in towers for b1 in buildings for s2 in seasons for t2 in towers for b2 in buildings if (b1,t1,s1)!=(b2,t2,s2) and s1!=s2]
interseason_combinations = list(set(interseason_combinations)) # making sure there are no duplicates

# # TEMPORARY ADDITION FIXME
# interseason_combinations = random.sample(interseason_combinations, 5)
print(interseason_combinations)

[(('Kissam', 1, 'summer'), ('Kissam', 1, 'fall')), (('ESB', 2, 'summer'), ('Kissam', 1, 'fall')), (('ESB', 1, 'summer'), ('ESB', 1, 'fall')), (('Kissam', 1, 'fall'), ('Kissam', 2, 'summer')), (('ESB', 2, 'fall'), ('ESB', 1, 'summer')), (('Kissam', 2, 'summer'), ('ESB', 1, 'fall')), (('Kissam', 1, 'summer'), ('ESB', 2, 'fall')), (('ESB', 2, 'summer'), ('ESB', 2, 'fall')), (('ESB', 1, 'fall'), ('Kissam', 2, 'summer')), (('Kissam', 1, 'summer'), ('ESB', 1, 'fall')), (('ESB', 2, 'summer'), ('ESB', 1, 'fall')), (('Kissam', 1, 'fall'), ('ESB', 2, 'summer')), (('Kissam', 2, 'fall'), ('Kissam', 2, 'summer')), (('Kissam', 1, 'fall'), ('Kissam', 1, 'summer')), (('ESB', 1, 'summer'), ('Kissam', 2, 'fall')), (('Kissam', 2, 'summer'), ('Kissam', 2, 'fall')), (('ESB', 2, 'fall'), ('Kissam', 2, 'summer')), (('Kissam', 1, 'fall'), ('ESB', 1, 'summer')), (('ESB', 1, 'summer'), ('Kissam', 1, 'fall')), (('Kissam', 2, 'summer'), ('Kissam', 1, 'fall')), (('ESB', 1, 'fall'), ('ESB', 2, 'summer')), (('ESB', 

In [9]:
for c in interseason_combinations:
    b1 = c[0][0]
    t1 = c[0][1]
    s1 = c[0][2]
    b2 = c[1][0]
    t2 = c[1][1]
    s2 = c[1][2]
    print(c)
    plot_multiseed_error_pir(ft_percents=ft_percents, from_building=b1, from_tower=t1, from_season=s1, to_building=b2, to_tower=t2, to_season=s2, skip_existing_plot = False)

(('Kissam', 1, 'summer'), ('Kissam', 1, 'fall'))
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 50.576
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 47.348
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 50.576
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 47.348
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 50.576
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
Name: EnergyConsumption, dtype: int64
RMSE: 47.348
number of times the hvac is on (energy consumption is > zero) True     21543
False     3936
N

## Display graphs

In [10]:
with open(result_filename, "r") as f:
    results_data = json.load(f)
    
# Create a subplots figure with two subplots and set title
x = [fp * 100 for fp in ft_percents]

for tm in results_data:
    if tm == "options": continue
    for from_key in results_data[tm]:
        for to_key in results_data[tm][from_key]:
            if to_key == "base": continue
            print(tm, from_key, to_key)
            
            try:
                average_mae = [0.0] * len(ft_percents)
                for i, ft in enumerate([f"ft{int(fp * 100)}" for fp in ft_percents]):
                    # print(results_data[tm][from_key][to_key][ft])
                    average_mae[i] = results_data[tm][from_key][to_key][ft]["mae"]
                    if None in average_mae[i]: raise ValueError("Skip this graph")
                
                average_mae = [[average_mae[j][i] for j in range(len(average_mae))] for i in range(len(average_mae[0]))] # reshape
                
                # add errors of transfer
                figfinal = go.Figure()
                for i, ft_mae_list in enumerate(average_mae):
                    figfinal.add_trace(
                        go.Scatter(
                            x=x, 
                            y=ft_mae_list, 
                            name=f"MAE for {shuffle_seeds[i]}")
                    )
                print(ft_mae_list)
                
                # add error of base model

                base_error_info = results_data[tm][to_key]["base"]

                figfinal.add_trace(go.Scatter(
                    x=x, 
                    y=[base_error_info["mae"]]*len(ft_percents), 
                    name=f"Base model MAE seed-42", 
                    line=dict(color='rgb(155, 185, 155)'),
                    mode='lines'))
                # add sd of base model
                y_upper = [base_error_info["mae"] + base_error_info["mae_sd"]]*len(ft_percents)
                y_lower = [base_error_info["mae"] - base_error_info["mae_sd"]]*len(ft_percents)

                figfinal.add_trace(go.Scatter(
                    x=x+x[::-1], # x, then x reversed
                    y=y_upper+y_lower[::-1], # upper, then lower reversed
                    fill='toself',
                    fillcolor='rgba(155, 185, 155,0.1)',
                    line=dict(color='rgba(0,0,0,0)'),
                    hoverinfo="skip",
                    showlegend=False
                ))
                
                # formatting
                figfinal.update_yaxes(range=[-1, 12])
                figfinal.update_layout(
                    title=f"{from_key} to {to_key} ({tm})",
                    xaxis_title='Amount of data',
                    yaxis_title='Mean absolute error'
                )
                # figfinal.show()
                figfinal.write_html(f"{rootpath}/results/plots/transfer_comparison/{tm}_{from_key}_to_{to_key}_mae.html")
            except: continue # in case of null values (transfers that have not been run yet)

weight_initialization_LSTMDense ESB1_summer ESB2_summer
[16.005281803007676, 8.262009439414078, 7.824918404021623, 7.504006964991126, 7.434468668891707]
weight_initialization_LSTMDense ESB1_summer Kissam1_summer
[22.892478948974436, 11.152051676363941, 10.154387387427583, 9.85767103843266, 9.768598821108094]
weight_initialization_LSTMDense ESB1_summer Kissam1_fall
[116.49859503727691, 9.472938924274901, 7.605590536417641, 7.064566111346442, 6.738484149850649]
weight_initialization_LSTMDense ESB1_summer ESB2_fall
[33.37511756606645, 13.023447023803797, 10.048450506711543, 9.303532698471257, 9.13125871445098]
weight_initialization_LSTMDense ESB1_summer Kissam2_fall
[64.41555765198704, 11.390374158005578, 9.230571098650234, 8.262628218049986, 7.698124058475416]
weight_initialization_LSTMDense ESB1_summer ESB1_fall
[31.744126283931493, 10.195090505446682, 8.414704085312323, 7.834866565257979, 7.5022667118873025]
weight_initialization_LSTMDense ESB1_summer Kissam2_summer
[27.09450785770178,