In [None]:
# Loading the Datasets
filepath = 'data/schemas/warm_up/'

# Building information: one CSV per building, all in the same folder.
b_1, b_2, b_3 = (
    pd.read_csv(filepath + file_name)
    for file_name in ('Building_1.csv', 'Building_2.csv', 'Building_3.csv')
)

# Other information shared by every building.
carbon_int, pricing, weather = (
    pd.read_csv(filepath + file_name)
    for file_name in ('carbon_intensity.csv', 'pricing.csv', 'weather.csv')
)

# Building level combine the dfs: each building frame is placed side by side
# (axis=1) with the shared frames, after resetting every index so rows are
# matched by position.
comb_b_1, comb_b_2, comb_b_3 = (
    pd.concat(
        [frame.reset_index(drop=True)
         for frame in (building, carbon_int, pricing, weather)],
        axis=1,
    )
    for building in (b_1, b_2, b_3)
)

# Make a list of the buildings
b_list = [comb_b_1, comb_b_2, comb_b_3]

In [None]:
# Check if the dataframes contain inf
# np.isfinite is False for +/-inf and also for NaN, so the counts below cover both.
finite_masks = {
    'Building 1': np.isfinite(comb_b_1),
    'Building 2': np.isfinite(comb_b_2),
    'Building 3': np.isfinite(comb_b_3),
}

# `d` used to be overwritten once per building and ended up holding the
# Building 3 mask; it is kept bound to that mask for any later use.
d = finite_masks['Building 3']

# Number of non-finite cells per building; displayed as the cell result so the
# check is actually visible instead of being silently discarded.
non_finite_counts = {
    building_name: int((~mask).to_numpy().sum())
    for building_name, mask in finite_masks.items()
}
non_finite_counts

In [None]:
# Fix the titles
# The pattern does not change between frames, so it is compiled a single time.
regex = re.compile(r"\[|\]|<", re.IGNORECASE)
b_list_clear = []

for b in b_list:
    cleaned_names = []
    for col in b.columns.values:
        # Only names containing one of the three characters are rewritten;
        # every other label is passed through untouched.
        needs_fix = any(x in str(col) for x in set(('[', ']', '<')))
        cleaned_names.append(regex.sub("_", col) if needs_fix else col)

    # The frame is renamed in place, so b_list and b_list_clear hold the same objects.
    b.columns = cleaned_names
    b_list_clear.append(b)

In [None]:
# XGBoost Models

def XGBoost_Model(X_train, X_test, y_train, y_test,hpt):
    """Fit an XGBoost regressor and compare its predictions with the test targets.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``fit``.
    X_test, y_test   : held-out features and targets; also passed to ``fit`` as
                       the second evaluation set.
    hpt              : accepted for signature parity with ``LightGBM_Model``;
                       it is not read inside this function.

    Returns
    -------
    (df, reg) : a DataFrame with the columns 'Actual Value' and
                'Predicted Value', and the fitted regressor.
    """
    # NOTE(review): the test split is handed to fit() as an evaluation set while
    # early stopping is enabled - confirm that using it for both stopping and
    # reporting is intended.
    evaluation_sets = [(X_train, y_train), (X_test, y_test)]

    booster = XGBRegressor(n_estimators=1000)
    booster.fit(
        X_train,
        y_train,
        eval_set=evaluation_sets,
        early_stopping_rounds=50,
        verbose=False,
    )

    predictions = booster.predict(X_test)

    # Generate the df
    comparison = pd.DataFrame({
        'Actual Value': y_test,
        'Predicted Value': predictions,
    })
    return comparison, booster
    

In [None]:
# LightGBM Models

# Generating the LightGBM

def LightGBM_Model(X_train, X_test, y_train, y_test,hpt):
    """Fit a LightGBM regressor and compare its predictions with the test targets.

    Parameters
    ----------
    X_train, y_train : training features and targets.
    X_test, y_test   : held-out features and targets used for the comparison frame.
    hpt              : when equal to ``True`` a 5-fold ``GridSearchCV`` over the
                       grid below is fitted; otherwise a single regressor with
                       fixed settings is fitted.

    Returns
    -------
    (df, model) : a DataFrame with the columns 'Actual Value' and
                  'Predicted Value', and the fitted estimator (the
                  ``GridSearchCV`` object in the tuning case, the
                  ``LGBMRegressor`` otherwise).
    """
    if hpt == True:
        # Eight parameters with three candidates each: 3**8 = 6561 combinations,
        # every one of them cross-validated with cv=5.
        search_space = {
            'max_depth':        [3, 4, 5],
            'num_leaves':       [10, 15, 20],
            'learning_rate':    [0.05, 0.1, 0.15],
            'n_estimators':     [50, 100, 200],
            'subsample':        [0.5, 0.7, 0.9],
            'colsample_bytree': [0.5, 0.7, 0.9],
            'reg_alpha':        [0.01, 0.1, 1],
            'reg_lambda':       [0.01, 0.1, 1],
            'verbose':[-1]
        }
        base_regressor = LGBMRegressor(boosting_type='gbdt', objective='regression')
        estimator = GridSearchCV(base_regressor, search_space, cv=5, n_jobs=-1)
    else:
        # NOTE(review): 'steps' is forwarded to LGBMRegressor unchanged - confirm
        # it is a setting LightGBM actually recognises.
        fixed_settings = {
            'n_jobs': 1,
            'max_depth': 4,
            'min_data_in_leaf': 10,
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'subsample': 0.9,
            'n_estimators': 80,
            'learning_rate': 0.1,
            'colsample_bytree': 0.9,
            'steps':48,
        }
        estimator = LGBMRegressor(**fixed_settings)

    # Both variants share the same fit / predict / report steps.
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)

    # Generate the df
    comparison = pd.DataFrame({
        'Actual Value': y_test,
        'Predicted Value': predictions,
    })
    return comparison, estimator

In [None]:
# Selects which branch the training cells below take: 'xgb' or 'lgb'.
model_type = 'lgb'

In [None]:
# 1.) Cooling Load (kWh)

# Load the feature importance. The file is identical for every building, so it
# is read once instead of once per loop iteration.
f_l = pd.read_csv('data/features/feature_importance_Cooling_Load__kWh_.csv')

i = 1
for b in b_list_clear:

    # Generate the x,y
    X = b[f_l['feature']]
    y = b['Cooling Load (kWh)']

    # Generate the test,train (shuffle=False keeps the rows in their original order)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted models are stored under *_model names so that the
    # `lightgbm as lgb` import alias is not overwritten by a model object.
    if model_type == 'xgb':
        df, xgb_model = XGBoost_Model(X_train, X_test, y_train, y_test,False)
        xgb_model.save_model('my_models/models/cooling_load_model_b'+str(i)+'.json')
    elif model_type == 'lgb':
        df, lgb_model = LightGBM_Model(X_train, X_test, y_train, y_test,False)
        joblib.dump(lgb_model, 'my_models/models/cooling_load_model_b'+str(i)+'.pkl')
        #lgb_model.booster_.save_model('my_models/models/cooling_load_model_b'+str(i)+'_hyper.txt')
    i = i + 1


In [None]:
# 2.) DHW Load (kWh)
i = 1
for b in b_list_clear:

    # Generate the x,y
    # NOTE(review): X is the complete frame, so it still contains the target
    # column 'DHW Heating (kWh)' - confirm this is intended before trusting the fit.
    X = b
    y = b['DHW Heating (kWh)']

    # Generate the test,train (shuffle=False keeps the rows in their original order)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted models are stored under *_model names so that the
    # `lightgbm as lgb` import alias is not overwritten by a model object.
    if model_type == 'xgb':
        df, xgb_model = XGBoost_Model(X_train, X_test, y_train, y_test,False)
        xgb_model.save_model('my_models/models/dhw_load_model_b'+str(i)+'.json')
    elif model_type == 'lgb':
        df, lgb_model = LightGBM_Model(X_train, X_test, y_train, y_test,True)
        joblib.dump(lgb_model, 'my_models/models/dhw_load_model_b'+str(i)+'_hyper.pkl')
        #lgb_model.booster_.save_model('my_models/models/dhw_load_model_b'+str(i)+'_hyper.txt')
    i = i + 1

In [None]:
# 3.) Equipment Electric Power (kWh)
i = 1
for b in b_list_clear:

    # Generate the x,y
    # NOTE(review): X is the complete frame, so it still contains the target
    # column 'Equipment Electric Power (kWh)' - confirm this is intended.
    X = b
    y = b['Equipment Electric Power (kWh)']

    # Generate the test,train (shuffle=False keeps the rows in their original order)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted models are stored under *_model names so that the
    # `lightgbm as lgb` import alias is not overwritten by a model object.
    if model_type == 'xgb':
        df, xgb_model = XGBoost_Model(X_train, X_test, y_train, y_test,False)
        xgb_model.save_model('my_models/models/Equipment_Electric_Power_model_b'+str(i)+'.json')
    elif model_type == 'lgb':
        df, lgb_model = LightGBM_Model(X_train, X_test, y_train, y_test,True)
        joblib.dump(lgb_model, 'my_models/models/Equipment_Electric_Power_model_b'+str(i)+'_hyper.pkl')
        #lgb_model.booster_.save_model('my_models/models/Equipment_Electric_Power_model_b'+str(i)+'_hyper.txt')
    i = i + 1

In [None]:
# Neighbour Level: Carbon Intensity (kgCO2e/kWh) ; Solar Generation (W/kW)

# 1.) Carbon Intensity (kgCO2e/kWh)
# combine the datasets to one since we only have one CI
comb = pd.concat([b_list_clear[0].reset_index(drop=True),
                  b_list_clear[1].reset_index(drop=True),
                  b_list_clear[2].reset_index(drop=True)])

# Generate the x,y
# NOTE(review): X is the complete frame, so it still contains the target
# column 'kg_CO2/kWh' - confirm this is intended.
X = comb
y = comb['kg_CO2/kWh']

# Generate the test,train (shuffle=False keeps the rows in their original order)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

# There is a single neighbourhood-level model, so the file names carry no
# building number. The xgb name previously appended whatever value `i` still
# held from the preceding cell's loop, which made it depend on execution order.
# The fitted models are stored under *_model names so that the
# `lightgbm as lgb` import alias is not overwritten by a model object.
if model_type == 'xgb':
    df, xgb_model = XGBoost_Model(X_train, X_test, y_train, y_test,False)
    xgb_model.save_model('my_models/models/Carbon_Intensity_Power_model.json')
elif model_type == 'lgb':
    df, lgb_model = LightGBM_Model(X_train, X_test, y_train, y_test,True)
    joblib.dump(lgb_model, 'my_models/models/Carbon_Intensity_Power_model_hyper.pkl')
    #lgb_model.booster_.save_model('my_models/models/Carbon_Intensity_Power_model_hyper.txt')


In [None]:
# 3.) Solar Generation (W/kW)
sg = []

# Load the feature importance. The file is identical for every building, so it
# is read once instead of once per loop iteration.
f_l = pd.read_csv('data/features/feature_importance_Solar_Generation__W_kW_.csv')

i = 1
for b in b_list_clear:

    # Generate the x,y
    X = b[f_l['feature']]
    y = b['Solar Generation (W/kW)']

    # Generate the test,train (shuffle=False keeps the rows in their original order)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted models are stored under *_model names so that the
    # `lightgbm as lgb` import alias is not overwritten by a model object.
    if model_type == 'xgb':
        df, xgb_model = XGBoost_Model(X_train, X_test, y_train, y_test,False)
        xgb_model.save_model('my_models/models/solar_generation_model_b'+str(i)+'.json')
    elif model_type == 'lgb':
        df, lgb_model = LightGBM_Model(X_train, X_test, y_train, y_test,True)
        joblib.dump(lgb_model, 'my_models/models/solar_generation_model_b'+str(i)+'_hyper.pkl')
        #lgb_model.booster_.save_model('my_models/models/solar_generation_model_b'+str(i)+'_hyper.txt')

    # Keep the actual-vs-predicted frame of every building for later inspection.
    sg.append(df)
    i = i + 1

### FastAI Testing

In [None]:
from timeseries_fastai.imports import *
from timeseries_fastai.core import *
from timeseries_fastai.data import *
from timeseries_fastai.models import *

In [None]:
# Datasets are looked up relative to the parent of the current working directory.
PATH = Path.cwd().parent
# NOTE(review): load_df_ucr comes from the timeseries_fastai star imports;
# presumably it returns the train/test frames of the named UCR dataset - confirm.
df_train, df_test = load_df_ucr(PATH, 'Adiac')
# Every column except the last two is used as an input column.
x_cols = df_train.columns[0:-2].to_list()
dls = TSDataLoaders.from_dfs(df_train, df_test, x_cols=x_cols, label_col='target', bs=16)
dls.show_batch()

In [None]:
# Build the model with len(dls.vocab) as its second argument
# (presumably the number of target classes - confirm against create_inception).
inception = create_inception(1, len(dls.vocab))
learn = Learner(dls, inception, metrics=[accuracy])
# One training cycle with 1e-3 as the second argument.
learn.fit_one_cycle(1, 1e-3)

## Feature Selection

In [None]:
# Mapping the Features










## Libraries

In [1]:
#!pip install lightgbm
#!pip install category_encoders

import os
import glob
import numpy as np 
import pandas as pd 
import lightgbm as lgb
import category_encoders as ce
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor

import matplotlib.ticker as ticker
import re
from sklearn.model_selection import GridSearchCV
from joblib import dump, load
import joblib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.feature_selection import mutual_info_regression
from sklearn.ensemble import AdaBoostRegressor
import time
from tqdm.auto import tqdm
import json

from citylearn.citylearn import CityLearnEnv
from my_models.user_model import SubmissionModel
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error
import logging
from sklearn.ensemble import VotingRegressor

logging.getLogger('tensorflow').setLevel(logging.ERROR)
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.base import BaseEstimator, RegressorMixin
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
2023-10-23 22:29:35.905046: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-10-23 22:29:35.905313: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


## Simulator 

In [2]:
# Create a test env
class WrapperEnv:
    """
    Lightweight stand-in for a CityLearn environment.

    It copies a fixed set of entries from an ``env_data`` mapping onto
    attributes of the same name, so consumers can read that data without
    holding a reference to the full environment object.
    """
    def __init__(self, env_data):
        # Keys are read in this order; a missing key raises KeyError.
        copied_keys = (
            'observation_names',
            'action_names',
            'observation_space',
            'action_space',
            'time_steps',
            'seconds_per_time_step',
            'random_seed',
            'buildings_metadata',
            'episode_tracker',
        )
        for key in copied_keys:
            setattr(self, key, env_data[key])

    def get_metadata(self):
        """Return the stored building metadata under the 'buildings' key."""
        metadata = {'buildings': self.buildings_metadata}
        return metadata

In [3]:
def create_citylearn_env(config):
    """Build a CityLearn environment from ``config.SCHEMA`` for forecasting use.

    Returns
    -------
    (env, env_data) : the environment and a dict with a snapshot of its
                      observation/action names and spaces, building metadata,
                      building count and names, the PV nominal power of the
                      first building, and a fixed ``time_steps`` of 720.
    """
    env = CityLearnEnv(config.SCHEMA)

    env_data = {
        'observation_names': env.observation_names,
        'action_names': env.action_names,
        'observation_space': env.observation_space,
        'action_space': env.action_space,
        'time_steps': 720,
        'buildings_metadata': env.get_metadata()['buildings'],
        'num_buildings': len(env.buildings),
        'building_names': [building.name for building in env.buildings],
        'b0_pv_capacity': env.buildings[0].pv.nominal_power,
    }

    # Turn off actions for all buildings and do not simulate power outage (forecasting only).
    for building in env.buildings:
        building.ignore_dynamics = True
        building.simulate_power_outage = False

    return env, env_data

In [4]:
class Config:
    """Paths used by the simulator cells."""
    # Root folder of the data files, relative to the working directory.
    data_dir = './data/'
    # Schema file handed to CityLearnEnv in create_citylearn_env.
    SCHEMA = os.path.join(data_dir, 'schemas/warm_up/schema.json')
# Single shared configuration instance used by the cells below.
config = Config()

In [5]:
# Build the environment and its metadata snapshot from the warm-up schema.
env, env_data = create_citylearn_env(config)

## Generation of the Features


TODO: Add the other features!

In [6]:
# Dataframes

# Column layout shared by every building frame; defined once so the three
# frames cannot drift apart.
FEATURE_COLUMNS = [
    'day_type', 'hour',
    'outdoor_dry_bulb_temperature', 'outdoor_dry_bulb_temperature_predicted_6h',
    'outdoor_dry_bulb_temperature_predicted_12h', 'outdoor_dry_bulb_temperature_predicted_24h',
    'diffuse_solar_irradiance', 'diffuse_solar_irradiance_predicted_6h',
    'diffuse_solar_irradiance_predicted_12h', 'diffuse_solar_irradiance_predicted_24h',
    'direct_solar_irradiance', 'direct_solar_irradiance_predicted_6h',
    'direct_solar_irradiance_predicted_12h', 'direct_solar_irradiance_predicted_24h',
    'carbon_intensity',
    'indoor_dry_bulb_temperature', 'non_shiftable_load', 'solar_generation',
    'dhw_storage_soc', 'electrical_storage_soc',
    'electricity_pricing', 'electricity_pricing_predicted_6h',
    'electricity_pricing_predicted_12h', 'electricity_pricing_predicted_24h',
    'cooling_demand', 'dhw_demand', 'indoor_dry_bulb_temperature_set_point',
    'occupant_count', 'net_electricity_consumption',
]

# One independent, empty frame per building.
b_1_dataframe, b_2_dataframe, b_3_dataframe = (
    pd.DataFrame(columns=FEATURE_COLUMNS) for _ in range(3)
)

b_dataframe_list = [b_1_dataframe, b_2_dataframe, b_3_dataframe]


In [7]:
# Print how many net-consumption values each building currently holds.
for idx, b in enumerate(env.buildings):
    con = b.net_electricity_consumption
    print(len(con))

# Show the values of the first building as the cell result.
env.buildings[0].net_electricity_consumption

1
1
1


array([0.67788136], dtype=float32)

In [8]:
# Generate the Datasets for the different buildings:
# Here I only need to simulate the ones, which are not present in the dataset:

# Number of rows the single-value observations below are repeated to.
N_TIME_STEPS = 720

# Loading the local features. They are the same for every building, so the
# CSV files are read once instead of once per loop iteration.
filepath = 'data/schemas/warm_up/'

pricing    = pd.read_csv(filepath + 'pricing.csv')
weather    = pd.read_csv(filepath + 'weather.csv')

# Global features (independent of the houses) are taken from the first building.
day_type         = env.buildings[0].energy_simulation.day_type
hour             = env.buildings[0].energy_simulation.hour
carbon_intensity = env.buildings[0].carbon_intensity.carbon_intensity

for idx, b in enumerate(env.buildings):
    simulation = b.energy_simulation

    # Column name -> values. The entries are listed, and later assigned, in the
    # same order as the columns of the target frame.
    column_values = {
        'day_type':                                   day_type,
        'hour':                                       hour,
        'outdoor_dry_bulb_temperature':               weather['Outdoor Drybulb Temperature (C)'],
        'outdoor_dry_bulb_temperature_predicted_6h':  weather['6h Outdoor Drybulb Temperature (C)'],
        'outdoor_dry_bulb_temperature_predicted_12h': weather['12h Outdoor Drybulb Temperature (C)'],
        'outdoor_dry_bulb_temperature_predicted_24h': weather['24h Outdoor Drybulb Temperature (C)'],
        'diffuse_solar_irradiance':                   weather['Diffuse Solar Radiation (W/m2)'],
        'diffuse_solar_irradiance_predicted_6h':      weather['6h Diffuse Solar Radiation (W/m2)'],
        'diffuse_solar_irradiance_predicted_12h':     weather['12h Diffuse Solar Radiation (W/m2)'],
        'diffuse_solar_irradiance_predicted_24h':     weather['24h Diffuse Solar Radiation (W/m2)'],
        'direct_solar_irradiance':                    weather['Direct Solar Radiation (W/m2)'],
        'direct_solar_irradiance_predicted_6h':       weather['6h Direct Solar Radiation (W/m2)'],
        'direct_solar_irradiance_predicted_12h':      weather['12h Direct Solar Radiation (W/m2)'],
        'direct_solar_irradiance_predicted_24h':      weather['24h Direct Solar Radiation (W/m2)'],
        'carbon_intensity':                           carbon_intensity,
        'indoor_dry_bulb_temperature':                simulation.indoor_dry_bulb_temperature,
        'non_shiftable_load':                         simulation.non_shiftable_load,
        'solar_generation':                           simulation.solar_generation,
        'dhw_storage_soc':                            b.dhw_storage.soc,
        'electrical_storage_soc':                     b.electrical_storage.soc,
        'electricity_pricing':                        pricing['Electricity Pricing [$/kWh]'],
        'electricity_pricing_predicted_6h':           pricing['6h Prediction Electricity Pricing [$/kWh]'],
        'electricity_pricing_predicted_12h':          pricing['12h Prediction Electricity Pricing [$/kWh]'],
        'electricity_pricing_predicted_24h':          pricing['24h Prediction Electricity Pricing [$/kWh]'],
        'cooling_demand':                             simulation.cooling_demand,
        'dhw_demand':                                 simulation.dhw_demand,
        'indoor_dry_bulb_temperature_set_point':      simulation.indoor_dry_bulb_temperature_set_point,
        # NOTE(review): these two are repeated N_TIME_STEPS times; if the source
        # array holds a single value this yields a constant column - confirm.
        'occupant_count':                             b.occupant_count.repeat(N_TIME_STEPS),
        'net_electricity_consumption':                b.net_electricity_consumption.repeat(N_TIME_STEPS),
    }

    # Generate the Dataframe for the training
    building_frame = b_dataframe_list[idx]
    for column_name, values in column_values.items():
        building_frame[column_name] = values

### Feature Selection

In [None]:
# Save the important features into files
# NOTE(review): feature_selection is defined in the cell below this one, so on a
# fresh kernel that cell has to be executed first.
# NOTE(review): the output file name depends only on the target name, so every
# building writes to the same five files and the last building's result remains.
b = 1
for data in b_dataframe_list:
    for target_name in ('cooling_demand',
                        'dhw_demand',
                        'non_shiftable_load',
                        'carbon_intensity',
                        'solar_generation'):
        feature_selection(data, target_name)
    b += 1

In [None]:
def feature_selection(data,obs_feature):
    """Rank the columns of ``data`` by mutual information with one target column.

    The columns whose score is above 0.10 are written, sorted by descending
    score, to ``data/features/feature_importance_<obs_feature>.csv`` with the
    columns 'feature' and 'score'.

    Parameters
    ----------
    data        : DataFrame holding the candidate columns and the target.
    obs_feature : name of the target column inside ``data``.

    Returns
    -------
    The DataFrame that was written to disk.
    """
    # Split the dataset into features and target
    # NOTE(review): X is the complete frame, so the target column is scored
    # against itself and can end up in the saved list - confirm this is intended.
    X = data
    y = data[obs_feature]

    # Apply Information Gain
    # NOTE(review): no random_state is passed, so scores may differ between runs - confirm.
    ig = mutual_info_regression(X, y)

    # Pair every column with its score, then sort by score in descending order.
    feature_scores = dict(zip(data.columns, ig))
    ranked = sorted(feature_scores.items(), key=lambda item: item[1], reverse=True)

    # Keep only the features above the importance threshold.
    selected = [(feature, score) for feature, score in ranked if score > 0.10]

    df = pd.DataFrame({
        'feature': [feature for feature, _ in selected],
        'score':   [score for _, score in selected],
    })
    df.to_csv('data/features/feature_importance_'+str(obs_feature)+'.csv')
    return df
    

## Train the Predictors (LightGBM)

In [16]:
# Selects the branch of the training cells below: 'xgb', 'lgb' or 'fusion'.
model_type = 'fusion'
# Passed as the second argument to every model-building helper below.
hyperparameter = True

## Building Level Predictors

### 1.) Cooling Load (kWh)

In [None]:

# One model (or LightGBM/LSTM model pair) per building; `i` is the 1-based
# building number used in the file names.
# The fitted models are stored under *_model names so that the
# `lightgbm as lgb` import alias is not overwritten by a model object.
for i, b in enumerate(b_dataframe_list, start=1):
    print("Building Model!")
    if model_type == 'xgb':
        xgb_model = XGBoost_Model(b,hyperparameter,'cooling_demand')
        joblib.dump(xgb_model, 'my_models/models/cooling_demand_model_b'+str(i)+'_xgb.pkl')
    elif model_type == 'lgb':
        lgb_model = LightGBM_Model(b,hyperparameter,'cooling_demand')
        joblib.dump(lgb_model, 'my_models/models/cooling_demand_model_b'+str(i)+'_lightgbm.pkl')
    elif model_type == 'fusion':
        lgb_model  = LightGBM_Model(b,hyperparameter,'cooling_demand')
        # NOTE(review): LSTM_Model is defined outside this cell; confirm its
        # return value can be serialised with joblib.
        lstm_model = LSTM_Model(b,hyperparameter,'cooling_demand')

        joblib.dump(lgb_model, 'my_models/models/fusion/LightGBM/cooling_demand_model_b'+str(i)+'_2.pkl')
        joblib.dump(lstm_model, 'my_models/models/fusion/LSTM/cooling_demand_model_b'+str(i)+'_2.pkl')

        #fusion_model = Fusion_Model(b,hyperparameter,'cooling_demand',lstm_model,lgb_model)
        #joblib.dump(fusion_model, 'my_models/models/fusion/Fusion/cooling_demand_model_b'+str(i)+'.pkl')

### 2.) DHW Load (kWh)

In [None]:
# One model (or LightGBM/LSTM model pair) per building; `i` is the 1-based
# building number used in the file names.
# The fitted models are stored under *_model names so that the
# `lightgbm as lgb` import alias is not overwritten by a model object.
for i, b in enumerate(b_dataframe_list, start=1):

    if model_type == 'xgb':
        xgb_model = XGBoost_Model(b,hyperparameter,'dhw_demand')
        joblib.dump(xgb_model, 'my_models/models/dhw_demand_model_b'+str(i)+'_xgb.pkl')
    elif model_type == 'lgb':
        lgb_model = LightGBM_Model(b,hyperparameter,'dhw_demand')
        joblib.dump(lgb_model, 'my_models/models/dhw_demand_model_b'+str(i)+'_lightgbm.pkl')
    elif model_type == 'fusion':
        lgb_model  = LightGBM_Model(b,hyperparameter,'dhw_demand')
        # NOTE(review): LSTM_Model is defined outside this cell; confirm its
        # return value can be serialised with joblib.
        lstm_model = LSTM_Model(b,hyperparameter,'dhw_demand')

        joblib.dump(lgb_model, 'my_models/models/fusion/LightGBM/dhw_demand_model_b'+str(i)+'_2.pkl')
        joblib.dump(lstm_model, 'my_models/models/fusion/LSTM/dhw_demand_model_b'+str(i)+'_2.pkl')

        #fusion_model = Fusion_Model(b,hyperparameter,'dhw_demand',lstm_model,lgb_model)
        #joblib.dump(fusion_model, 'my_models/models/fusion/Fusion/dhw_demand_model_b'+str(i)+'.pkl')

### 3.) Equipment Electric Power (kWh)

In [17]:
# One model per building; `i` is the 1-based building number used in the file
# names. In 'fusion' mode only the LightGBM part is trained here; the LSTM
# lines are disabled.
# The fitted models are stored under *_model names so that the
# `lightgbm as lgb` import alias is not overwritten by a model object.
for i, b in enumerate(b_dataframe_list, start=1):

    if model_type == 'xgb':
        xgb_model = XGBoost_Model(b,hyperparameter,'non_shiftable_load')
        joblib.dump(xgb_model, 'my_models/models/Equipment_Electric_Power_model_b'+str(i)+'_new_xgb.pkl')
    elif model_type == 'lgb':
        lgb_model = LightGBM_Model(b,hyperparameter,'non_shiftable_load')
        joblib.dump(lgb_model, 'my_models/models/Equipment_Electric_Power_model_b'+str(i)+'_new_2_hyper.pkl')
    elif model_type == 'fusion':
        lgb_model  = LightGBM_Model(b,hyperparameter,'non_shiftable_load')
        #lstm_model = LSTM_Model(b,hyperparameter,'non_shiftable_load')

        joblib.dump(lgb_model, 'my_models/models/fusion/LightGBM/Equipment_Electric_Power_model_b'+str(i)+'_2.pkl')
        #joblib.dump(lstm_model, 'my_models/models/fusion/LSTM/Equipment_Electric_Power_model_b'+str(i)+'_2.pkl')

        #fusion_model = Fusion_Model(b,hyperparameter,'non_shiftable_load',lstm_model,lgb_model)
        #joblib.dump(fusion_model, 'my_models/models/fusion/Fusion/Equipment_Electric_Power_model_b'+str(i)+'.pkl')

Done!
Done!
Done!


## Neighbourhood Level Predictors

### 1.) Carbon Intensity (kgCO2e/kWh)

In [None]:
# combine the datasets to one since we only have one CI
comb = pd.concat([b_dataframe_list[0].reset_index(drop=True),
                  b_dataframe_list[1].reset_index(drop=True),
                  b_dataframe_list[2].reset_index(drop=True)])


# A single neighbourhood-level model is trained on the stacked frame.
# The fitted models are stored under *_model names so that the
# `lightgbm as lgb` import alias is not overwritten by a model object.
if model_type == 'xgb':
    xgb_model = XGBoost_Model(comb,hyperparameter,'carbon_intensity')
    joblib.dump(xgb_model, 'my_models/models/Carbon_Intensity_Power_model_xgb.pkl')
elif model_type == 'lgb':
    lgb_model = LightGBM_Model(comb,hyperparameter,'carbon_intensity')
    joblib.dump(lgb_model, 'my_models/models/Carbon_Intensity_Power_model_lightgbm.pkl')
elif model_type == 'fusion':
    lgb_model  = LightGBM_Model(comb,hyperparameter,'carbon_intensity')
    # NOTE(review): LSTM_Model is defined outside this cell; confirm its
    # return value can be serialised with joblib.
    lstm_model = LSTM_Model(comb,hyperparameter,'carbon_intensity')

    joblib.dump(lgb_model, 'my_models/models/fusion/LightGBM/Carbon_Intensity_model_2.pkl')
    joblib.dump(lstm_model, 'my_models/models/fusion/LSTM/Carbon_Intensity_model_2.pkl')

    #fusion_model = Fusion_Model(b,hyperparameter,'carbon_intensity',lstm_model,lgb_model)
    #joblib.dump(fusion_model, 'my_models/models/fusion/Fusion/Carbon_Intensity_model'+str(i)+'.pkl')

Done!


2023-10-24 01:03:28.571594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-10-24 01:03:28.572786: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-10-24 01:03:28.573287: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (6b1cc6700774): /proc/driver/nvidia/version does not exist


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

### 2.) Solar Generation (W/kW)

In [None]:
# One model (or LightGBM/LSTM model pair) per building; `i` is the 1-based
# building number used in the file names.
# The fitted models are stored under *_model names so that the
# `lightgbm as lgb` import alias is not overwritten by a model object.
for i, b in enumerate(b_dataframe_list, start=1):

    if model_type == 'xgb':
        xgb_model = XGBoost_Model(b,hyperparameter,'solar_generation')
        joblib.dump(xgb_model, 'my_models/models/solar_generation_model_b'+str(i)+'_xgb.pkl')
    elif model_type == 'lgb':
        lgb_model = LightGBM_Model(b,hyperparameter,'solar_generation')
        joblib.dump(lgb_model, 'my_models/models/solar_generation_model_b'+str(i)+'_lightgbm.pkl')
    elif model_type == 'fusion':
        lgb_model  = LightGBM_Model(b,hyperparameter,'solar_generation')
        # NOTE(review): LSTM_Model is defined outside this cell; confirm its
        # return value can be serialised with joblib.
        lstm_model = LSTM_Model(b,hyperparameter,'solar_generation')

        joblib.dump(lgb_model, 'my_models/models/fusion/LightGBM/solar_generation_model_b'+str(i)+'_2.pkl')
        joblib.dump(lstm_model, 'my_models/models/fusion/LSTM/solar_generation_model_b'+str(i)+'_2.pkl')

        #fusion_model = Fusion_Model(b,hyperparameter,'solar_generation',lstm_model,lgb_model)
        #joblib.dump(fusion_model, 'my_models/models/fusion/Fusion/solar_generation_model_b'+str(i)+'.pkl')

Done!
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/

## Helper Functions

### LightGBM Models

In [9]:
def LightGBM_Model(b,hpt,feature):
    """Fit a LightGBM regressor that predicts column ``feature`` of ``b``.

    Parameters
    ----------
    b : pandas.DataFrame
        Frame that contains the target column ``feature`` together with the
        predictor columns.
    hpt : bool
        When truthy, run a 5-fold ``GridSearchCV`` over the parameter grid
        below; otherwise fit a single model with fixed parameters.
    feature : str
        Name of the target column in ``b``.

    Returns
    -------
    GridSearchCV or LGBMRegressor
        The fitted grid search when ``hpt`` is truthy, otherwise the fitted
        regressor.

    Notes
    -----
    The target column is excluded from the predictors, so inputs passed to
    ``predict`` later must not contain it either.
    """
    # Keep the target out of the design matrix: training on a frame that
    # still contains `feature` lets the model simply copy the answer.
    features = b.drop(columns=[feature])
    target   = b[feature]

    # Unshuffled 80/20 split: the first 80% of the rows are used for training.
    # The held-out part is not used by this function.
    X_train, _, y_train, _ = train_test_split(features, target, test_size=0.2, random_state=42, shuffle=False)

    if hpt:
        params = {
            'max_depth':        [3, 4, 5],
            'num_leaves':       [10, 15, 20],
            'learning_rate':    [0.05, 0.1, 0.15],
            'n_estimators':     [50, 100, 200],
            'subsample':        [0.5, 0.7, 0.9],
            'colsample_bytree': [0.5, 0.7, 0.9],
            'reg_alpha':        [0.01, 0.1, 1],
            'reg_lambda':       [0.01, 0.1, 1],
            'verbose':[-1]
        }

        lgb_mean = LGBMRegressor(boosting_type='gbdt', objective='regression')
        grid_search_mean = GridSearchCV(lgb_mean, params, cv=5, n_jobs=-1)
        grid_search_mean.fit(X_train, y_train)

        print("Done!")
        return grid_search_mean

    lgb_params = {
        'n_jobs': 1,
        'max_depth': 4,
        'min_data_in_leaf': 10,
        'boosting_type': 'gbdt',
        'objective': 'regression',
        'subsample': 0.9,
        'n_estimators': 80,
        'learning_rate': 0.1,
        'colsample_bytree': 0.9,
        # NOTE(review): 'steps' does not look like a documented LightGBM
        # parameter - confirm it is intentional before relying on it.
        'steps':48,
        'verbose':-1,
    }

    # Fit a single model with the fixed parameter set.
    gbm = LGBMRegressor(**lgb_params)
    gbm.fit(X_train, y_train)

    return gbm

### XGBoost Models

In [10]:
def XGBoost_Model(b,hpt,feature):
    """Fit an XGBoost regressor that predicts column ``feature`` of ``b``.

    Parameters
    ----------
    b : pandas.DataFrame
        Frame that contains the target column ``feature`` together with the
        predictor columns.
    hpt : bool
        Unused here; kept so the signature matches the other model builders.
    feature : str
        Name of the target column in ``b``.

    Returns
    -------
    XGBRegressor
        The fitted regressor.

    Notes
    -----
    The target column is excluded from the predictors, so inputs passed to
    ``predict`` later must not contain it either.
    """
    # Keep the target out of the design matrix: training on a frame that
    # still contains `feature` lets the model simply copy the answer.
    features = b.drop(columns=[feature])
    target   = b[feature]

    # Unshuffled 80/20 split: the first 80% of the rows are used for training.
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42, shuffle=False)

    reg = XGBRegressor(n_estimators=100)
    # Both splits are passed as evaluation sets, with early stopping after
    # 50 rounds.
    reg.fit(X_train, y_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            early_stopping_rounds=50)

    return reg
    

### LSTM Model

In [11]:
def LSTM_Model(b,hpt,feature):
    """Train a two-layer LSTM that predicts column ``feature`` of ``b``.

    Parameters
    ----------
    b : pandas.DataFrame
        Frame that contains the target column ``feature`` together with the
        predictor columns.
    hpt : bool
        Unused here; kept so the signature matches the other model builders.
    feature : str
        Name of the target column in ``b``.

    Returns
    -------
    Sequential
        The compiled and fitted Keras model. It expects input of shape
        ``(samples, 1, n_features)`` where ``n_features`` excludes the
        target column.
    """
    # Keep the target out of the design matrix: training on a frame that
    # still contains `feature` lets the network simply copy the answer.
    features = b.drop(columns=[feature])
    target   = b[feature]

    # Unshuffled 80/20 split: the first 80% of the rows are used for training.
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42, shuffle=False)

    # Reshape data for LSTM input (samples, sequence_length, num_features);
    # every row becomes a sequence of length 1.
    X_train = np.reshape(X_train.values, (X_train.shape[0], 1, X_train.shape[1]))
    X_test = np.reshape(X_test.values, (X_test.shape[0], 1, X_test.shape[1]))

    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(units=50))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')

    # NOTE(review): no early-stopping callback is attached, so training always
    # runs the full 1000 epochs; the held-out split is only used to report
    # validation loss.
    model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_data=(X_test, y_test))

    return model

In [12]:

class CustomKerasRegressor(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper around ``KerasRegressor`` for LSTM input.

    Two-dimensional input ``(samples, features)`` is reshaped to
    ``(samples, input_shape[0], input_shape[1])`` before it is handed to the
    wrapped estimator; input of any other rank is passed through unchanged.

    Parameters
    ----------
    build_fn : callable
        Model factory forwarded to ``KerasRegressor``.
    input_shape : sequence of two ints
        ``(sequence_length, num_features)`` used when reshaping 2-D input.
    **kwargs
        Extra keyword arguments forwarded to ``KerasRegressor``.
    """

    def __init__(self, build_fn, input_shape, **kwargs):
        self.build_fn = build_fn
        self.input_shape = input_shape
        self.kwargs = kwargs
        # NOTE(review): the wrapped estimator is built once, here; changing
        # the attributes above afterwards does not rebuild it.
        self.estimator = KerasRegressor(build_fn=self.build_fn, **self.kwargs)

    def _as_sequence_input(self, X):
        """Return ``X`` as an array, reshaped to 3-D when it arrives as 2-D."""
        # Coerce first so array-likes without a usable `.ndim`/reshape
        # (lists, DataFrames) are handled the same way as ndarrays.
        X = np.asarray(X)
        if X.ndim == 2:
            X = np.reshape(X, (X.shape[0], self.input_shape[0], self.input_shape[1]))
        return X

    def fit(self, X, y, **fit_params):
        """Fit the wrapped estimator on ``X`` / ``y`` and return ``self``."""
        self.estimator.fit(self._as_sequence_input(X), y, **fit_params)
        return self

    def predict(self, X):
        """Predict for ``X`` and return the result as a ``(n, 1)`` array."""
        predictions = self.estimator.predict(self._as_sequence_input(X))
        # Reshape predictions back to a 2-D column.
        return np.reshape(predictions, (-1, 1))

In [13]:
def create_lstm_model(learning_rate=0.001, units=50, dropout_rate=0.0):
    """Build and compile a single-layer LSTM regressor.

    Parameters
    ----------
    learning_rate : float
        Learning rate of the Adam optimizer.
    units : int
        Number of units in the LSTM layer.
    dropout_rate : float
        Passed to the LSTM layer as its ``dropout`` argument. The default of
        0.0 leaves the layer without dropout.

    Returns
    -------
    Sequential
        The compiled model, with mean squared error as loss.

    Notes
    -----
    Reads the module-level name ``num_features``, which must be defined
    before this function is called.
    NOTE(review): a later cell defines another ``create_lstm_model`` that
    takes ``input_shape``; when the cells run in order, that definition
    replaces this one.
    """
    model = Sequential()
    # `dropout_rate` used to be accepted but ignored; apply it to the layer so
    # tuning it has an effect.
    model.add(LSTM(units=units, dropout=dropout_rate, input_shape=(1, num_features)))
    model.add(Dense(units=1))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

In [14]:
# Create a function that returns the LSTM model
def create_lstm_model(input_shape):
    """Build and compile a stack of three 50-unit LSTM layers with one output.

    ``input_shape`` is forwarded to the first LSTM layer. The first two LSTM
    layers return full sequences, the third does not. The output is a single
    linear unit and the model is compiled with the Adam optimizer and mean
    squared error loss.
    """
    stacked_layers = [
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        LSTM(units=50, return_sequences=True),
        LSTM(units=50),
        Dense(1, activation='linear'),
    ]

    network = Sequential()
    for layer in stacked_layers:
        network.add(layer)

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

In [15]:
def Fusion_Model(b,hpt,feature,lstm_model,lightgbm_model):
    """Fit a ``VotingRegressor`` that combines an LSTM and a LightGBM model.

    Parameters
    ----------
    b : pandas.DataFrame
        Frame that contains the target column ``feature`` together with the
        predictor columns.
    hpt : bool
        Unused here; kept so the signature matches the other model builders.
    feature : str
        Name of the target column in ``b``.
    lstm_model : estimator
        Added to the ensemble under the name ``'lstm'``. Presumably a
        scikit-learn compatible regressor that accepts 2-D input, such as
        ``CustomKerasRegressor`` - TODO confirm with the caller.
    lightgbm_model : estimator
        Added to the ensemble under the name ``'lgbm'``.

    Returns
    -------
    VotingRegressor
        The fitted ensemble.
    """
    # Keep the target out of the design matrix: training on a frame that
    # still contains `feature` lets the models simply copy the answer.
    features = b.drop(columns=[feature])
    target   = b[feature]

    # Unshuffled 80/20 split: the first 80% of the rows are used for training.
    # The held-out part is not used by this function.
    X_train, _, y_train, _ = train_test_split(features, target, test_size=0.2, random_state=42, shuffle=False)

    # Create an ensemble model using VotingRegressor
    ensemble_model = VotingRegressor(estimators=[('lstm', lstm_model), ('lgbm', lightgbm_model)])
    # Every member is fitted on the same X, so it has to stay 2-D: a 3-D array
    # cannot be used by the LightGBM member. Any sequence reshaping is left to
    # the LSTM estimator itself.
    ensemble_model.fit(X_train.values, y_train)

    return ensemble_model
    