In [88]:
import os
import glob
import numpy as np 
import pandas as pd 
import lightgbm as lgb
import category_encoders as ce
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor

import matplotlib.ticker as ticker
import re
from sklearn.model_selection import GridSearchCV
from joblib import dump, load
import joblib

Python 3.10.10


In [2]:
# Loading the Datasets
filepath = 'data/schemas/warm_up/'

# Building information: one CSV per building
b_1, b_2, b_3 = (
    pd.read_csv(filepath + name)
    for name in ('Building_1.csv', 'Building_2.csv', 'Building_3.csv')
)

# Other information, shared by every building
carbon_int, pricing, weather = (
    pd.read_csv(filepath + name)
    for name in ('carbon_intensity.csv', 'pricing.csv', 'weather.csv')
)

# Building level: place the shared tables next to each building's own columns.
# Every table gets a fresh 0..n-1 index first so the rows line up by position.
comb_b_1, comb_b_2, comb_b_3 = (
    pd.concat(
        [table.reset_index(drop=True) for table in (building, carbon_int, pricing, weather)],
        axis=1,
    )
    for building in (b_1, b_2, b_3)
)

# Make a list of the buildings
b_list = [comb_b_1, comb_b_2, comb_b_3]

In [3]:
# Check if the dataframes contain inf (np.isfinite is also False for NaN)
for label, frame in (('Building 1', comb_b_1),
                     ('Building 2', comb_b_2),
                     ('Building 3', comb_b_3)):
    # Element-wise boolean mask: True where the value is finite
    d = np.isfinite(frame)
    # Report the result instead of discarding the mask
    n_bad = int(d.size - d.to_numpy().sum())
    print(f'{label}: {n_bad} non-finite value(s)')

In [4]:
# Fix the titles
# Every '[', ']' and '<' in a column name is replaced with '_'.
regex = re.compile(r"\[|\]|<", re.IGNORECASE)
b_list_clear = []

for b in b_list:
    # Only names that actually contain one of the characters are rewritten
    b.columns = [
        regex.sub("_", col) if regex.search(str(col)) else col
        for col in b.columns.values
    ]
    # The frame is renamed in place, so b_list_clear holds the same objects as b_list
    b_list_clear.append(b)

In [5]:
# XGBoost Models

def XGBoost_Model(X_train, X_test, y_train, y_test, hpt):
    """Fit an XGBoost regressor with early stopping and predict the test split.

    Parameters
    ----------
    X_train, y_train : training features and target.
    X_test, y_test : held-out features and target; also used as the second
        evaluation set during fitting.
    hpt : accepted for signature parity with LightGBM_Model; not used here.

    Returns
    -------
    (df, reg) : a DataFrame with the columns 'Actual Value' and
        'Predicted Value', and the fitted regressor.

    NOTE(review): XGBRegressor is not imported anywhere in the visible part of
    this notebook - confirm it is in scope before selecting model_type 'xgb'.
    """
    watchlist = [(X_train, y_train), (X_test, y_test)]

    reg = XGBRegressor(n_estimators=1000)
    reg.fit(
        X_train,
        y_train,
        eval_set=watchlist,
        early_stopping_rounds=50,
        verbose=False,
    )

    # Actual and predicted values of the test split, side by side
    df = pd.DataFrame({'Actual Value': y_test, 'Predicted Value': reg.predict(X_test)})
    return df, reg
    

In [11]:
# LightGBM Models

# Generating the LightGBM

def LightGBM_Model(X_train, X_test, y_train, y_test, hpt):
    """Fit a LightGBM regressor and predict the test split.

    Parameters
    ----------
    X_train, y_train : training features and target.
    X_test, y_test : held-out features and target.
    hpt : whether to run the hyper-parameter grid search. Booleans keep their
        previous meaning; strings are parsed case-insensitively, so 'True'
        enables the search and 'False' (which this notebook passes) disables it.

    Returns
    -------
    (df, model) : a DataFrame with the columns 'Actual Value' and
        'Predicted Value', and the fitted estimator - a GridSearchCV when
        tuning, otherwise an LGBMRegressor.
    """
    # A string flag used to fall through to the fixed parameters no matter what
    # it said, because a str never compares equal to True.
    if isinstance(hpt, str):
        tune = hpt.strip().lower() == 'true'
    else:
        tune = hpt == True  # noqa: E712 - same comparison as before for non-strings

    if tune:
        param_grid = {
            'max_depth':        [3, 4, 5],
            'num_leaves':       [10, 15, 20],
            'learning_rate':    [0.05, 0.1, 0.15],
            'n_estimators':     [50, 100, 200],
            'subsample':        [0.5, 0.7, 0.9],
            'colsample_bytree': [0.5, 0.7, 0.9],
            'reg_alpha':        [0.01, 0.1, 1],
            'reg_lambda':       [0.01, 0.1, 1],
            'verbose':          [-1],
        }
        # 3**8 parameter combinations, each fitted on 5 folds
        model = GridSearchCV(
            LGBMRegressor(boosting_type='gbdt', objective='regression'),
            param_grid,
            cv=5,
            n_jobs=-1,
        )
    else:
        lgb_params = {
            'n_jobs': 1,
            'max_depth': 4,
            'min_data_in_leaf': 10,
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'subsample': 0.9,
            'n_estimators': 80,
            'learning_rate': 0.1,
            'colsample_bytree': 0.9,
            # NOTE(review): 'steps' does not look like a LightGBM parameter;
            # kept unchanged in case something downstream reads it - confirm.
            'steps': 48,
            'verbose': -1,
        }
        model = LGBMRegressor(**lgb_params)

    # Fitting and prediction are the same for both variants
    model.fit(X_train, y_train)

    df = pd.DataFrame({'Actual Value': y_test, 'Predicted Value': model.predict(X_test)})
    return df, model

In [12]:
# Selects the model family for the training cells below, which branch on 'xgb' and 'lgb'
model_type = 'lgb'

In [18]:
# 1.) Cooling Load (kWh)

# Load the feature importance once: the file is the same for every building
f_l = pd.read_csv('data/features/feature_importance_Cooling_Load__kWh_.csv')

i = 1
for b in b_list_clear:

    # Generate the x,y
    X = b[f_l['feature']]
    y = b['Cooling Load (kWh)']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, False)
        model.save_model('my_models/models/cooling_load_model_b'+str(i)+'.json')
    if model_type == 'lgb':
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, False)
        joblib.dump(model, 'my_models/models/cooling_load_model_b'+str(i)+'.pkl')
    i = i + 1


In [None]:
# 2.) DHW Load (kWh)
i = 1
for b in b_list_clear:

    # Generate the x,y
    # NOTE(review): X = b keeps the target column among the features - confirm this is intended.
    X = b
    y = b['DHW Heating (kWh)']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, False)
        model.save_model('my_models/models/dhw_load_model_b'+str(i)+'.json')
    if model_type == 'lgb':
        # hpt=True: LightGBM_Model runs its grid search here
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, True)
        joblib.dump(model, 'my_models/models/dhw_load_model_b'+str(i)+'_hyper.pkl')
    i = i + 1

In [None]:
# 3.) Equipment Electric Power (kWh)
i = 1
for b in b_list_clear:

    # Generate the x,y
    # NOTE(review): X = b keeps the target column among the features - confirm this is intended.
    X = b
    y = b['Equipment Electric Power (kWh)']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, False)
        model.save_model('my_models/models/Equipment_Electric_Power_model_b'+str(i)+'.json')
    if model_type == 'lgb':
        # hpt=True: LightGBM_Model runs its grid search here
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, True)
        joblib.dump(model, 'my_models/models/Equipment_Electric_Power_model_b'+str(i)+'_hyper.pkl')
    i = i + 1

In [None]:
# Neighbour Level: Carbon Intensity (kgCO2e/kWh) ; Solar Generation (W/kW)

# 1.) Carbon Intensity (kgCO2e/kWh)
# combine the datasets to one since we only have one CI
comb = pd.concat([b_list_clear[0].reset_index(drop=True),
                  b_list_clear[1].reset_index(drop=True),
                  b_list_clear[2].reset_index(drop=True)])

# Generate the x,y
# NOTE(review): X = comb keeps the target column among the features - confirm this is intended.
X = comb
y = comb['kg_CO2/kWh']

# Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

# The fitted estimator is bound to `model` so the `lgb` module alias
# imported at the top of the notebook is not overwritten.
if model_type == 'xgb':
    df, model = XGBoost_Model(X_train, X_test, y_train, y_test, False)
    # Single neighbourhood-level model: no building counter in the file name
    # (the old name picked up whatever value `i` had after an earlier cell).
    model.save_model('my_models/models/Carbon_Intensity_Power_model.json')
if model_type == 'lgb':
    # hpt=True: LightGBM_Model runs its grid search here
    df, model = LightGBM_Model(X_train, X_test, y_train, y_test, True)
    joblib.dump(model, 'my_models/models/Carbon_Intensity_Power_model_hyper.pkl')


In [None]:
# 2.) Solar Generation (W/kW)

# Load the feature importance once: the file is the same for every building
f_l = pd.read_csv('data/features/feature_importance_Solar_Generation__W_kW_.csv')

# Actual-vs-predicted frame of every building, in building order
sg = []
i = 1

for b in b_list_clear:

    # Generate the x,y
    X = b[f_l['feature']]
    y = b['Solar Generation (W/kW)']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, False)
        model.save_model('my_models/models/solar_generation_model_b'+str(i)+'.json')
    if model_type == 'lgb':
        # hpt=True: LightGBM_Model runs its grid search here
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, True)
        joblib.dump(model, 'my_models/models/solar_generation_model_b'+str(i)+'_hyper.pkl')

    sg.append(df)
    i = i + 1

### FastAI Testing

In [14]:
from timeseries_fastai.imports import *
from timeseries_fastai.core import *
from timeseries_fastai.data import *
from timeseries_fastai.models import *

In [16]:
# Smoke test of timeseries_fastai on the 'Adiac' dataset, looked up in the parent
# of the current working directory.
# NOTE(review): the output recorded below this cell is "Error loading files"
# followed by "TypeError: cannot unpack non-iterable NoneType object", so the
# unpacking of load_df_ucr's result failed in the saved run.
PATH = Path.cwd().parent
df_train, df_test = load_df_ucr(PATH, 'Adiac')
# Every column except the last two is treated as an input column
x_cols = df_train.columns[0:-2].to_list()
dls = TSDataLoaders.from_dfs(df_train, df_test, x_cols=x_cols, label_col='target', bs=16)
dls.show_batch()

Loading files from: /home/philaupk/work/CityLearn_Competition/Adiac
Error loading files: /home/philaupk/work/CityLearn_Competition/Adiac


TypeError: cannot unpack non-iterable NoneType object

In [None]:
# Needs `dls` from the previous cell.
# NOTE(review): presumably 1 is the number of input channels and len(dls.vocab)
# the number of classes - confirm against the timeseries_fastai documentation.
inception = create_inception(1, len(dls.vocab))
learn = Learner(dls, inception, metrics=[accuracy])
# One cycle at the rate 1e-3
learn.fit_one_cycle(1, 1e-3)

## Feature Selection

In [26]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.feature_selection import mutual_info_regression


# Save the important features into files
# NOTE(review): feature_selection() builds its output file name from the target
# name only, so each building overwrites the file written for the previous one.
target_columns = (
    'Cooling Load (kWh)',
    'DHW Heating (kWh)',
    'Equipment Electric Power (kWh)',
    'kg_CO2/kWh',
    'Solar Generation (W/kW)',
)

b = 1
for data in b_list_clear:
    for target in target_columns:
        feature_selection(data, target)
    b = b + 1

In [None]:
# Mapping the Features










In [25]:
def feature_selection(data, obs_feature, threshold=0.10):
    """Rank the columns of `data` by mutual information with one target column.

    The features whose score is strictly above `threshold` are written, best
    first, to 'data/features/feature_importance_<target>.csv', where <target>
    is `obs_feature` with spaces, parentheses and slashes replaced by '_'.

    Parameters
    ----------
    data : DataFrame holding the candidate features and the target column.
    obs_feature : name of the target column in `data`.
    threshold : minimum score (exclusive) for a feature to be kept.

    Returns
    -------
    None. The result is the CSV file, with the columns 'feature' and 'score'.
    """
    # Score predictors only: the target used to be part of X, so it was ranked
    # against itself and ended up in the selected feature list.
    y = data[obs_feature]
    X = data.drop(columns=[obs_feature])

    # Apply Information Gain; seeded so repeated runs select the same features
    ig = mutual_info_regression(X, y, random_state=42)

    # Sort the features by importance score in descending order
    ranked = sorted(zip(X.columns, ig), key=lambda pair: pair[1], reverse=True)
    selected = [(feature, score) for feature, score in ranked if score > threshold]

    df = pd.DataFrame(selected, columns=['feature', 'score'])

    # Make the target name safe to use inside a file name
    file_tag = re.sub(r"[ ()/]", "_", obs_feature)
    df.to_csv('data/features/feature_importance_' + file_tag + '.csv')
    

## Simulator 

In [None]:
import numpy as np
import time
import os
from tqdm.auto import tqdm
import json

from citylearn.citylearn import CityLearnEnv
from my_models.user_model import SubmissionModel

In [2]:
# Create a test env
class WrapperEnv:
    """
    Env to wrap provide Citylearn Env data without providing full env
    Preventing attribute access outside of the available functions

    `env_data` is a dict. The observation/action names and spaces, `time_steps`
    and `buildings_metadata` are required. `seconds_per_time_step`,
    `random_seed` and `episode_tracker` are optional and default to None.
    """
    def __init__(self, env_data):
        self.observation_names = env_data['observation_names']
        self.action_names = env_data['action_names']
        self.observation_space = env_data['observation_space']
        self.action_space = env_data['action_space']
        self.time_steps = env_data['time_steps']
        # The dict built by create_citylearn_env() in this notebook does not
        # contain the next three keys, so a plain lookup raised KeyError.
        self.seconds_per_time_step = env_data.get('seconds_per_time_step')
        self.random_seed = env_data.get('random_seed')
        self.buildings_metadata = env_data['buildings_metadata']
        self.episode_tracker = env_data.get('episode_tracker')

    def get_metadata(self):
        """Return the building metadata as {'buildings': ...}."""
        return {'buildings': self.buildings_metadata}

In [3]:
def create_citylearn_env(config):
    """Build a CityLearnEnv from `config.SCHEMA` and collect its metadata.

    Returns
    -------
    (env, env_data) : the environment, and a dict with its observation/action
        names and spaces, number of time steps, building metadata, building
        count and names, and the nominal PV power of the first building.
    """
    env = CityLearnEnv(config.SCHEMA)

    env_data = {
        'observation_names': env.observation_names,
        'action_names': env.action_names,
        'observation_space': env.observation_space,
        'action_space': env.action_space,
        'time_steps': env.time_steps,
        'buildings_metadata': env.get_metadata()['buildings'],
        'num_buildings': len(env.buildings),
        'building_names': [building.name for building in env.buildings],
        'b0_pv_capacity': env.buildings[0].pv.nominal_power,
    }

    # Turn off actions for all buildings and do not simulate power outage (forecasting only).
    for building in env.buildings:
        building.ignore_dynamics = True
        building.simulate_power_outage = False

    return env, env_data

In [5]:
class Config:
    """Paths used to build the CityLearn environment."""
    # Root folder of the data files
    data_dir = './data/'
    # Schema file that create_citylearn_env() hands to CityLearnEnv
    SCHEMA = os.path.join(data_dir, 'schemas/warm_up/schema.json')
# Shared instance passed to create_citylearn_env()
config = Config()

In [6]:
# Build the environment and its metadata dict from the schema named in `config`
env, env_data = create_citylearn_env(config)

## Generation of the Features

In [68]:
# Dataframes

# Column layout shared by the three per-building frames
observation_columns = [
    'day_type', 'hour',
    'outdoor_dry_bulb_temperature', 'outdoor_dry_bulb_temperature_predicted_6h',
    'outdoor_dry_bulb_temperature_predicted_12h', 'outdoor_dry_bulb_temperature_predicted_24h',
    'diffuse_solar_irradiance', 'diffuse_solar_irradiance_predicted_6h',
    'diffuse_solar_irradiance_predicted_12h', 'diffuse_solar_irradiance_predicted_24h',
    'direct_solar_irradiance', 'direct_solar_irradiance_predicted_6h',
    'direct_solar_irradiance_predicted_12h', 'direct_solar_irradiance_predicted_24h',
    'carbon_intensity',
    'indoor_dry_bulb_temperature', 'non_shiftable_load', 'solar_generation',
    'dhw_storage_soc', 'electrical_storage_soc',
    'electricity_pricing', 'electricity_pricing_predicted_6h',
    'electricity_pricing_predicted_12h', 'electricity_pricing_predicted_24h',
    'cooling_demand', 'dhw_demand', 'indoor_dry_bulb_temperature_set_point',
]

# One empty, independent frame per building
b_1_dataframe, b_2_dataframe, b_3_dataframe = (
    pd.DataFrame(columns=observation_columns) for _ in range(3)
)

b_dataframe_list = [b_1_dataframe, b_2_dataframe, b_3_dataframe]


In [85]:
# Generate the Datasets for the different buildings:
# Here I only need to simulate the ones, which are not present in the dataset:
import pandas as pd

# Number of time steps taken from every simulated series, starting at the
# environment's current time step
n_steps = 720
window = slice(env.time_step, env.time_step + n_steps)

# Loading the local features (identical for every building, so they are read once)
filepath = 'data/schemas/warm_up/'

pricing    = pd.read_csv(filepath + 'pricing.csv')
weather    = pd.read_csv(filepath + 'weather.csv')

for idx, b in enumerate(env.buildings):
    sim = b.energy_simulation
    # The global features (which are independent from the houses!) are taken
    # from the first building.
    reference = env.buildings[0]

    # Keys are the target columns, listed in the order they are assigned.
    # The simulated series are cut to the window; the pricing and weather
    # columns are assigned whole, as before.
    features = {
        'day_type':                                   reference.energy_simulation.day_type[window],
        'hour':                                       reference.energy_simulation.hour[window],
        'outdoor_dry_bulb_temperature':               weather['Outdoor Drybulb Temperature (C)'],
        'outdoor_dry_bulb_temperature_predicted_6h':  weather['6h Outdoor Drybulb Temperature (C)'],
        'outdoor_dry_bulb_temperature_predicted_12h': weather['12h Outdoor Drybulb Temperature (C)'],
        'outdoor_dry_bulb_temperature_predicted_24h': weather['24h Outdoor Drybulb Temperature (C)'],
        'diffuse_solar_irradiance':                   weather['Diffuse Solar Radiation (W/m2)'],
        'diffuse_solar_irradiance_predicted_6h':      weather['6h Diffuse Solar Radiation (W/m2)'],
        'diffuse_solar_irradiance_predicted_12h':     weather['12h Diffuse Solar Radiation (W/m2)'],
        'diffuse_solar_irradiance_predicted_24h':     weather['24h Diffuse Solar Radiation (W/m2)'],
        'direct_solar_irradiance':                    weather['Direct Solar Radiation (W/m2)'],
        'direct_solar_irradiance_predicted_6h':       weather['6h Direct Solar Radiation (W/m2)'],
        'direct_solar_irradiance_predicted_12h':      weather['12h Direct Solar Radiation (W/m2)'],
        'direct_solar_irradiance_predicted_24h':      weather['24h Direct Solar Radiation (W/m2)'],
        'carbon_intensity':                           reference.carbon_intensity.carbon_intensity[window],
        'indoor_dry_bulb_temperature':                sim.indoor_dry_bulb_temperature[window],
        'non_shiftable_load':                         sim.non_shiftable_load[window],
        'solar_generation':                           sim.solar_generation[window],
        'dhw_storage_soc':                            b.dhw_storage.soc[window],
        'electrical_storage_soc':                     b.electrical_storage.soc[window],
        'electricity_pricing':                        pricing['Electricity Pricing [$/kWh]'],
        'electricity_pricing_predicted_6h':           pricing['6h Prediction Electricity Pricing [$/kWh]'],
        'electricity_pricing_predicted_12h':          pricing['12h Prediction Electricity Pricing [$/kWh]'],
        'electricity_pricing_predicted_24h':          pricing['24h Prediction Electricity Pricing [$/kWh]'],
        'cooling_demand':                             sim.cooling_demand[window],
        'dhw_demand':                                 sim.dhw_demand[window],
        'indoor_dry_bulb_temperature_set_point':      sim.indoor_dry_bulb_temperature_set_point[window],
    }

    # Generate the Dataframe for the training: fill the existing frame in place
    # so b_1_dataframe, b_2_dataframe and b_3_dataframe see the data as well
    frame = b_dataframe_list[idx]
    for column, values in features.items():
        frame[column] = values
    


## Train the Predictors (LightGBM)

In [86]:
# Model family used by the training cells below: 'lgb' or 'xgb'
model_type = 'lgb'
# Real boolean instead of the string 'False': a non-empty string is truthy, so
# the old value only disabled tuning because LightGBM_Model compares with `== True`.
hyperparameter = False

## Building Level Predictors

### 1.) Cooling Load (kWh)

In [90]:

i = 1
for b in b_dataframe_list:

    # Generate the x,y
    # NOTE(review): X = b keeps the target column among the features - confirm this is intended.
    X = b
    y = b['cooling_demand']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, hyperparameter)
        model.save_model('my_models/models/cooling_load_model_b'+str(i)+'_new.json')
    if model_type == 'lgb':
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, hyperparameter)
        joblib.dump(model, 'my_models/models/cooling_load_model_b'+str(i)+'_new.pkl')
    i = i + 1

### 2.) DHW Load (kWh)

In [92]:
i = 1
for b in b_dataframe_list:

    # Generate the x,y
    # NOTE(review): X = b keeps the target column among the features - confirm this is intended.
    X = b
    y = b['dhw_demand']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, hyperparameter)
        model.save_model('my_models/models/dhw_load_model_b'+str(i)+'.json')
    if model_type == 'lgb':
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, hyperparameter)
        joblib.dump(model, 'my_models/models/dhw_load_model_b'+str(i)+'_new.pkl')
    i = i + 1

### 3.) Equipment Electric Power (kWh)

In [93]:
i = 1
for b in b_dataframe_list:

    # Generate the x,y
    # NOTE(review): X = b keeps the target column among the features - confirm this is intended.
    X = b
    y = b['non_shiftable_load']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, hyperparameter)
        model.save_model('my_models/models/Equipment_Electric_Power_model_b'+str(i)+'.json')
    if model_type == 'lgb':
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, hyperparameter)
        joblib.dump(model, 'my_models/models/Equipment_Electric_Power_model_b'+str(i)+'_new.pkl')
    i = i + 1

## Neighbourhood Level Predictors

### 1.) Carbon Intensity (kgCO2e/kWh)

In [94]:
# combine the datasets to one since we only have one CI
comb = pd.concat([b_dataframe_list[0].reset_index(drop=True),
                  b_dataframe_list[1].reset_index(drop=True),
                  b_dataframe_list[2].reset_index(drop=True)])

# Generate the x,y
# NOTE(review): X = comb keeps the target column among the features - confirm this is intended.
X = comb
y = comb['carbon_intensity']

# Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

# The fitted estimator is bound to `model` so the `lgb` module alias
# imported at the top of the notebook is not overwritten.
if model_type == 'xgb':
    df, model = XGBoost_Model(X_train, X_test, y_train, y_test, hyperparameter)
    # Single neighbourhood-level model: no building counter in the file name
    # (the old name picked up whatever value `i` had after an earlier cell).
    model.save_model('my_models/models/Carbon_Intensity_Power_model.json')
if model_type == 'lgb':
    df, model = LightGBM_Model(X_train, X_test, y_train, y_test, hyperparameter)
    joblib.dump(model, 'my_models/models/Carbon_Intensity_Power_model_new.pkl')

### 2.) Solar Generation (W/kW)

In [96]:
i = 1
for b in b_dataframe_list:

    # Generate the x,y
    # NOTE(review): X = b keeps the target column among the features - confirm this is intended.
    X = b
    y = b['solar_generation']

    # Generate the test,train (shuffle=False keeps row order; the last 33 % is the test split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)

    # The fitted estimator is bound to `model` so the `lgb` module alias
    # imported at the top of the notebook is not overwritten.
    if model_type == 'xgb':
        df, model = XGBoost_Model(X_train, X_test, y_train, y_test, hyperparameter)
        model.save_model('my_models/models/solar_generation_model_b'+str(i)+'.json')
    if model_type == 'lgb':
        df, model = LightGBM_Model(X_train, X_test, y_train, y_test, hyperparameter)
        joblib.dump(model, 'my_models/models/solar_generation_model_b'+str(i)+'_new.pkl')
    i = i + 1

## Helper Functions

In [89]:
# LightGBM Models

def LightGBM_Model(X_train, X_test, y_train, y_test, hpt):
    """Fit a LightGBM regressor and predict the test split.

    Parameters
    ----------
    X_train, y_train : training features and target.
    X_test, y_test : held-out features and target.
    hpt : whether to run the hyper-parameter grid search. Booleans keep their
        previous meaning; strings are parsed case-insensitively, so 'True'
        enables the search and 'False' (which this notebook passes) disables it.

    Returns
    -------
    (df, model) : a DataFrame with the columns 'Actual Value' and
        'Predicted Value', and the fitted estimator - a GridSearchCV when
        tuning, otherwise an LGBMRegressor.
    """
    # A string flag used to fall through to the fixed parameters no matter what
    # it said, because a str never compares equal to True.
    if isinstance(hpt, str):
        tune = hpt.strip().lower() == 'true'
    else:
        tune = hpt == True  # noqa: E712 - same comparison as before for non-strings

    if tune:
        param_grid = {
            'max_depth':        [3, 4, 5],
            'num_leaves':       [10, 15, 20],
            'learning_rate':    [0.05, 0.1, 0.15],
            'n_estimators':     [50, 100, 200],
            'subsample':        [0.5, 0.7, 0.9],
            'colsample_bytree': [0.5, 0.7, 0.9],
            'reg_alpha':        [0.01, 0.1, 1],
            'reg_lambda':       [0.01, 0.1, 1],
            'verbose':          [-1],
        }
        # 3**8 parameter combinations, each fitted on 5 folds
        model = GridSearchCV(
            LGBMRegressor(boosting_type='gbdt', objective='regression'),
            param_grid,
            cv=5,
            n_jobs=-1,
        )
    else:
        lgb_params = {
            'n_jobs': 1,
            'max_depth': 4,
            'min_data_in_leaf': 10,
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'subsample': 0.9,
            'n_estimators': 80,
            'learning_rate': 0.1,
            'colsample_bytree': 0.9,
            # NOTE(review): 'steps' does not look like a LightGBM parameter;
            # kept unchanged in case something downstream reads it - confirm.
            'steps': 48,
            'verbose': -1,
        }
        model = LGBMRegressor(**lgb_params)

    # Fitting and prediction are the same for both variants
    model.fit(X_train, y_train)

    df = pd.DataFrame({'Actual Value': y_test, 'Predicted Value': model.predict(X_test)})
    return df, model

# Testing the Evaluation

In [97]:
import os
import glob
import numpy as np 
import pandas as pd 
import lightgbm as lgb
from sklearn.model_selection import GroupKFold
import category_encoders as ce
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import joblib
from difflib import SequenceMatcher


from lightgbm import LGBMRegressor
from sklearn.metrics import mean_pinball_loss

from sklearn.model_selection import train_test_split
import matplotlib.ticker as ticker
import re

from my_models.base_predictor_model import BasePredictorModel