In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import lightgbm as lgb

In [2]:
url = "data/cleaned.csv"

In [3]:
df = pd.read_csv(url, index_col=0)
df[df.select_dtypes(["object"]).columns] = df.select_dtypes(["object"]).apply(
    lambda x: x.astype("category")
)

In [4]:
X = df.drop(columns='total_usage')
y = df['total_usage']

In [5]:
lgb_params = {
    "boosting_type": "gbdt",
    "objective": "regression",
    "metric": "rmse",
    #'max_depth': 8,
    "num_leaves": 20,
    "bagging_fraction": 0.2,
    "feature_fraction": 0.2,
    "learning_rate": 0.001,
    "verbose": 1,
    "lambda_l2": 2,
}

In [10]:
from sklearn.model_selection import cross_validate
import lightgbm as lgb
from lightgbm import LGBMRegressor

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

model = LGBMRegressor(
    boosting_type="gbdt",
    n_estimators = 40000,
    reg_lambda=2,
    num_leaves=20,
    learning_rate=0.001,
    subsample=0.2,
    colsample_bytree=0.2,
    n_jobs = 8,
)

In [11]:
model.fit(X=X_train, y=y_train)

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=0.2,
              importance_type='split', learning_rate=0.001, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=40000, n_jobs=8, num_leaves=20, objective=None,
              random_state=None, reg_alpha=0.0, reg_lambda=2, silent=True,
              subsample=0.2, subsample_for_bin=200000, subsample_freq=0)

In [12]:
y_pred = model.predict(X_test)
r2_score(y_test, y_pred)

0.7512706917179017

In [169]:
house['total square footage'] 

1763    1676
Name: total square footage, dtype: int64

In [161]:
house = X_test.iloc[:1]
house 

Unnamed: 0,housing unit in census metropolitan statistical area or micropolitan statistical area,census 2010 urban type,type of housing unit,housing unit over a basement,finished basement,attic above the housing unit,finished attic,n. of stories in a single-family home,attached garage,size of attached garage,...,total unheated square footage,"cooling degree days, 30-year average 1981-2010, base temperature 65f",building america climate zone,iecc climate code,"heating degree days, 30-year average 1981-2010, base temperature 65f",weather & shielding factor for the housing unit,annual value for latent heat infiltration from outside air into the housing unit,annual average ground water temperature (f) for 2015,dry bulb design temperature (f) expected to be exceeded 1% of the time,dry bulb design temperature (f) expected to be exceeded 99% of the time
1763,MICRO,Rural,Single-family detached house,No,No,Yes,Yes,One story,No,Two-car garage,...,642,2213,Hot-Humid,IECC climate zone 3A,2133,0.36,15.75,68.23,91.4,27.8


In [164]:
original = model.predict(house)

In [165]:
house_modified = house.copy()
house_modified.loc[:, 'attached garage'].replace('No', 'Yes', inplace = True)
display_change(original, model.predict(house_modified))

Original consumption: 90945.05 BTU
Modified consumption: 90877.24 BTU
Absolute savings:     67.81 BTU
Percentage savings:   0.07 %


In [166]:
house_modified = house.copy()
house_modified.loc[:, 'smart thermostat'].replace('No', 'Yes', inplace = True)
display_change(original, model.predict(house_modified))

Original consumption: 90945.05 BTU
Modified consumption: 90579.37 BTU
Absolute savings:     365.68 BTU
Percentage savings:   0.4 %


In [170]:
house_modified = house.copy()
house_modified.loc[:, 'smart thermostat'].replace('No', 'Yes', inplace = True)
house_modified.loc[:, 'level of insulation'].replace('Adequately insulated', 'Well insulated', inplace = True)
display_change(original, model.predict(house_modified))

Original consumption: 90945.05 BTU
Modified consumption: 89658.33 BTU
Absolute savings:     1286.72 BTU
Percentage savings:   1.41 %


In [168]:
house_modified = house.copy()
house_modified.loc[:, "summer temperature when no one is home during the day"].replace(65, 75, inplace = True)
display_change(original, model.predict(house_modified))

Original consumption: 90945.05 BTU
Modified consumption: 90067.75 BTU
Absolute savings:     877.3 BTU
Percentage savings:   0.96 %


In [107]:
house["summer temperature when no one is home during the day"]

1763    65.0
Name: summer temperature when no one is home during the day, dtype: float64

____________________

In [156]:
def display_change(a, b):
    a = a[0]
    b = b[0]
    print("Original consumption: {} BTU".format(round(a, 2)))
    print("Modified consumption: {} BTU".format(round(b, 2)))
    print("Absolute savings:     {} BTU".format(round((a - b), 2)))
    print("Percentage savings:   {} %".format(round((1 - (b / a)) * 100, 2)))

In [144]:
house = X_test.iloc[3:4]
house['total square footage'] 

4514    2553
Name: total square footage, dtype: int64

In [149]:
original = model.predict(house)

In [141]:
house['main water heater age'] 

4514    20 years or older
Name: main water heater age, dtype: category
Categories (6, object): [10 to 14 years old, 15 to 19 years old, 2 to 4 years old, 20 years or older, 5 to 9 years old, Less than 2 years old]

In [154]:
house_modified = house.copy()
house_modified.loc[:, 'level of insulation'].replace('Poorly insulated', 'Well insulated', inplace = True)
display_change(original, model.predict(house_modified))

Original cunsumption: 127095.96 BTU
Modified cunsumption: 125607.74 BTU
Absolute savings:     1488.22 BTU
Percantage savings:   1.17 %


In [157]:
house_modified = house.copy()
house_modified.loc[:, 'energy star qualified windows'].replace('No', 'Yes', inplace = True)
display_change(original, model.predict(house_modified))

Original consumption: 127095.96 BTU
Modified consumption: 125794.58 BTU
Absolute savings:     1301.38 BTU
Percentage savings:   1.02 %


In [158]:
house_modified = house.copy()
house_modified.loc[:, "main space heating equipment type"].replace(
    "Central furnace", "Heat pump", inplace=True
)
display_change(original, model.predict(house_modified))

Original consumption: 127095.96 BTU
Modified consumption: 118916.53 BTU
Absolute savings:     8179.43 BTU
Percentage savings:   6.44 %


In [159]:
house_modified = house.copy()
house_modified.loc[:, "age of main space heating equipment"].replace(
    "5 to 9 years old", "Less than 2 years old", inplace=True
)
display_change(original, model.predict(house_modified))

Original consumption: 127095.96 BTU
Modified consumption: 125169.56 BTU
Absolute savings:     1926.4 BTU
Percentage savings:   1.52 %


In [160]:
house_modified = house.copy()
house_modified.loc[:, 'smart thermostat'].replace('No', 'Yes', inplace = True)
display_change(original, model.predict(house_modified))

Original consumption: 127095.96 BTU
Modified consumption: 126920.54 BTU
Absolute savings:     175.42 BTU
Percentage savings:   0.14 %
