In [5]:
!pip install xgboost



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.neighbors import KNeighborsRegressor

In [5]:
# Read the energy-efficiency CSV (path is relative to the working directory).
df_energy = pd.read_csv("energy_efficiency_data.csv")

In [6]:
# Preview the first rows of the loaded frame.
df_energy.head()

Unnamed: 0,Relative_Compactness,Surface_Area,Wall_Area,Roof_Area,Overall_Height,Orientation,Glazing_Area,Glazing_Area_Distribution,Heating_Load,Cooling_Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


In [7]:
# Remove Surface_Area from the frame. Rebinding the name (instead of mutating
# in place) together with errors="ignore" keeps this cell safe to re-run: a
# second execution no longer raises KeyError because the column is already gone.
df_energy = df_energy.drop(columns=["Surface_Area"], errors="ignore")

In [8]:
# Hold-out configuration used by the train/test split.
TEST_SIZE = 0.2      # fraction of rows reserved for the test set
RANDOM_STATE = 42    # seed passed to train_test_split

# Features are every column except the two load targets.
X = df_energy.drop(columns=["Heating_Load", "Cooling_Load"])
y1 = df_energy["Heating_Load"]   # heating-load target
y2 = df_energy["Cooling_Load"]   # cooling-load target



In [None]:
# Split the features and both targets in a single call so that all three
# pieces share the same row partition.
(
    X_train, X_test,
    y1_train, y1_test,
    y2_train, y2_test,
) = train_test_split(
    X, y1, y2,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Report the train/test shapes of the features and of each target.
print(X_train.shape, X_test.shape)
print(y1_train.shape, y1_test.shape)
print(y2_train.shape, y2_test.shape)

In [19]:
# Names of the float64 / int64 columns; these are the ones handed to the scaler.
var_to_scale = X_train.select_dtypes(
    include=["float64", "int64"],
).columns

In [20]:
# ===== Scaler =====
# Min-max scaler applied to the columns listed in var_to_scale.
scaler = MinMaxScaler()

# Route the selected columns through the scaler; every other column is passed
# through unchanged.
col_transformer = ColumnTransformer(
    [("scaler", scaler, var_to_scale)],
    remainder="passthrough",
)

col_transformer

In [21]:
# Fit the transformer on the training split only, then apply the already
# fitted transformer to the test split.
X_train_trans = col_transformer.fit_transform(X_train)
X_test_trans = col_transformer.transform(X_test)

In [22]:
# Display the untransformed training features.
X_train

Unnamed: 0,Relative_Compactness,Wall_Area,Roof_Area,Overall_Height,Orientation,Glazing_Area,Glazing_Area_Distribution
60,0.82,318.5,147.00,7.0,2,0.10,1
618,0.64,343.0,220.50,3.5,4,0.40,2
346,0.86,294.0,147.00,7.0,4,0.25,2
294,0.90,318.5,122.50,7.0,4,0.25,1
231,0.66,318.5,220.50,3.5,5,0.10,4
...,...,...,...,...,...,...,...
71,0.76,416.5,122.50,7.0,5,0.10,1
106,0.86,294.0,147.00,7.0,4,0.10,2
270,0.71,269.5,220.50,3.5,4,0.10,5
435,0.98,294.0,110.25,7.0,5,0.25,4


In [23]:
# Wrap the transformed training array in a DataFrame for inspection.
# NOTE(review): reusing X_train's column labels assumes the transformer kept
# the original column order (true while every column is routed to the scaler).
df_train_trans = pd.DataFrame(X_train_trans, columns=X_train.columns)
df_train_trans

Unnamed: 0,Relative_Compactness,Wall_Area,Roof_Area,Overall_Height,Orientation,Glazing_Area,Glazing_Area_Distribution
0,0.555556,0.428571,0.333333,1.0,0.000000,0.250,0.2
1,0.055556,0.571429,1.000000,0.0,0.666667,1.000,0.4
2,0.666667,0.285714,0.333333,1.0,0.666667,0.625,0.4
3,0.777778,0.428571,0.111111,1.0,0.666667,0.625,0.2
4,0.111111,0.428571,1.000000,0.0,1.000000,0.250,0.8
...,...,...,...,...,...,...,...
609,0.388889,1.000000,0.111111,1.0,1.000000,0.250,0.2
610,0.666667,0.285714,0.333333,1.0,0.666667,0.250,0.4
611,0.250000,0.142857,1.000000,0.0,0.666667,0.250,1.0
612,1.000000,0.285714,0.000000,1.0,1.000000,0.625,0.8


In [24]:
#train and evaluate different regression models 
def train_and_evaluate_models(X_train, X_test, y_train, y_test, random_state=None):
    """Fit a fixed set of regressors and tabulate their test-set metrics.

    Every model is fitted on ``(X_train, y_train)`` and scored on
    ``(X_test, y_test)``.

    Parameters
    ----------
    X_train, X_test : array-like
        Training and test feature matrices.
    y_train, y_test : array-like
        Training and test values of a single regression target.
    random_state : int or None, default None
        Seed forwarded to the models that accept one (decision tree, random
        forest, XGBoost). ``None`` is each estimator's own default, so
        omitting the argument behaves exactly as before; pass an integer to
        make the comparison reproducible.

    Returns
    -------
    pandas.DataFrame
        One row per model, with the columns ``Model``, ``Mean Absolute Error``,
        ``Root Mean Squared Error``, ``Mean Squared Error`` and
        ``R-squared (R2)``.
    """
    # Candidate regressors, evaluated in this order.
    models = [
        ("Linear Regression", LinearRegression()),
        ("Decision Tree", DecisionTreeRegressor(random_state=random_state)),
        ("Random Forest", RandomForestRegressor(random_state=random_state)),
        ("SVM", SVR()),
        ("XGBoost", XGBRegressor(random_state=random_state)),
        ("K-Nearest Neighbors", KNeighborsRegressor()),
    ]

    # One metrics record per fitted model.
    records = []
    for model_name, model in models:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        records.append({
            'Model': model_name,
            'Mean Absolute Error': mean_absolute_error(y_test, y_pred),
            # RMSE is derived from the MSE instead of calling
            # mean_squared_error(..., squared=False): that keyword was
            # deprecated in scikit-learn 1.4 and is rejected by newer releases.
            'Root Mean Squared Error': np.sqrt(mse),
            'Mean Squared Error': mse,
            'R-squared (R2)': r2_score(y_test, y_pred),
        })

    # Explicit column list pins the column order of the returned table.
    return pd.DataFrame(
        records,
        columns=[
            'Model',
            'Mean Absolute Error',
            'Root Mean Squared Error',
            'Mean Squared Error',
            'R-squared (R2)',
        ],
    )

In [28]:
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score

In [48]:
# Model comparison on the unscaled features, heating-load target.
train_and_evaluate_models(
    X_train=X_train,
    X_test=X_test,
    y_train=y1_train,
    y_test=y1_test,
)

Unnamed: 0,Model,Mean Absolute Error,Root Mean Squared Error,Mean Squared Error,R-squared (R2)
0,Linear Regression,2.182047,3.025424,9.153188,0.912184
1,Decision Tree,0.425065,0.620675,0.385238,0.996304
2,Random Forest,0.357918,0.495068,0.245093,0.997649
3,SVM,3.5486,4.9088,24.096314,0.768819
4,XGBoost,0.263622,0.407363,0.165944,0.998408
5,K-Nearest Neighbors,1.548065,2.200186,4.840817,0.953557


In [49]:
# Model comparison on the unscaled features, cooling-load target.
train_and_evaluate_models(
    X_train=X_train,
    X_test=X_test,
    y_train=y2_train,
    y_test=y2_test,
)

Unnamed: 0,Model,Mean Absolute Error,Root Mean Squared Error,Mean Squared Error,R-squared (R2)
0,Linear Regression,2.195295,3.145382,9.893428,0.893226
1,Decision Tree,1.154805,2.017107,4.068722,0.956088
2,Random Forest,1.076459,1.742677,3.036924,0.967224
3,SVM,3.372752,4.681165,21.91331,0.763501
4,XGBoost,0.448628,0.858288,0.736659,0.99205
5,K-Nearest Neighbors,1.450442,1.950237,3.803425,0.958952


In [30]:
# Model comparison on the min-max-scaled features, cooling-load target.
# y_train / y_test are never defined in this notebook (they only existed as
# leftover kernel state), so the call raised NameError on a fresh kernel.
# The cooling-load split is used because the recorded Linear Regression and
# XGBoost rows of this cell match the unscaled cooling-load run exactly.
train_and_evaluate_models(X_train_trans, X_test_trans, y2_train, y2_test)

Unnamed: 0,Model,Mean Absolute Error,Root Mean Squared Error,Mean Squared Error,R-squared (R2)
0,Linear Regression,2.195295,3.145382,9.893428,0.893226
1,Decision Tree,1.115584,1.986964,3.948025,0.957391
2,Random Forest,1.085565,1.774553,3.149037,0.966014
3,SVM,2.148086,3.298441,10.879713,0.882581
4,XGBoost,0.448628,0.858288,0.736659,0.99205
5,K-Nearest Neighbors,2.270338,3.274587,10.722922,0.884273


In [55]:
# Heating-load model, fitted on the full feature frame X (not just the
# training split).
xgb_model_1 = XGBRegressor().fit(X, y1)

# Cooling-load model, fitted the same way. The fit call stays the last
# expression of the cell so the fitted estimator is still displayed.
xgb_model_2 = XGBRegressor()
xgb_model_2.fit(X, y2)

In [56]:
def predict_heating(relative_compactness, wall_area, roof_area, overall_height,
                    orientation, glazing_area, glazing_area_distribution):
    """Predict the heating load of a single building with ``xgb_model_1``.

    The seven arguments are written, in signature order, into the first seven
    slots of a zero-filled vector whose length is ``len(X.columns)``; any
    remaining slots stay zero. The vector is passed to the model as a one-row
    batch and the first (only) prediction is returned.
    """
    inputs = (
        relative_compactness,
        wall_area,
        roof_area,
        overall_height,
        orientation,
        glazing_area,
        glazing_area_distribution,
    )

    features = np.zeros(len(X.columns))
    for slot, value in enumerate(inputs):
        features[slot] = value

    return xgb_model_1.predict([features])[0]

In [57]:
def predict_cooling(relative_compactness, wall_area, roof_area, overall_height,
                    orientation, glazing_area, glazing_area_distribution):
    """Predict the cooling load of a single building with ``xgb_model_2``.

    The seven arguments are written, in signature order, into the first seven
    slots of a zero-filled vector whose length is ``len(X.columns)``; any
    remaining slots stay zero. The vector is passed to the model as a one-row
    batch and the first (only) prediction is returned.
    """
    inputs = (
        relative_compactness,
        wall_area,
        roof_area,
        overall_height,
        orientation,
        glazing_area,
        glazing_area_distribution,
    )

    features = np.zeros(len(X.columns))
    for slot, value in enumerate(inputs):
        features[slot] = value

    return xgb_model_2.predict([features])[0]

In [61]:
# Spot-check both models on one sample building; the values equal the first
# row shown by df_energy.head() (minus the dropped Surface_Area column).
print(predict_heating(
    relative_compactness=0.98,
    wall_area=294,
    roof_area=110.25,
    overall_height=7,
    orientation=2,
    glazing_area=0,
    glazing_area_distribution=0,
))
print(predict_cooling(
    relative_compactness=0.98,
    wall_area=294,
    roof_area=110.25,
    overall_height=7,
    orientation=2,
    glazing_area=0,
    glazing_area_distribution=0,
))

15.558349
21.332958


In [62]:
import pickle

# Serialise the fitted heating-load model to disk.
with open('building_heating.pickle', 'wb') as heating_file:
    pickle.dump(xgb_model_1, heating_file)

# Serialise the fitted cooling-load model to disk.
with open('building_cooling.pickle', 'wb') as cooling_file:
    pickle.dump(xgb_model_2, cooling_file)

In [9]:
import json

# Lower-cased feature names, in the column order of X.
columns = {
    'data_columns': [name.lower() for name in X.columns],
}

# Write the column list to columns.json.
with open("columns.json", "w") as columns_file:
    json.dump(columns, columns_file)