In [1]:
import json
import os

import dicttoxml
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBRegressor

In [2]:
# Read the ENB2012 CSV into a DataFrame
dataset_path = '../data/ENB2012_data_with_columns.csv'
data = pd.read_csv(dataset_path)

In [3]:
# Treat +/-infinity as missing, then discard every row that still has a gap
data = data.replace([np.inf, -np.inf], np.nan).dropna()

In [4]:
# Define input and output variables
# The targets are selected by name, so the features are built by dropping those
# same names instead of slicing off the last two columns by position. This keeps
# a target from slipping into X if the column order of the CSV ever changes.
target_columns = ['Heating_Load', 'Cooling_Load']
X = data.drop(columns=target_columns)
y_hl = data['Heating_Load']
y_cl = data['Cooling_Load']

In [5]:
# Hold out 20% of the rows for testing; both targets are split with seed 42
split_options = {'test_size': 0.2, 'random_state': 42}
(X_train_hl, X_test_hl,
 y_train_hl, y_test_hl) = train_test_split(X, y_hl, **split_options)
(X_train_cl, X_test_cl,
 y_train_cl, y_test_cl) = train_test_split(X, y_cl, **split_options)

In [6]:
# Define the model
# Keep each XGBoost fit single-threaded: the grid searches that use this
# estimator already run with n_jobs=-1, and letting both layers spawn threads
# can make them compete for the same cores.
xgb_model = XGBRegressor(n_jobs=1)

In [7]:
# Search space: three candidate values for each of nine hyperparameters,
# i.e. 3**9 = 19683 combinations in total.
param_grid = dict(
    learning_rate=[0.01, 0.1, 0.2],
    gamma=[0, 0.1, 0.2],
    max_depth=[100, 150, 200],
    n_estimators=[500, 1000, 2000],
    reg_alpha=[0.01, 0.1, 0.5],
    reg_lambda=[0.5, 0.8, 1],
    subsample=[0.8, 0.9, 1],
    colsample_bytree=[0.7, 0.8, 1],
    min_child_weight=[1, 1.8, 3],
)

In [8]:
# Heating load: exhaustive search over param_grid with 10-fold cross-validation,
# ranking candidates by negative mean squared error
grid_search_hl = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=10,
    n_jobs=-1,
    verbose=2,
)
grid_search_hl.fit(X_train_hl, y_train_hl)

# Keep the winning hyperparameter combination for the export cells
best_params_hl = grid_search_hl.best_params_

Fitting 10 folds for each of 19683 candidates, totalling 196830 fits


In [9]:
# Cooling load: exhaustive search over param_grid with 10-fold cross-validation,
# ranking candidates by negative mean squared error
grid_search_cl = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=10,
    n_jobs=-1,
    verbose=2,
)
grid_search_cl.fit(X_train_cl, y_train_cl)

# Keep the winning hyperparameter combination for the export cells
best_params_cl = grid_search_cl.best_params_

Fitting 10 folds for each of 19683 candidates, totalling 196830 fits


### Save Best Parameters in TXT Format

In [10]:
# Save the best parameters
# Create the output folder first: without it, open() raises FileNotFoundError
# and that would only surface after both grid searches have already finished.
os.makedirs('../results/tables', exist_ok=True)
with open('../results/tables/best_params.txt', 'w') as f:
    # One line per target, each holding the repr of the best-parameter dict
    f.write(f'Best parameters for heating load: {best_params_hl}\n')
    f.write(f'Best parameters for cooling load: {best_params_cl}\n')

### Save Best Parameters in JSON Format

In [11]:
# Bundle both winning configurations under one mapping and write it as
# indented JSON
best_params = dict(heating_load=best_params_hl, cooling_load=best_params_cl)
with open('../results/tables/best_params.json', 'w') as json_file:
    json.dump(best_params, json_file, indent=4)

### Save Best Parameters in XML Format

In [12]:
# Bundle both winning configurations under one mapping, convert it with
# dicttoxml, and write the result in binary mode
best_params = dict(heating_load=best_params_hl, cooling_load=best_params_cl)
xml = dicttoxml.dicttoxml(best_params)
with open('../results/tables/best_params.xml', 'wb') as xml_file:
    xml_file.write(xml)