In [13]:
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
import sklearn

In [1]:
class Xgboost:
    """Three XGBoost regressors with separate hyperparameters, one per dataset.

    ``data`` must expose the attributes ``A``, ``B`` and ``C``. Each of those
    objects is read for ``train_x``/``train_y`` and ``val_x``/``val_y`` (used
    while fitting) and for ``test_x`` (used by :meth:`predict`).

    Attributes:
        A, B, C: the per-dataset objects taken from ``data``.
        model_a, model_b, model_c: the ``xgb.XGBRegressor`` fitted on
            ``A``, ``B`` and ``C`` respectively.
    """

    # Settings that are identical for all three regressors.
    _COMMON_PARAMS = dict(
        device='cuda',
        learning_rate=0.05,
        seed=42,
        eval_metric='mae',
        booster='gbtree',
        early_stopping_rounds=100,
    )

    def __init__(self, data):
        """Store the three datasets and build one unfitted regressor for each."""
        self.A = data.A
        self.B = data.B
        self.C = data.C

        self.model_a = xgb.XGBRegressor(
            **self._COMMON_PARAMS,
            n_estimators=1693,
            max_depth=16,
            min_child_weight=9,
            gamma=4.709926652039647,
            subsample=0.5746022561573897,
            colsample_bytree=0.925119931399705,
            reg_alpha=77.7952642777926,
            reg_lambda=102.6220459955603,
        )

        self.model_b = xgb.XGBRegressor(
            **self._COMMON_PARAMS,
            n_estimators=2880,
            max_depth=10,
            min_child_weight=2,
            gamma=6.9462927163070525,
            subsample=0.5425452253269976,
            colsample_bytree=0.8615770908405836,
            reg_alpha=39.56391755892025,
            reg_lambda=165.13746485969003,
        )

        self.model_c = xgb.XGBRegressor(
            **self._COMMON_PARAMS,
            n_estimators=2049,
            max_depth=11,
            min_child_weight=6,
            gamma=2.700424640722136,
            subsample=0.7625820679319437,
            colsample_bytree=0.6696305568496206,
            reg_alpha=146.95411105137276,
            reg_lambda=125.49465203052867,
        )

    def _model_building_pairs(self):
        """Return ``(model, dataset)`` pairs in the fixed order A, B, C."""
        return [
            (self.model_a, self.A),
            (self.model_b, self.B),
            (self.model_c, self.C),
        ]

    def modelfit(self, alg, building, verbose=True):
        """Fit ``alg`` on one dataset and report its train/validation MAE.

        Args:
            alg: estimator exposing ``fit(x, y, eval_set=..., verbose=...)``
                and ``predict(x)``.
            building: object providing ``train_x``, ``train_y``, ``val_x``
                and ``val_y``.
            verbose: forwarded to ``alg.fit``. The default ``True`` keeps the
                per-round evaluation log; pass ``False`` to silence it.

        Returns:
            Tuple ``(mae, mae_val)``: mean absolute error on the training and
            on the validation split.
        """
        # Keep the validation pair last: XGBoost documents that the last
        # eval_set entry is the one used for early stopping.
        alg.fit(
            building.train_x,
            building.train_y,
            eval_set=[
                (building.train_x, building.train_y),
                (building.val_x, building.val_y),
            ],
            verbose=verbose,
        )

        train_predictions = alg.predict(building.train_x)
        val_predictions = alg.predict(building.val_x)

        # scikit-learn's signature is (y_true, y_pred): ground truth first.
        # NOTE: these scores use the raw predictions, whereas predict()
        # returns their absolute values.
        mae = mean_absolute_error(building.train_y, train_predictions)
        mae_val = mean_absolute_error(building.val_y, val_predictions)
        print(mae, mae_val)

        return mae, mae_val

    def fit(self, verbose=True):
        """Fit all three regressors and print the MAE averaged over them.

        Args:
            verbose: forwarded to :meth:`modelfit` for every model.
        """
        scores = [
            self.modelfit(model, building, verbose=verbose)
            for model, building in self._model_building_pairs()
        ]
        mae_avg = sum(train for train, _ in scores) / len(scores)
        mae_val_avg = sum(val for _, val in scores) / len(scores)
        print(f'Average MAE: {mae_avg}, Average val MAE: {mae_val_avg}')

    def predict(self):
        """Predict on the train, validation and test inputs of every dataset.

        Returns:
            A list with one entry per dataset in the order A, B, C. Each entry
            is ``[train_predictions, val_predictions, test_predictions]``,
            with ``np.absolute`` applied to the raw model output.
        """
        return [
            [
                np.absolute(model.predict(features))
                for features in (building.train_x, building.val_x, building.test_x)
            ]
            for model, building in self._model_building_pairs()
        ]