In [1]:
import xgboost as xgb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE

from tfg_module import my_get_time_series as mgts
from tfg_module import my_metrics as mm
from tfg_module import my_process_data as mpd
from tfg_module import my_future as mf

In [4]:
class MyXGBoost:
    """Grid-searched XGBoost regression pipeline for a single target series.

    The stages are exposed as separate methods and chained by ``run``:
    load the series and build features, grid-search an ``XGBRegressor`` with
    a time-series cross-validation split, refit the best configuration for
    each scoring metric, and hand the fitted models to ``mf`` for display.
    """

    # sklearn scoring names used in the grid search, mapped to the column
    # labels of the frame returned by ``save_results``.
    METRIC_LABELS = {
        'neg_mean_absolute_error': 'MAE',
        'neg_mean_squared_error': 'MSE',
    }

    def __init__(self, target):
        """Store the target name and initialise the pipeline state.

        Args:
            target: Name of the target passed to ``mgts.get_ts`` and
                ``mpd.create_features``.
        """
        self.target = target
        self.ts = None
        # Kept only so existing code reading ``.metric`` does not break;
        # nothing in this class uses it.
        self.metric = None
        # Defined here (not in ``train``) so ``save_results`` can be called
        # with previously computed CV results without raising AttributeError.
        self.metrics = list(self.METRIC_LABELS)

    def prepare_data(self):
        """Load the time series for ``self.target`` and build features.

        Stores the series returned by ``mgts.get_ts`` on ``self.ts``.

        Returns:
            The ``(X, y)`` pair produced by ``mpd.create_features``.
        """
        print('Preparing data...')
        self.ts = mgts.get_ts(self.target)

        X, y = mpd.create_features(self.ts, target=self.target, informer=False)

        return X, y

    def create_model(self):
        """Return an ``XGBRegressor`` with the library's default settings."""
        return xgb.XGBRegressor()

    def train(self, X, y):
        """Grid-search the regressor with a time-series CV split.

        Args:
            X: Feature matrix accepted by ``GridSearchCV.fit``.
            y: Target values aligned with ``X``.

        Returns:
            The ``cv_results_`` mapping of the fitted search, containing the
            ``rank_test_<metric>`` / ``mean_test_<metric>`` entries for every
            name in ``self.metrics``.
        """
        print('Training...')
        model = self.create_model()
        cross_val_split = TimeSeriesSplit(n_splits=3, test_size=20)

        params_grid = {
            'max_depth': [3, 5, 10],
            'n_estimators': [50, 500, 1000],
        }
        # Other hyper-parameters that were considered for this grid:
        # tree_method, learning_rate, colsample_bytree, max_leaves.

        # refit=False: only cv_results_ is returned from this method, and
        # save_results refits the winner for each metric itself, so letting
        # the search refit here would be a discarded extra fit.
        search = GridSearchCV(
            estimator=model,
            cv=cross_val_split,
            param_grid=params_grid,
            scoring=self.metrics,
            refit=False,
        )
        search.fit(X, y)

        results = search.cv_results_
        print('Training completed...')

        return results

    def save_results(self, X, y, results):
        """Refit the best-ranked configuration for each scoring metric.

        Args:
            X: Feature matrix used to refit each selected configuration.
            y: Target values aligned with ``X``.
            results: ``cv_results_`` mapping as returned by ``train``.

        Returns:
            A DataFrame with one column per metric (``'MAE'``, ``'MSE'``)
            and three rows: row 0 is the fitted model, row 1 the best mean
            CV score with its sign flipped, row 2 the parameter dict.
        """
        print('Saving results...')
        models_by_metric = {}
        for metric in self.metrics:
            best_index = results[f'rank_test_{metric}'].argmin()
            # The scorers are negated errors; flip the sign to report the
            # error itself.
            best_score = -results[f'mean_test_{metric}'][best_index]
            best_params = results['params'][best_index]
            best_model = xgb.XGBRegressor(**best_params)
            best_model.fit(X, y)
            models_by_metric[metric] = (best_model, best_score, best_params)

        results_df = pd.DataFrame(models_by_metric)

        return results_df.rename(columns=self.METRIC_LABELS)

    def showing_results(self, df, metric):
        """Display the predictions of the best model for one metric.

        Args:
            df: Frame returned by ``save_results``.
            metric: Column label of the metric (case-insensitive), e.g.
                ``'MAE'`` or ``'mse'``.
        """
        print('Showing results...')
        metric = metric.upper()
        # Row 0 of each metric column holds the fitted model.
        best_model = df.loc[0, metric]
        best_df = mf.get_pred_df(self.ts, best_model)
        mf.show_pred(self.ts, best_df, metric)

    def run(self):
        """Execute the full pipeline and show results for MAE and MSE."""
        X, y = self.prepare_data()
        results = self.train(X, y)
        results_df = self.save_results(X, y, results)
        self.showing_results(results_df, 'MAE')
        self.showing_results(results_df, 'MSE')

In [5]:
# Build the pipeline for the 'Ventas' target and execute it end to end
# (data preparation, grid search, per-metric refit, result display).
my_xgboost = MyXGBoost(target = 'Ventas')
my_xgboost.run()

AttributeError: 'MyXGBoost' object has no attribute 'showing'