In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
# Read the four `entrenar2_st_*` training CSVs in one pass. The individual
# df1..df4 names are kept bound for any cell that refers to them directly.
archivos_st = [
    pd.read_csv(ruta)
    for ruta in (
        'entrenar2_st_1820.csv',
        'entrenar2_st_1921.csv',
        'entrenar2_st_2022.csv',
        'entrenar2_st_2123.csv',
    )
]
df1, df2, df3, df4 = archivos_st

In [3]:
def buscar_parametros_adaBoost_st(reg, archivos=None):
    """Return the mean hold-out RMSE of ``reg`` over a list of training frames.

    For every DataFrame the ``ref_hash`` and ``segundos`` columns are dropped
    to build the feature matrix, ``segundos`` is used as the target, and the
    rows are split 80/20 with a fixed ``random_state=150``. ``reg`` is fitted
    on the training part and scored on the held-out part.

    Parameters
    ----------
    reg : object with ``fit(X, y)`` and ``predict(X)``
        ``fit`` is called on the object passed in, once per frame.
    archivos : iterable of pandas.DataFrame, optional
        Frames to evaluate on. When omitted, the notebook-level
        ``archivos_st`` list is used, so one-argument calls behave as before.

    Returns
    -------
    float
        Arithmetic mean of the per-frame RMSE values (also printed).

    Raises
    ------
    ValueError
        If there are no frames to evaluate on.
    """
    if archivos is None:
        archivos = archivos_st
    # Materialise once so generators work and emptiness is checked up front,
    # instead of surfacing later as a ZeroDivisionError.
    archivos = list(archivos)
    if not archivos:
        raise ValueError("se necesita al menos un DataFrame en 'archivos'")

    errores_rmse = []
    for df in archivos:
        X, y = df.drop(['ref_hash', 'segundos'], axis=1), df['segundos']
        # Fixed seed: every candidate model is scored on the same split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=150)

        reg.fit(X_train, y_train)
        preds = reg.predict(X_test)
        errores_rmse.append(np.sqrt(mean_squared_error(y_test, preds)))

    error_promedio = sum(errores_rmse) / len(errores_rmse)
    print('RMSE promedio: %f' % (error_promedio))
    return (error_promedio)

In [5]:
# Coarse sweep over the ensemble size: score every candidate with the
# averaged hold-out RMSE and report the one with the smallest error.
errores = []
for v in (10, 15, 30):
    print('n_estimators: %f' % v)
    reg = AdaBoostRegressor(n_estimators=v, random_state=0)
    error = buscar_parametros_adaBoost_st(reg)
    errores.append([error, v])
# Each entry is [rmse, candidate]; keep the entry with the lowest rmse.
error, v = min(errores, key=lambda par: par[0])
print('Error minimo %f con valor %f' % (error, v))

n_estimators: 10.000000
RMSE promedio: 89000.523624
n_estimators: 15.000000
RMSE promedio: 89004.909281
n_estimators: 30.000000
RMSE promedio: 89004.909281
Error minimo 89000.523624 con valor 10.000000


In [6]:
# Second sweep over the ensemble size, this time on smaller values
# (3, 5, 8), scored the same way as the coarse sweep.
errores = []
for v in (3, 5, 8):
    print('n_estimators: %f' % v)
    reg = AdaBoostRegressor(n_estimators=v, random_state=0)
    error = buscar_parametros_adaBoost_st(reg)
    errores.append([error, v])
# Each entry is [rmse, candidate]; keep the entry with the lowest rmse.
error, v = min(errores, key=lambda par: par[0])
print('Error minimo %f con valor %f' % (error, v))

n_estimators: 3.000000
RMSE promedio: 88972.652675
n_estimators: 5.000000
RMSE promedio: 88930.003699
n_estimators: 8.000000
RMSE promedio: 88951.671796
Error minimo 88930.003699 con valor 5.000000


In [8]:
# Sweep the AdaBoost seed with the ensemble size fixed at 5 estimators.
# NOTE(review): this picks a seed by its score on the evaluation split, so the
# differences likely reflect seed-to-seed noise rather than a real
# improvement - confirm before treating the winning seed as a tuned value.
errores = []
for v in (10, 50, 100, 150):
    print('random_state: %f' % v)
    reg = AdaBoostRegressor(n_estimators=5, random_state=v)
    error = buscar_parametros_adaBoost_st(reg)
    errores.append([error, v])
# Each entry is [rmse, seed]; keep the entry with the lowest rmse.
error, v = min(errores, key=lambda par: par[0])
print('Error minimo %f con valor %f' % (error, v))

random_state: 10.000000
RMSE promedio: 88828.705910
random_state: 50.000000
RMSE promedio: 88811.474493
random_state: 100.000000
RMSE promedio: 88993.317725
random_state: 150.000000
RMSE promedio: 88918.796848
Error minimo 88811.474493 con valor 50.000000


In [None]:
# Sweep the AdaBoost learning rate from 0.1 to 0.5 with the seed fixed at 10
# and the ensemble size fixed at 5 estimators.
errores = []
for v in (0.1, 0.2, 0.3, 0.4, 0.5):
    print('learning_rate: %f' % v)
    reg = AdaBoostRegressor(n_estimators=5, learning_rate=v, random_state=10)
    error = buscar_parametros_adaBoost_st(reg)
    errores.append([error, v])
# Each entry is [rmse, learning_rate]; keep the entry with the lowest rmse.
error, v = min(errores, key=lambda par: par[0])
print('Error minimo %f con valor %f' % (error, v))

learning_rate: 0.100000
RMSE promedio: 89258.407326
learning_rate: 0.200000
RMSE promedio: 89096.193215
learning_rate: 0.300000
RMSE promedio: 88920.200150
learning_rate: 0.400000


In [4]:
# Compare the three AdaBoost loss functions with the other settings fixed
# (seed 10, 5 estimators, learning rate 0.2).
errores = []
for v in ('linear', 'square', 'exponential'):
    print('loss: %s' % v)
    reg = AdaBoostRegressor(
        n_estimators=5,
        learning_rate=0.2,
        loss=v,
        random_state=10,
    )
    error = buscar_parametros_adaBoost_st(reg)
    errores.append([error, v])
# Each entry is [rmse, loss_name]; keep the entry with the lowest rmse.
error, v = min(errores, key=lambda par: par[0])
print('Error minimo %f con valor %s' % (error, v))

loss: linear
RMSE promedio: 89096.193215
loss: square
RMSE promedio: 88880.190219
loss: exponential
RMSE promedio: 88971.983669
Error minimo 88880.190219 con valor square


In [7]:
import xgboost as xgb

# Gradient-boosted trees used as the weak learner inside AdaBoost.
# 'reg:squarederror' is the current name of the squared-error objective;
# the previous 'reg:linear' spelling is a deprecated alias for it.
xg_reg = xgb.XGBRegressor(objective='reg:squarederror',
                          colsample_bytree=1, learning_rate=0.5,
                          max_depth=5, alpha=10, n_estimators=30)
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional argument works with both.
reg = AdaBoostRegressor(xg_reg, random_state=10, n_estimators=5,
                        learning_rate=0.2, loss='square')
# Last expression of the cell: the averaged RMSE is printed and displayed.
buscar_parametros_adaBoost_st(reg)

RMSE promedio: 86696.842669


86696.84266928528

In [3]:
# Read the four `entrenar_sc_*` training CSVs in one pass. The individual
# df5..df8 names are kept bound for any cell that refers to them directly.
archivos_sc = [
    pd.read_csv(ruta)
    for ruta in (
        'entrenar_sc_1820.csv',
        'entrenar_sc_1921.csv',
        'entrenar_sc_2022.csv',
        'entrenar_sc_2123.csv',
    )
]
df5, df6, df7, df8 = archivos_sc

In [8]:
def estimar_error_adaBoost(reg, archivos):
    """Return the mean hold-out RMSE of ``reg`` over ``archivos``.

    For every DataFrame the ``ref_hash`` and ``segundos`` columns are dropped
    to build the feature matrix, ``segundos`` is used as the target, and the
    rows are split 80/20 with a fixed ``random_state=150``. ``reg`` is fitted
    on the training part and scored on the held-out part.

    Parameters
    ----------
    reg : object with ``fit(X, y)`` and ``predict(X)``
        ``fit`` is called on the object passed in, once per frame.
    archivos : iterable of pandas.DataFrame
        Frames to evaluate on; each must contain the ``ref_hash`` and
        ``segundos`` columns.

    Returns
    -------
    float
        Arithmetic mean of the per-frame RMSE values (also printed).

    Raises
    ------
    ValueError
        If ``archivos`` is empty.
    """
    # Materialise once so generators work and emptiness is reported before
    # any model is trained, instead of as a ZeroDivisionError at the end.
    archivos = list(archivos)
    if not archivos:
        raise ValueError("se necesita al menos un DataFrame en 'archivos'")

    errores_rmse = []
    for df in archivos:
        X, y = df.drop(['ref_hash', 'segundos'], axis=1), df['segundos']
        # Fixed seed: every candidate model is scored on the same split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=150)

        reg.fit(X_train, y_train)
        preds = reg.predict(X_test)
        errores_rmse.append(np.sqrt(mean_squared_error(y_test, preds)))

    error_promedio = sum(errores_rmse) / len(errores_rmse)
    print('RMSE promedio: %f' % (error_promedio))
    return (error_promedio)

In [9]:
import xgboost as xgb

# Gradient-boosted trees used as the weak learner inside AdaBoost.
# 'reg:squarederror' is the current name of the squared-error objective;
# the previous 'reg:linear' spelling is a deprecated alias for it.
xg_reg = xgb.XGBRegressor(objective='reg:squarederror',
                          colsample_bytree=1, learning_rate=0.5,
                          max_depth=5, alpha=10, n_estimators=30)
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional argument works with both.
reg = AdaBoostRegressor(xg_reg, random_state=10, n_estimators=5,
                        learning_rate=0.2, loss='square')
# Last expression of the cell: the averaged RMSE is printed and displayed.
estimar_error_adaBoost(reg, archivos_sc)

RMSE promedio: 54403.097144


54403.0971439499