In [1]:
import pandas as pd
import numpy as np

In [3]:
df1 = pd.read_csv('entrenar_sc_1820.csv')
df2 = pd.read_csv('entrenar_sc_1921.csv')
df3 = pd.read_csv('entrenar_sc_2022.csv')
df4 = pd.read_csv('entrenar_sc_2123.csv')
archivos = [df1,df2,df3,df4]

In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [5]:
def buscar_parametros_randomForest_st(rf_reg):
    suma_errores = 0
    for df in archivos:
        X,y = df.drop(['ref_hash','segundos'],axis=1),df['segundos']
        X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.2, random_state=150)

        rf_reg.fit(X_train,y_train)
        preds = rf_reg.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, preds))
        suma_errores += rmse
       
    error_promedio = suma_errores / len(archivos)
    print('RMSE promedio: %f' % (error_promedio))
    return (error_promedio)

In [6]:
errores = []
for v in [5,10,15]:
    rf_reg = RandomForestRegressor(n_estimators=v)
    print('Estimators: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

Estimators: 5.000000
RMSE promedio: 58229.257605
Estimators: 10.000000
RMSE promedio: 56778.399376
Estimators: 15.000000
RMSE promedio: 56225.170593
Error minimo 56225.170593 con valor 15.000000


In [8]:
errores = []
for v in [2,3,5,8,15]:
    rf_reg = RandomForestRegressor(n_estimators=10,max_features=v)
    print('Max_features: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

Max_features: 2.000000
RMSE promedio: 56714.437619
Max_features: 3.000000
RMSE promedio: 56735.701712
Max_features: 5.000000
RMSE promedio: 56689.498668
Max_features: 8.000000
RMSE promedio: 56786.282957
Max_features: 15.000000
RMSE promedio: 56727.991496
Error minimo 56689.498668 con valor 5.000000


In [9]:
errores = []
for v in [2,5,8,10,15,20]:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=v, max_features=5)
    print('Max_Depth: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

Max_Depth: 2.000000
RMSE promedio: 56660.895128
Max_Depth: 5.000000
RMSE promedio: 54192.985787
Max_Depth: 8.000000
RMSE promedio: 54177.406335
Max_Depth: 10.000000
RMSE promedio: 54191.564430
Max_Depth: 15.000000
RMSE promedio: 54313.991327
Max_Depth: 20.000000
RMSE promedio: 54637.679932
Error minimo 54177.406335 con valor 8.000000


In [10]:
errores = []
for v in [7,8,9,10]:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=8, max_features=5,
                                   min_samples_split=v)
    print('Min_samples_split: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

Min_samples_split: 7.000000
RMSE promedio: 54173.764951
Min_samples_split: 8.000000
RMSE promedio: 54169.612628
Min_samples_split: 9.000000
RMSE promedio: 54179.309619
Min_samples_split: 10.000000
RMSE promedio: 54172.497682
Error minimo 54169.612628 con valor 8.000000


In [11]:
errores = []
for v in [1,2,3,4,5]:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=14, max_features=5,
                                   min_samples_leaf=v)
    print('Min_samples_leaf: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

Min_samples_leaf: 1.000000
RMSE promedio: 54279.094389
Min_samples_leaf: 2.000000
RMSE promedio: 54258.499220
Min_samples_leaf: 3.000000
RMSE promedio: 54251.248902
Min_samples_leaf: 4.000000
RMSE promedio: 54276.439465
Min_samples_leaf: 5.000000
RMSE promedio: 54225.201426
Error minimo 54225.201426 con valor 5.000000


In [12]:
errores = []
for v in [0.0,0.1,0.2,0.3]:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=14, max_features=5,
                                   min_weight_fraction_leaf=v)
    print('min_weight_fraction_leaf: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

min_weight_fraction_leaf: 0.000000
RMSE promedio: 54286.760820
min_weight_fraction_leaf: 0.100000
RMSE promedio: 56648.139536
min_weight_fraction_leaf: 0.200000
RMSE promedio: 63610.111783
min_weight_fraction_leaf: 0.300000
RMSE promedio: 65019.412771
Error minimo 54286.760820 con valor 0.000000


In [13]:
errores = []
for v in [0.0,0.1,0.2,0.3]:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=14, max_features=5,
                                   min_impurity_decrease=v)
    print('min_impurity_decrease: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

min_impurity_decrease: 0.000000
RMSE promedio: 54278.726561
min_impurity_decrease: 0.100000
RMSE promedio: 54284.036496
min_impurity_decrease: 0.200000
RMSE promedio: 54291.921313
min_impurity_decrease: 0.300000
RMSE promedio: 54270.267854
Error minimo 54270.267854 con valor 0.300000


In [14]:
errores = []
for v in ['True']:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=14, max_features=5,
                                   min_impurity_decrease=0.3,bootstrap=True)
    print('bootstrap: %s'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %s' %(error,v))

bootstrap: True
RMSE promedio: 54277.610541
Error minimo 54277.610541 con valor True


In [15]:
errores = []
for v in [10,20,30]:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=14, max_features=5,
                                   min_impurity_decrease=0.3, n_jobs=v)
    print(' n_jobs: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

 n_jobs: 10.000000
RMSE promedio: 54293.351482
 n_jobs: 20.000000
RMSE promedio: 54276.734197
 n_jobs: 30.000000
RMSE promedio: 54291.849494
Error minimo 54276.734197 con valor 20.000000


In [16]:
errores = []
for v in [1,3,5,7]:
    rf_reg = RandomForestRegressor(n_estimators=10, max_depth=14, max_features=5,
                                   min_impurity_decrease=0.3, random_state=v)
    print('random_state: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

random_state: 1.000000
RMSE promedio: 54275.015952
random_state: 3.000000
RMSE promedio: 54290.264200
random_state: 5.000000
RMSE promedio: 54301.371991
random_state: 7.000000
RMSE promedio: 54273.948357
Error minimo 54273.948357 con valor 7.000000


In [18]:
errores = []
for v in [5,10,20,35]:
    rf_reg = RandomForestRegressor(n_estimators=v, max_depth=14, max_features=5,
                                   min_impurity_decrease=0.3,random_state=1)
    print('n_estimators: %f'% (v))
    error = buscar_parametros_randomForest_st(rf_reg)
    errores.append([error,v])
error,v = min(errores,key=lambda x: x[0])
print('Error minimo %f con valor %f' %(error,v))

n_estimators: 5.000000
RMSE promedio: 54393.507939
n_estimators: 10.000000
RMSE promedio: 54275.015952
n_estimators: 20.000000
RMSE promedio: 54222.138899
n_estimators: 35.000000
RMSE promedio: 54199.065454
Error minimo 54199.065454 con valor 35.000000


In [None]:
rf_reg = RandomForestRegressor(n_estimators=40, max_depth=14, max_features=5,
                                   min_impurity_decrease=0.3, random_state=1)