In [1]:
import psycopg2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.impute import KNNImputer
from joblib import parallel_backend
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import PowerTransformer, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV
from hummingbird.ml import convert, load
import pickle

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Cargar las variables desde el archivo pickle
with open('modeling.pkl', 'rb') as f:
    variables_cargadas = pickle.load(f)

# Asignar las variables cargadas a las variables locales
resultados_elec_trial = variables_cargadas['resultados_elec_trial']
resultados_oil_trial = variables_cargadas['resultados_oil_trial']
grid_search_elec = variables_cargadas['grid_search_elec']
grid_search_oil = variables_cargadas['grid_search_oil']
ventas_elec_X_train = variables_cargadas['ventas_elec_X_train']
ventas_elec_y_train = variables_cargadas['ventas_elec_y_train']
ventas_oil_X_train = variables_cargadas['ventas_oil_X_train']
ventas_oil_y_train = variables_cargadas['ventas_oil_y_train']
ventas_elec_X_test = variables_cargadas['ventas_elec_X_test']
ventas_elec_y_test = variables_cargadas['ventas_elec_y_test']
ventas_oil_X_test = variables_cargadas['ventas_oil_X_test']
ventas_oil_y_test = variables_cargadas['ventas_oil_y_test']
best_tuning_oil_set = variables_cargadas['best_tuning_oil_set']
best_tuning_elec_set = variables_cargadas['best_tuning_elec_set']
grid_search_oil_nd = variables_cargadas['grid_search_oil_nd']
best_tuning_oil_set_nd = variables_cargadas['best_tuning_oil_set_nd']
grid_search_elec_nd = variables_cargadas['grid_search_elec_nd']
best_tuning_elec_set_nd = variables_cargadas['best_tuning_elec_set_nd']
resultados_oil_tuning_st = variables_cargadas['resultados_oil_tuning']
resultados_elec_tuning_st = variables_cargadas['resultados_elec_tuning']
resultados_oil_tuning_nd = variables_cargadas['resultados_oil_tuning_nd']
resultados_elec_tuning_nd = variables_cargadas['resultados_elec_tuning_nd']
resultados_oil_test = variables_cargadas['resultados_oil_test']
resultados_elec_test = variables_cargadas['resultados_elec_test']

print(resultados_elec_trial)
print(resultados_oil_trial)
print(grid_search_elec)
print(grid_search_oil)

{'Iteración_1': {'RMSE': 0.05402045629123839, 'MAE': 0.03449878193376891, 'R^2': 0.9317650820957611}, 'Iteración_2': {'RMSE': 0.05027020765367977, 'MAE': 0.03241016327985451, 'R^2': 0.9422539826713857}, 'Iteración_3': {'RMSE': 0.05138945271798852, 'MAE': 0.03286090359244774, 'R^2': 0.9335504016111816}, 'Iteración_4': {'RMSE': 0.04840236455998912, 'MAE': 0.03263983670090315, 'R^2': 0.9420970512139789}, 'Iteración_5': {'RMSE': 0.051640240682741416, 'MAE': 0.03165772067893205, 'R^2': 0.9341779295060888}}
{'Iteración_1': {'RMSE': 0.09822190682200777, 'MAE': 0.06247922172715622, 'R^2': 0.912146031285303}, 'Iteración_2': {'RMSE': 0.09490642943732669, 'MAE': 0.06131170574897003, 'R^2': 0.9189157692548141}, 'Iteración_3': {'RMSE': 0.09408358649433465, 'MAE': 0.06107023311121878, 'R^2': 0.9178573979312368}, 'Iteración_4': {'RMSE': 0.09557670518329872, 'MAE': 0.061695862949890386, 'R^2': 0.9161993551498107}, 'Iteración_5': {'RMSE': 0.09619293264458287, 'MAE': 0.06177340125524675, 'R^2': 0.915674

In [3]:
ventas_oil = pd.read_csv('ventas_oil_imputed.csv')
ventas_elec = pd.read_csv('ventas_elec_imputed.csv')

In [5]:
ventas_oil.dtypes


price_amount                     float64
makeid                           float64
modelid                          float64
versionid                        float64
manufacturerprice                float64
km                               float64
vehicleyear                      float64
etiqueta_type_id                 float64
provinceid                       float64
horsepower                       float64
maxspeed                         float64
acceleration                     float64
combustible_type_id              float64
body_type_id                     float64
transmision_type_id              float64
doors                            float64
seatingcapacity                  float64
colores_type_id                  float64
dimensionsinmillimeterswidth     float64
dimensionsinmillimetersheight    float64
dimensionsinmillimeterslength    float64
weight                           float64
tankcapacityinliters             float64
trunkcapacityinliters            float64
consumptionurban

In [118]:
ventas_oil['makeid'] = ventas_oil['makeid'].astype('category')
ventas_oil['etiqueta_type_id'] = ventas_oil['etiqueta_type_id'].astype('category')
ventas_oil['provinceid'] = ventas_oil['provinceid'].astype('category')
ventas_oil['transmision_type_id'] = ventas_oil['transmision_type_id'].astype('category')
ventas_oil['body_type_id'] = ventas_oil['body_type_id'].astype('category')
ventas_oil['combustible_type_id'] = ventas_oil['combustible_type_id'].astype('category')
ventas_oil['colores_type_id'] = ventas_oil['colores_type_id'].astype('category')
ventas_oil['lPrice'] = np.log10(ventas_oil['price_amount'])

In [3]:
ventas_oil.dtypes

NameError: name 'ventas_oil' is not defined

In [120]:
ventas_elec.dtypes

price_amount                     float64
makeid                           float64
manufacturerprice                float64
km                               float64
vehicleyear                      float64
etiqueta_type_id                 float64
provinceid                       float64
horsepower                       float64
maxspeed                         float64
acceleration                     float64
combustible_type_id              float64
body_type_id                     float64
transmision_type_id              float64
doors                            float64
seatingcapacity                  float64
colores_type_id                  float64
dimensionsinmillimeterswidth     float64
dimensionsinmillimetersheight    float64
dimensionsinmillimeterslength    float64
weight                           float64
tankcapacityinliters             float64
trunkcapacityinliters            float64
consumptionurban                 float64
consumptionmixed                 float64
consumptionextra

In [121]:
ventas_elec['makeid'] = ventas_elec['makeid'].astype('category')
ventas_elec['etiqueta_type_id'] = ventas_elec['etiqueta_type_id'].astype('category')
ventas_elec['provinceid'] = ventas_elec['provinceid'].astype('category')
ventas_elec['transmision_type_id'] = ventas_elec['transmision_type_id'].astype('category')
ventas_elec['body_type_id'] = ventas_elec['body_type_id'].astype('category')
ventas_elec['combustible_type_id'] = ventas_elec['combustible_type_id'].astype('category')
ventas_elec['colores_type_id'] = ventas_elec['colores_type_id'].astype('category')
ventas_elec['lPrice'] = np.log10(ventas_elec['price_amount'])

In [122]:
ventas_elec.dtypes

price_amount                      float64
makeid                           category
manufacturerprice                 float64
km                                float64
vehicleyear                       float64
etiqueta_type_id                 category
provinceid                       category
horsepower                        float64
maxspeed                          float64
acceleration                      float64
combustible_type_id              category
body_type_id                     category
transmision_type_id              category
doors                             float64
seatingcapacity                   float64
colores_type_id                  category
dimensionsinmillimeterswidth      float64
dimensionsinmillimetersheight     float64
dimensionsinmillimeterslength     float64
weight                            float64
tankcapacityinliters              float64
trunkcapacityinliters             float64
consumptionurban                  float64
consumptionmixed                  

In [2]:
# Separamos en training y test 50% para training y 50% para test
ventas_oil_X = ventas_oil.drop(columns=['price_amount', 'lPrice'])
ventas_oil_y = ventas_oil['lPrice']
ventas_oil_X_train, ventas_oil_X_test, ventas_oil_y_train, ventas_oil_y_test = train_test_split(ventas_oil_X, ventas_oil_y, test_size=0.45, random_state=123)

# Separamos en training y test 75% para training y 25% para test
ventas_elec_X = ventas_elec.drop(columns=['price_amount', 'lPrice'])
ventas_elec_y = ventas_elec['lPrice']
ventas_elec_X_train, ventas_elec_X_test, ventas_elec_y_train, ventas_elec_y_test = train_test_split(ventas_elec_X, ventas_elec_y, test_size=0.25, random_state=123)

# El 55% de los datos de training se dividen en un 80% para training y 20% para validación
# No se usa
ventas_oil_X_train_split, ventas_oil_X_val, ventas_oil_y_train_split, ventas_oil_y_val = train_test_split(ventas_oil_X_train, ventas_oil_y_train, test_size=0.2, random_state=456)

# El 80% de los datos de training se dividen en un 80% para training y 20% para validación
# No se usa
ventas_elec_X_train_split, ventas_elec_X_val, ventas_elec_y_train_split, ventas_elec_y_val = train_test_split(ventas_elec_X_train, ventas_elec_y_train, test_size=0.2, random_state=456)


NameError: name 'ventas_oil' is not defined

In [124]:
# Crear objetos validacion cruzada para trial, tuning y assessment
rk_trial = RepeatedKFold(n_splits=5, n_repeats=1, random_state=123)
rk_tuning = RepeatedKFold(n_splits=5, n_repeats=1, random_state=456)
rk_assessment = RepeatedKFold(n_splits=5, n_repeats=1, random_state=789)

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def mae(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred)

def rsq(y_true, y_pred):
    return r2_score(y_true, y_pred)

numeric_features_oil = ventas_oil_X_train.select_dtypes(include=['int', 'float']).columns
preprocessor_oil = ColumnTransformer(
    transformers=[
        ('yeo_johnson', PowerTransformer(method='yeo-johnson'), ['km']),
        ('scaling', StandardScaler(), numeric_features_oil)
    ])

numeric_features_elec = ventas_elec_X_train.select_dtypes(include=['int', 'float']).columns
preprocessor_elec = ColumnTransformer(
    transformers=[
        ('yeo_johnson', PowerTransformer(method='yeo-johnson'), ['km']),
        ('scaling', StandardScaler(), numeric_features_elec)
    ])

rf_model_trial = RandomForestRegressor(n_estimators=1000, min_samples_leaf=15)
rf_pipeline_trial_oil = Pipeline(steps=[('preprocessor', preprocessor_oil), ('model', rf_model_trial)])
rf_pipeline_trial_elec = Pipeline(steps=[('preprocessor', preprocessor_elec), ('model', rf_model_trial)])

## Trial

In [125]:
resultados_oil_trial = {}
index = 0
for train_index, test_index in rk_trial.split(ventas_oil_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_trial_train, X_trial_test = ventas_oil_X_train.iloc[train_index], ventas_oil_X_train.iloc[test_index]
    y_trial_train, y_trial_test = ventas_oil_y_train.iloc[train_index], ventas_oil_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=8):
        rf_pipeline_trial_oil.fit(X_trial_train, y_trial_train)
    
    y_pred = rf_pipeline_trial_oil.predict(X_trial_test)
    rmse_value = rmse(y_trial_test, y_pred)
    mae_value = mae(y_trial_test, y_pred)
    rsq_value = rsq(y_trial_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_oil_trial[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}
    

TRAIN: [    0     1     3 ... 85754 85756 85757] TEST: [    2     8    10 ... 85747 85755 85758]
TRAIN: [    0     2     3 ... 85755 85757 85758] TEST: [    1     5     7 ... 85751 85754 85756]
TRAIN: [    0     1     2 ... 85756 85757 85758] TEST: [   16    19    21 ... 85738 85739 85740]
TRAIN: [    0     1     2 ... 85755 85756 85758] TEST: [    3    14    28 ... 85746 85750 85757]
TRAIN: [    1     2     3 ... 85756 85757 85758] TEST: [    0     4     6 ... 85749 85752 85753]


In [126]:
resultados_elec_trial = {}
index = 0
for train_index, test_index in rk_trial.split(ventas_elec_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_trial_train, X_trial_test = ventas_elec_X_train.iloc[train_index], ventas_elec_X_train.iloc[test_index]
    y_trial_train, y_trial_test = ventas_elec_y_train.iloc[train_index], ventas_elec_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=8):
        rf_pipeline_trial_elec.fit(X_trial_train, y_trial_train)
    
    y_pred = rf_pipeline_trial_elec.predict(X_trial_test)
    rmse_value = rmse(y_trial_test, y_pred)
    mae_value = mae(y_trial_test, y_pred)
    rsq_value = rsq(y_trial_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_elec_trial[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}

TRAIN: [   0    1    2 ... 7320 7321 7322] TEST: [   4    6    9 ... 7310 7312 7313]
TRAIN: [   0    1    2 ... 7320 7321 7322] TEST: [  10   12   15 ... 7299 7301 7318]
TRAIN: [   0    1    2 ... 7318 7319 7320] TEST: [   5   18   19 ... 7316 7321 7322]
TRAIN: [   2    3    4 ... 7320 7321 7322] TEST: [   0    1    7 ... 7306 7317 7319]
TRAIN: [   0    1    4 ... 7319 7321 7322] TEST: [   2    3    8 ... 7307 7315 7320]


In [127]:
for key, value in resultados_oil_trial.items():
    print(key, ": ", value)
    print()

Iteración_1 :  {'RMSE': 0.09822190682200777, 'MAE': 0.06247922172715622, 'R^2': 0.912146031285303}

Iteración_2 :  {'RMSE': 0.09490642943732669, 'MAE': 0.06131170574897003, 'R^2': 0.9189157692548141}

Iteración_3 :  {'RMSE': 0.09408358649433465, 'MAE': 0.06107023311121878, 'R^2': 0.9178573979312368}

Iteración_4 :  {'RMSE': 0.09557670518329872, 'MAE': 0.061695862949890386, 'R^2': 0.9161993551498107}

Iteración_5 :  {'RMSE': 0.09619293264458287, 'MAE': 0.06177340125524675, 'R^2': 0.9156746378581108}



In [128]:
for key, value in resultados_elec_trial.items():
    print(key, ": ", value)
    print()

Iteración_1 :  {'RMSE': 0.05402045629123839, 'MAE': 0.03449878193376891, 'R^2': 0.9317650820957611}

Iteración_2 :  {'RMSE': 0.05027020765367977, 'MAE': 0.03241016327985451, 'R^2': 0.9422539826713857}

Iteración_3 :  {'RMSE': 0.05138945271798852, 'MAE': 0.03286090359244774, 'R^2': 0.9335504016111816}

Iteración_4 :  {'RMSE': 0.04840236455998912, 'MAE': 0.03263983670090315, 'R^2': 0.9420970512139789}

Iteración_5 :  {'RMSE': 0.051640240682741416, 'MAE': 0.03165772067893205, 'R^2': 0.9341779295060888}



## Primer Tuneado

In [129]:
print("Número de columnas oil:", ventas_oil_X_train.shape[1])
print("Número de columnas elec:", ventas_elec_X_train.shape[1])

Número de columnas oil: 25
Número de columnas elec: 29


In [130]:
rf_pipeline_tuning_oil = Pipeline(steps=[('preprocessor', preprocessor_oil), ('model', rf_model_trial)])
param_grid = {
    'model__n_estimators': [1100,1300,1500,1700,1900,2000],
    'model__min_samples_leaf': [5, 10,15,20],
    'model__max_features': ['0.5', 'sqrt', 'log2']
}
grid_search_oil = GridSearchCV(rf_pipeline_tuning_oil, param_grid, cv=5, n_jobs=8)
grid_search_oil.fit(ventas_oil_X_train, ventas_oil_y_train)


240 fits failed out of a total of 480.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
31 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Pabma\AppData\Roaming\Python\Python312\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Pabma\AppData\Roaming\Python\Python312\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Pabma\AppData\Roaming\Python\Python312\site-packages\sklearn\pipeline.py", line 475, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "C:\U

In [131]:
# Crear un diccionario con las variables que deseas guardar
guardar_oil = {'grid_search_oil':grid_search_oil}

# Guardar las variables en un archivo pickle
with open('guardar_oil.pkl', 'wb') as f:
    pickle.dump(guardar_oil, f)

In [132]:
print("Resultados de la búsqueda de cuadrícula:")
print(grid_search_oil.cv_results_)

Resultados de la búsqueda de cuadrícula:
{'mean_fit_time': array([1.04727888e-01, 9.82796669e-02, 9.65757847e-02, 9.55177307e-02,
       9.40149307e-02, 9.46839809e-02, 9.38342571e-02, 9.36552048e-02,
       9.57126141e-02, 9.42988873e-02, 9.30900097e-02, 9.49405193e-02,
       9.33746338e-02, 9.43871975e-02, 9.23221111e-02, 9.41194057e-02,
       9.27170753e-02, 9.62388515e-02, 9.51129913e-02, 9.44875240e-02,
       9.46195126e-02, 9.29694176e-02, 9.35595989e-02, 9.38621998e-02,
       1.46421410e+02, 1.76249039e+02, 2.06267085e+02, 2.31871505e+02,
       2.60162455e+02, 2.73178895e+02, 1.34570496e+02, 1.59316574e+02,
       1.83968416e+02, 2.09078512e+02, 2.32962383e+02, 2.45703439e+02,
       1.26518117e+02, 1.50001389e+02, 1.72573831e+02, 1.95104540e+02,
       2.18482439e+02, 2.29610276e+02, 1.20652243e+02, 1.42834575e+02,
       1.64664761e+02, 1.86459065e+02, 2.08596695e+02, 2.18601542e+02,
       1.50835904e+02, 1.78026510e+02, 2.05009650e+02, 2.32234148e+02,
       2.60993425e

In [133]:
print("Mejores parámetros:", grid_search_oil.best_params_)
print("Mejor puntaje:", grid_search_oil.best_score_)
best_tuning_oil_set = grid_search_oil.best_estimator_

Mejores parámetros: {'model__max_features': 'log2', 'model__min_samples_leaf': 5, 'model__n_estimators': 2000}
Mejor puntaje: 0.928466112317814


In [134]:
resultados_oil_tuning_st = {}
index = 0
for train_index, test_index in rk_tuning.split(ventas_oil_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_tuning_train, X_tuning_test = ventas_oil_X_train.iloc[train_index], ventas_oil_X_train.iloc[test_index]
    y_tuning_train, y_tuning_test = ventas_oil_y_train.iloc[train_index], ventas_oil_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=8):
        best_tuning_oil_set.fit(X_tuning_train, y_tuning_train)
    
    y_pred = best_tuning_oil_set.predict(X_tuning_test)
    rmse_value = rmse(y_tuning_test, y_pred)
    mae_value = mae(y_tuning_test, y_pred)
    rsq_value = rsq(y_tuning_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_oil_tuning_st[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}

TRAIN: [    0     1     2 ... 85756 85757 85758] TEST: [    6    15    17 ... 85716 85719 85731]
TRAIN: [    0     1     4 ... 85755 85757 85758] TEST: [    2     3     9 ... 85740 85747 85756]
TRAIN: [    1     2     3 ... 85752 85756 85758] TEST: [    0     7    10 ... 85754 85755 85757]
TRAIN: [    0     2     3 ... 85756 85757 85758] TEST: [    1     4     5 ... 85750 85751 85752]
TRAIN: [    0     1     2 ... 85755 85756 85757] TEST: [   11    16    20 ... 85737 85749 85758]


In [135]:
rf_pipeline_tuniing_elec = Pipeline(steps=[('preprocessor', preprocessor_elec), ('model', rf_model_trial)])
param_grid = {
    'model__n_estimators': [1100,1300,1500,1700,1900,2000],
    'model__min_samples_leaf': [5, 10,15,20],
    'model__max_features': ['0.5', 'sqrt', 'log2']
}
grid_search_elec = GridSearchCV(rf_pipeline_tuniing_elec, param_grid, cv=5, n_jobs=8)
grid_search_elec.fit(ventas_elec_X_train, ventas_elec_y_train) 

240 fits failed out of a total of 480.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Pabma\AppData\Roaming\Python\Python312\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Pabma\AppData\Roaming\Python\Python312\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Pabma\AppData\Roaming\Python\Python312\site-packages\sklearn\pipeline.py", line 475, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "C:\U

In [136]:
guardar_elec = {'grid_search_elec':grid_search_elec}
with open('guardar_elec.pkl', 'wb') as f:
    pickle.dump(guardar_elec, f)

In [137]:
print("Resultados de la búsqueda de cuadrícula:")
print(grid_search_elec.cv_results_)

Resultados de la búsqueda de cuadrícula:
{'mean_fit_time': array([1.34588718e-02, 1.42663002e-02, 1.32784843e-02, 1.25725746e-02,
       1.22159958e-02, 1.19928360e-02, 1.36389256e-02, 1.23281002e-02,
       1.29244804e-02, 1.23542786e-02, 1.31314754e-02, 1.18512154e-02,
       1.24299526e-02, 1.24592781e-02, 1.22796535e-02, 1.23634815e-02,
       1.19172096e-02, 1.22150898e-02, 1.23830795e-02, 1.25838757e-02,
       1.18564129e-02, 1.22397900e-02, 1.18177414e-02, 1.17979050e-02,
       8.00683403e+00, 9.84719782e+00, 1.15842411e+01, 1.32931706e+01,
       1.48807493e+01, 1.57275941e+01, 7.43251243e+00, 8.86556978e+00,
       1.03070875e+01, 1.16142831e+01, 1.30067406e+01, 1.37361750e+01,
       6.93841515e+00, 8.30231738e+00, 9.58128872e+00, 1.08988356e+01,
       1.21147258e+01, 1.27848146e+01, 6.62429104e+00, 7.92231536e+00,
       9.02166386e+00, 1.02748506e+01, 1.13646822e+01, 1.20870569e+01,
       8.93666239e+00, 1.05278759e+01, 1.20799730e+01, 1.37570792e+01,
       1.52043847e

In [138]:
print("Mejores parámetros:", grid_search_elec.best_params_)
best_tuning_elec_set = grid_search_elec.best_estimator_

Mejores parámetros: {'model__max_features': 'sqrt', 'model__min_samples_leaf': 5, 'model__n_estimators': 1500}


In [7]:
resultados_elec_tuning_st = {}
index = 0
for train_index, test_index in rk_tuning.split(ventas_elec_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_tuning_train, X_tuning_test = ventas_elec_X_train.iloc[train_index], ventas_elec_X_train.iloc[test_index]
    y_tuning_train, y_tuning_test = ventas_elec_y_train.iloc[train_index], ventas_elec_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=8):
        best_tuning_elec_set.fit(X_tuning_train, y_tuning_train)
    
    y_pred = best_tuning_elec_set.predict(X_tuning_test)
    rmse_value = rmse(y_tuning_test, y_pred)
    mae_value = mae(y_tuning_test, y_pred)
    rsq_value = rsq(y_tuning_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_elec_tuning_st[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}

NameError: name 'rk_tuning' is not defined

In [140]:
print("Mejores parametros: ", grid_search_oil.best_params_)
for key, value in resultados_oil_tuning_st.items():
    print(key, ": ", value)
    print()

Mejores parametros:  {'model__max_features': 'log2', 'model__min_samples_leaf': 5, 'model__n_estimators': 2000}
Iteración_1 :  {'RMSE': 0.08809800794287795, 'MAE': 0.05649437128595136, 'R^2': 0.9288025503438677}

Iteración_2 :  {'RMSE': 0.08911960407077332, 'MAE': 0.05695497543719061, 'R^2': 0.9269369948858782}

Iteración_3 :  {'RMSE': 0.08713903916718434, 'MAE': 0.05607962305699682, 'R^2': 0.9310023273834533}

Iteración_4 :  {'RMSE': 0.0873998847720626, 'MAE': 0.0566779443161111, 'R^2': 0.9294281311011914}

Iteración_5 :  {'RMSE': 0.091524524067106, 'MAE': 0.05727053988610467, 'R^2': 0.9248107338849252}



In [141]:
print("Mejores parametros: ", grid_search_elec.best_params_)
for key, value in resultados_elec_tuning_st.items():
    print(key, ": ", value)
    print()

Mejores parametros:  {'model__max_features': 'sqrt', 'model__min_samples_leaf': 5, 'model__n_estimators': 1500}
Iteración_1 :  {'RMSE': 0.04895933393170097, 'MAE': 0.03120137298976637, 'R^2': 0.9439889423167549}

Iteración_2 :  {'RMSE': 0.04747556013599743, 'MAE': 0.03164547619184308, 'R^2': 0.9474099616066924}

Iteración_3 :  {'RMSE': 0.04967437570811596, 'MAE': 0.03098151520598413, 'R^2': 0.9398837538770731}

Iteración_4 :  {'RMSE': 0.04469746742151344, 'MAE': 0.030208222007371936, 'R^2': 0.9511321848200518}

Iteración_5 :  {'RMSE': 0.04820645864982604, 'MAE': 0.03199020776807696, 'R^2': 0.941363824831054}



## Segundo tuneado

In [143]:
rf_pipeline_tuning_oil_nd = Pipeline(steps=[('preprocessor', preprocessor_oil), ('model', rf_model_trial)])
param_grid = {
    'model__n_estimators': [1800,1900,2000,2100,2200],
    'model__min_samples_leaf': [3,4,5,6,7],
    'model__max_features': ['log2']
}
grid_search_oil_nd = GridSearchCV(rf_pipeline_tuning_oil_nd, param_grid, cv=5, n_jobs=5)
grid_search_oil_nd.fit(ventas_oil_X_train, ventas_oil_y_train)

In [144]:
print("Resultados de la búsqueda de cuadrícula:")
print(grid_search_oil_nd.cv_results_)

Resultados de la búsqueda de cuadrícula:
{'mean_fit_time': array([234.82482719, 253.63339939, 264.49956365, 278.49793749,
       291.40483422, 224.98792229, 237.5175118 , 251.52051969,
       269.20505342, 279.96458125, 212.36064777, 225.8152967 ,
       236.70819201, 243.20851035, 249.49319367, 215.69014344,
       223.79894338, 233.33850589, 234.40938911, 245.94596882,
       196.9048944 , 206.55742745, 227.8813334 , 238.91945372,
       246.96652894]), 'std_fit_time': array([1.21100279, 1.24920186, 1.09747043, 1.93078853, 1.16450412,
       1.52228969, 1.83258751, 1.97771862, 1.61472769, 1.64199403,
       0.61900374, 1.57832767, 1.30644849, 0.86161743, 3.01841903,
       1.58262754, 3.03301405, 1.75727583, 0.71942495, 1.81699901,
       1.09560568, 1.48159946, 2.58806354, 1.60398075, 3.51107015]), 'mean_score_time': array([11.20181789,  6.13307047,  6.45789151,  6.88849335,  6.94888406,
        5.28367333,  5.49224644,  5.84411483,  6.12690735,  6.49308529,
        4.84740357,  5.2

In [145]:
print("Mejores parámetros:", grid_search_oil_nd.best_params_)
print("Mejor puntaje:", grid_search_oil_nd.best_score_)
best_tuning_oil_set_nd = grid_search_oil_nd.best_estimator_

Mejores parámetros: {'model__max_features': 'log2', 'model__min_samples_leaf': 3, 'model__n_estimators': 1900}
Mejor puntaje: 0.931394125046087


In [146]:
resultados_oil_tuning_nd = {}
index = 0
for train_index, test_index in rk_tuning.split(ventas_oil_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_tuning_train, X_tuning_test = ventas_oil_X_train.iloc[train_index], ventas_oil_X_train.iloc[test_index]
    y_tuning_train, y_tuning_test = ventas_oil_y_train.iloc[train_index], ventas_oil_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=5):
        best_tuning_oil_set_nd.fit(X_tuning_train, y_tuning_train)
    
    y_pred = best_tuning_oil_set_nd.predict(X_tuning_test)
    rmse_value = rmse(y_tuning_test, y_pred)
    mae_value = mae(y_tuning_test, y_pred)
    rsq_value = rsq(y_tuning_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_oil_tuning_nd[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}

TRAIN: [    0     1     2 ... 85756 85757 85758] TEST: [    6    15    17 ... 85716 85719 85731]
TRAIN: [    0     1     4 ... 85755 85757 85758] TEST: [    2     3     9 ... 85740 85747 85756]
TRAIN: [    1     2     3 ... 85752 85756 85758] TEST: [    0     7    10 ... 85754 85755 85757]
TRAIN: [    0     2     3 ... 85756 85757 85758] TEST: [    1     4     5 ... 85750 85751 85752]
TRAIN: [    0     1     2 ... 85755 85756 85757] TEST: [   11    16    20 ... 85737 85749 85758]


In [147]:
rf_pipeline_tuning_elec_nd = Pipeline(steps=[('preprocessor', preprocessor_elec), ('model', rf_model_trial)])
param_grid = {
    'model__n_estimators': [1300,1400,1500,1600,1700],
    'model__min_samples_leaf': [3,4,5,6,7],
    'model__max_features': ['sqrt']
}
grid_search_elec_nd = GridSearchCV(rf_pipeline_tuning_elec_nd, param_grid, cv=5, n_jobs=5)
grid_search_elec_nd.fit(ventas_elec_X_train, ventas_elec_y_train)

In [148]:
print("Resultados de la búsqueda de cuadrícula:")
print(grid_search_elec_nd.cv_results_)


Resultados de la búsqueda de cuadrícula:
{'mean_fit_time': array([ 9.89314871, 10.48327651, 11.44295397, 12.08632946, 12.84138865,
        9.19881864,  9.91329169, 10.65885816, 11.34997702, 12.06432238,
        8.7890049 ,  9.38367233, 10.12871146, 10.70951748, 11.48932796,
        8.34087415,  8.9439363 ,  9.65417051, 10.30356231, 10.82806263,
        8.02683072,  8.70934834,  9.3871336 , 10.10021248, 10.27100906]), 'std_fit_time': array([0.10380185, 0.122901  , 0.10328397, 0.12833504, 0.05051627,
       0.12386855, 0.08635499, 0.1491254 , 0.13402633, 0.12069424,
       0.05679481, 0.10894742, 0.09703761, 0.07718299, 0.08723705,
       0.09226601, 0.16592635, 0.09410045, 0.14566082, 0.10234986,
       0.04163289, 0.08842414, 0.06360024, 0.07606714, 0.23948015]), 'mean_score_time': array([0.23639193, 0.24558673, 0.26019921, 0.28329883, 0.299228  ,
       0.21072454, 0.22465091, 0.24891138, 0.26540623, 0.27627435,
       0.20422564, 0.21904821, 0.22964602, 0.24063668, 0.27470198,
      

In [149]:
print("Mejores parámetros:", grid_search_elec_nd.best_params_)
print("Mejor puntaje:", grid_search_elec_nd.best_score_)
best_tuning_elec_set_nd = grid_search_elec_nd.best_estimator_

Mejores parámetros: {'model__max_features': 'sqrt', 'model__min_samples_leaf': 3, 'model__n_estimators': 1700}
Mejor puntaje: 0.9490288318842278


In [3]:
resultados_elec_tuning_nd = {}
index = 0
for train_index, test_index in rk_tuning.split(ventas_elec_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_tuning_train, X_tuning_test = ventas_elec_X_train.iloc[train_index], ventas_elec_X_train.iloc[test_index]
    y_tuning_train, y_tuning_test = ventas_elec_y_train.iloc[train_index], ventas_elec_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=5):
        best_tuning_elec_set_nd.fit(X_tuning_train, y_tuning_train)
    
    y_pred = best_tuning_elec_set_nd.predict(X_tuning_test)
    rmse_value = rmse(y_tuning_test, y_pred)
    mae_value = mae(y_tuning_test, y_pred)
    rsq_value = rsq(y_tuning_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_elec_tuning_nd[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}

NameError: name 'rk_tuning' is not defined

In [4]:
print("Segundo tuneado oil:")
print("Mejor puntaje:", grid_search_oil_nd.best_score_)
print("Mejores parámetros:", grid_search_oil_nd.best_params_) 

for key, value in resultados_oil_tuning_nd.items():
    print(key, ": ", value)
    print()

Segundo tuneado oil:


NameError: name 'grid_search_oil_nd' is not defined

In [152]:
print("Segundo tuneado elec:")
print("Mejor puntaje:", grid_search_elec_nd.best_score_)
print("Mejores parámetros:", grid_search_elec_nd.best_params_)

for key, value in resultados_elec_tuning_nd.items():
    print(key, ": ", value)
    print()

Segundo tuneado elec:
Mejor puntaje: 0.9490288318842278
Mejores parámetros: {'model__max_features': 'sqrt', 'model__min_samples_leaf': 3, 'model__n_estimators': 1700}
Iteración_1 :  {'RMSE': 0.0470486477503964, 'MAE': 0.029361412495659024, 'R^2': 0.9482754092186169}

Iteración_2 :  {'RMSE': 0.0439286083276847, 'MAE': 0.02911596092107306, 'R^2': 0.9549745374747369}

Iteración_3 :  {'RMSE': 0.04767611005615244, 'MAE': 0.028705085985454352, 'R^2': 0.9446230993180065}

Iteración_4 :  {'RMSE': 0.042313960027203855, 'MAE': 0.028048077686513607, 'R^2': 0.9562050098924905}

Iteración_5 :  {'RMSE': 0.044965240156886084, 'MAE': 0.02942517472801862, 'R^2': 0.948983693722995}



## Ultimo Assessment

In [153]:
resultados_oil_assessment = {}
index = 0
for train_index, test_index in rk_assessment.split(ventas_oil_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_assessment_train, X_assessment_test = ventas_oil_X_train.iloc[train_index], ventas_oil_X_train.iloc[test_index]
    y_assessment_train, y_assessment_test = ventas_oil_y_train.iloc[train_index], ventas_oil_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=8):
        best_tuning_oil_set_nd.fit(X_assessment_train, y_assessment_train)
    
    y_pred = best_tuning_oil_set_nd.predict(X_assessment_test)
    rmse_value = rmse(y_assessment_test, y_pred)
    mae_value = mae(y_assessment_test, y_pred)
    rsq_value = rsq(y_assessment_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_oil_assessment[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}

TRAIN: [    0     1     2 ... 85756 85757 85758] TEST: [    6     9    13 ... 85741 85744 85751]
TRAIN: [    1     2     5 ... 85754 85755 85756] TEST: [    0     3     4 ... 85752 85757 85758]
TRAIN: [    0     2     3 ... 85755 85757 85758] TEST: [    1    15    16 ... 85748 85753 85756]
TRAIN: [    0     1     2 ... 85756 85757 85758] TEST: [    5    12    17 ... 85750 85754 85755]
TRAIN: [    0     1     3 ... 85756 85757 85758] TEST: [    2     8    10 ... 85719 85740 85746]


In [154]:
resultados_elec_assessment = {}
index = 0
for train_index, test_index in rk_assessment.split(ventas_elec_X_train):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_assessment_train, X_assessment_test = ventas_elec_X_train.iloc[train_index], ventas_elec_X_train.iloc[test_index]
    y_assessment_train, y_assessment_test = ventas_elec_y_train.iloc[train_index], ventas_elec_y_train.iloc[test_index]

    with parallel_backend('threading', n_jobs=8):
        best_tuning_elec_set_nd.fit(X_assessment_train, y_assessment_train)
    
    y_pred = best_tuning_elec_set_nd.predict(X_assessment_test)
    rmse_value = rmse(y_assessment_test, y_pred)
    mae_value = mae(y_assessment_test, y_pred)
    rsq_value = rsq(y_assessment_test, y_pred)
    metricS = {'RMSE': rmse_value, 'MAE': mae_value, 'R^2': rsq_value}
    index += 1
    resultados_elec_assessment[f'Iteración_{index}'] = {'RMSE': rmse_value,
                                      'MAE': mae_value,
                                      'R^2': rsq_value}

TRAIN: [   1    2    3 ... 7319 7320 7322] TEST: [   0    6    7 ... 7301 7312 7321]
TRAIN: [   0    2    3 ... 7314 7320 7321] TEST: [   1    9   13 ... 7318 7319 7322]
TRAIN: [   0    1    2 ... 7319 7321 7322] TEST: [   4    5   15 ... 7309 7311 7320]
TRAIN: [   0    1    2 ... 7320 7321 7322] TEST: [  11   16   24 ... 7307 7310 7313]
TRAIN: [   0    1    4 ... 7320 7321 7322] TEST: [   2    3    8 ... 7296 7302 7314]


In [155]:
print("Mejores parámetros:", grid_search_oil_nd.best_params_) 
for key, value in resultados_oil_assessment.items():
    print(key, ": ", value)
    print()

Mejores parámetros: {'model__max_features': 'log2', 'model__min_samples_leaf': 3, 'model__n_estimators': 1900}
Iteración_1 :  {'RMSE': 0.08821409664571328, 'MAE': 0.05569273063877018, 'R^2': 0.9290469826154446}

Iteración_2 :  {'RMSE': 0.08675462825374951, 'MAE': 0.05498029337255085, 'R^2': 0.931361150098708}

Iteración_3 :  {'RMSE': 0.0865356808491811, 'MAE': 0.05442829717590075, 'R^2': 0.9309121048678863}

Iteración_4 :  {'RMSE': 0.08601951167050323, 'MAE': 0.05464794327208654, 'R^2': 0.9329923245649012}

Iteración_5 :  {'RMSE': 0.08587425084144491, 'MAE': 0.05489417404749849, 'R^2': 0.9324882815606308}



In [156]:
print("Mejores parámetros:", grid_search_elec_nd.best_params_) 
for key, value in resultados_elec_assessment.items():
    print(key, ": ", value)
    print()

Mejores parámetros: {'model__max_features': 'sqrt', 'model__min_samples_leaf': 3, 'model__n_estimators': 1700}
Iteración_1 :  {'RMSE': 0.04499665573229906, 'MAE': 0.029650631941984154, 'R^2': 0.9538239469089596}

Iteración_2 :  {'RMSE': 0.04080499114344574, 'MAE': 0.026961873754190558, 'R^2': 0.9591788881872205}

Iteración_3 :  {'RMSE': 0.04757851014837482, 'MAE': 0.029522469294197235, 'R^2': 0.9482039743093637}

Iteración_4 :  {'RMSE': 0.04202303839746237, 'MAE': 0.027959673329648296, 'R^2': 0.9543264495841562}

Iteración_5 :  {'RMSE': 0.04886788392966534, 'MAE': 0.030353895733188488, 'R^2': 0.9406958017678045}



## Validacion con Test

In [157]:
y_pred_tuning_nd_oil = best_tuning_oil_set_nd.predict(ventas_oil_X_test)
rmse_value_nd = rmse(ventas_oil_y_test, y_pred_tuning_nd_oil)
mae_value_nd = mae(ventas_oil_y_test, y_pred_tuning_nd_oil)
rsq_value_nd = rsq(ventas_oil_y_test, y_pred_tuning_nd_oil)
resultados_oil_test = {'RMSE': rmse_value_nd, 'MAE': mae_value_nd, 'R^2': rsq_value_nd}

In [158]:
y_pred_tuning_nd_elec = best_tuning_elec_set_nd.predict(ventas_elec_X_test)
rmse_value_nd = rmse(ventas_elec_y_test, y_pred_tuning_nd_elec)
mae_value_nd = mae(ventas_elec_y_test, y_pred_tuning_nd_elec)
rsq_value_nd = rsq(ventas_elec_y_test, y_pred_tuning_nd_elec)
resultados_elec_test = {'RMSE': rmse_value_nd, 'MAE': mae_value_nd, 'R^2': rsq_value_nd}

In [6]:
print("Segundo tuneado oil:")
print("Mejor puntaje:", grid_search_oil_nd.best_score_)
print("Mejores parámetros:", grid_search_oil_nd.best_params_) 
print("Resultados test",resultados_oil_test) 

Segundo tuneado oil:
Mejor puntaje: 0.931394125046087
Mejores parámetros: {'model__max_features': 'log2', 'model__min_samples_leaf': 3, 'model__n_estimators': 1900}
Resultados test {'RMSE': 0.08570085781194274, 'MAE': 0.054603629284368994, 'R^2': 0.931308123325886}


In [5]:
print('Segundo tuneado elec:')
print("Mejor puntaje:", grid_search_elec_nd.best_score_)
print("Mejores parámetros:", grid_search_elec_nd.best_params_)
print("Resultados test", resultados_elec_test)

Segundo tuneado elec:
Mejor puntaje: 0.9490288318842278
Mejores parámetros: {'model__max_features': 'sqrt', 'model__min_samples_leaf': 3, 'model__n_estimators': 1700}
Resultados test {'RMSE': 0.05339704730472801, 'MAE': 0.02944376747845678, 'R^2': 0.9335293438408032}


In [4]:
import pandas as pd

# Define los datos para la nueva entrada
nueva_entrada = {
    'price_amount': 25000.0,
    'makeid': 102,  # Suponiendo que 'makeid' es el fabricante del vehículo
    'manufacturerprice': 27000.0,
    'km': 50000.0,
    'vehicleyear': 2018,
    'etiqueta_type_id': 1,  # Suponiendo que 'etiqueta_type_id' es el tipo de vehículo
    'provinceid': 14,
    'horsepower': 200.0,
    'maxspeed': 180.0,
    'acceleration': 8.0,
    'combustible_type_id': 2,
    'body_type_id': 'SUV',  # Suponiendo que 'body_type_id' es el tipo de carrocería
    'transmision_type_id': 2,
    'doors': 4.0,
    'seatingcapacity': 5.0,
    'colores_type_id': 9,
    'dimensionsinmillimeterswidth': 1800.0,
    'dimensionsinmillimetersheight': 1600.0,
    'dimensionsinmillimeterslength': 4500.0,
    'weight': 1500.0,
    'tankcapacityinliters': 60.0,
    'trunkcapacityinliters': 500.0,
    'consumptionurban': 10.0,
    'consumptionmixed': 8.0,
    'consumptionextraurban': 6.0,
    'co2emissionsgramsperkm': 150.0,
    'lPrice': 4.39794
}

# Crea un DataFrame con una sola fila que contiene la nueva entrada
nueva_entrada = pd.DataFrame([nueva_entrada])

# Imprime el nuevo DataFrame
print(nueva_entrada)


   price_amount  makeid  manufacturerprice       km  vehicleyear  \
0       25000.0     102            27000.0  50000.0         2018   

   etiqueta_type_id  provinceid  horsepower  maxspeed  acceleration  ...  \
0                 1          14       200.0     180.0           8.0  ...   

   dimensionsinmillimetersheight dimensionsinmillimeterslength  weight  \
0                         1600.0                        4500.0  1500.0   

   tankcapacityinliters  trunkcapacityinliters  consumptionurban  \
0                  60.0                  500.0              10.0   

   consumptionmixed  consumptionextraurban  co2emissionsgramsperkm   lPrice  
0               8.0                    6.0                   150.0  4.39794  

[1 rows x 27 columns]


In [5]:
oil_car_test_features = nueva_entrada.drop(columns=['price_amount', 'lPrice'])
oil_car_test_price = nueva_entrada['lPrice']
oil_car_test_prediction = best_tuning_oil_set_nd.predict(oil_car_test_features)

In [6]:
print("Predicción de precio de un carro de gasolina:", oil_car_test_prediction)
print("Precio: ", 10 ** oil_car_test_prediction)

Predicción de precio de un carro de gasolina: [4.31652206]
Precio:  [20726.31337974]


In [6]:
variables_a_guardar = {'ventas_oil':ventas_oil,
                        'ventas_elec':ventas_elec,
                        'best_tuning_oil_set_nd':best_tuning_oil_set_nd,
                        'best_tuning_elec_set_nd':best_tuning_elec_set_nd
                       }

# Guardar las variables en un archivo pickle
with open('predictors.pkl', 'wb') as f:
    pickle.dump(variables_a_guardar, f)

In [3]:
rf_model_oil_importance = best_tuning_oil_set_nd.named_steps['model']
if hasattr(rf_model_oil_importance, 'feature_importances_'):
    # Obtiene la importancia de las características
    importances = rf_model_oil_importance.feature_importances_

    # Ordena las importancias de las características de mayor a menor
    indices = np.argsort(importances)[::-1]

    # Imprime las características más importantes
    print("Características más importantes:")
    for i in range(len(indices)):
        print(f"{i + 1}. {ventas_oil_X_train.columns[indices[i]]}: {importances[indices[i]]}")
else:
    print("El modelo no tiene el atributo 'feature_importances_'.")

Características más importantes:
1. vehicleyear: 0.1941754753362542
2. manufacturerprice: 0.15029040677412808
3. makeid: 0.12771805924028953
4. km: 0.12378558389906225
5. etiqueta_type_id: 0.07418488746026126
6. horsepower: 0.06718313567192555
7. combustible_type_id: 0.0503404752320984
8. weight: 0.032705878812050684
9. doors: 0.03086955957216426
10. provinceid: 0.02907521292983948
11. dimensionsinmillimeterswidth: 0.023084848898744324
12. transmision_type_id: 0.018025772451002325
13. body_type_id: 0.017674499421375965
14. seatingcapacity: 0.017201049320928825
15. dimensionsinmillimetersheight: 0.014312032229926235
16. dimensionsinmillimeterslength: 0.013612679864940294
17. colores_type_id: 0.01056829217681007
18. acceleration: 0.0028244462365319242
19. maxspeed: 0.0023677044716663826


In [162]:
rf_model_elec_importance = best_tuning_elec_set_nd.named_steps['model']
if hasattr(rf_model_elec_importance, 'feature_importances_'):
    # Obtiene la importancia de las características
    importances = rf_model_elec_importance.feature_importances_

    # Ordena las importancias de las características de mayor a menor
    indices = np.argsort(importances)[::-1]

    # Imprime las características más importantes
    print("Características más importantes:")
    for i in range(len(indices)):
        print(f"{i + 1}. {ventas_elec_X_train.columns[indices[i]]}: {importances[indices[i]]}")

Características más importantes:
1. manufacturerprice: 0.14590037977597087
2. dimensionsinmillimeterswidth: 0.11418251510892746
3. doors: 0.10232250001170225
4. etiqueta_type_id: 0.09707585311273156
5. combustible_type_id: 0.087439658492359
6. vehicleyear: 0.05591460260703096
7. horsepower: 0.04988319378863884
8. provinceid: 0.04892910512742021
9. dimensionsinmillimeterslength: 0.04523258703904065
10. dimensionsinmillimetersheight: 0.035965217612232224
11. transmision_type_id: 0.03367045775524756
12. makeid: 0.032262570640881116
13. km: 0.030164525830323193
14. seatingcapacity: 0.029713183385190287
15. trunkcapacityinliters: 0.01750861630477506
16. weight: 0.015875727645954733
17. consumptionmixed: 0.013342558647403557
18. colores_type_id: 0.011415137220752969
19. body_type_id: 0.010544954792037882
20. tankcapacityinliters: 0.010288004563681383
21. consumptionurban: 0.007974114488460714
22. acceleration: 0.003197777206562353
23. maxspeed: 0.0011967588426751346


In [1]:
# Crear un diccionario con las variables que deseas guardar
variables_a_guardar = {'ventas_elec_X_train':ventas_elec_X_train,
                        'ventas_elec_y_train':ventas_elec_y_train,
                        'ventas_oil_X_train':ventas_oil_X_train,
                        'ventas_oil_y_train':ventas_oil_y_train,
                        'ventas_elec_X_test':ventas_elec_X_test,
                        'ventas_elec_y_test':ventas_elec_y_test,
                        'ventas_oil_X_test':ventas_oil_X_test,
                        'ventas_oil_y_test':ventas_oil_y_test,
                        'rk_assessment': rk_assessment,
                        'rk_tuning': rk_tuning,
                        'rk_trial': rk_trial,
                        'resultados_elec_trial':resultados_elec_trial,
                        'resultados_oil_trial':resultados_oil_trial,
                        'grid_search_oil':grid_search_oil,
                        'best_tuning_oil_set':best_tuning_oil_set,
                        'grid_search_elec':grid_search_elec,
                        'best_tuning_elec_set':best_tuning_elec_set,
                        'grid_search_oil_nd':grid_search_oil_nd,
                        'best_tuning_oil_set_nd':best_tuning_oil_set_nd,
                        'grid_search_elec_nd':grid_search_elec_nd,
                        'best_tuning_elec_set_nd':best_tuning_elec_set_nd,
                        'resultados_oil_tuning_st':resultados_oil_tuning_st,
                        'resultados_elec_tuning_st':resultados_elec_tuning_st,
                        'resultados_oil_tuning_nd':resultados_oil_tuning_nd,
                        'resultados_elec_tuning_nd':resultados_elec_tuning_nd,
                        'resultados_oil_test':resultados_oil_test,
                        'resultados_elec_test':resultados_elec_test,
                       }

# Guardar las variables en un archivo pickle
with open('modeling.pkl', 'wb') as f:
    pickle.dump(variables_a_guardar, f)

NameError: name 'ventas_elec_X_train' is not defined