In [1]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt 
import seaborn as sns 
%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

from scr.encoder_categoricos import MyLabelEncoder
from scr.comprobaciones_ML import indicativos_regresion
from scr.chequeator import chequeator
from scr.regresion_lineal_non import itera_grados

In [2]:
df = pd.read_csv("diamonds_train.csv", index_col=0)
X_pred = pd.read_csv("diamonds_test.csv", index_col=0)
sample = pd.read_csv("./sample_submission.csv")
df

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,price
0,1.21,Ideal,H,VS2,63.0,57.0,6.73,6.70,4.23,6134
1,0.28,Very Good,D,VVS2,64.0,56.0,4.14,4.17,2.66,532
2,0.42,Premium,F,VS1,61.2,58.0,4.86,4.82,2.96,1103
3,0.26,Ideal,H,IF,61.1,57.0,4.16,4.12,2.53,600
4,1.10,Good,G,SI1,63.4,57.0,6.52,6.55,4.14,4997
...,...,...,...,...,...,...,...,...,...,...
40340,1.55,Premium,H,VS2,61.3,61.0,7.46,7.39,4.55,11708
40341,0.36,Ideal,D,SI1,60.6,56.0,4.58,4.63,2.79,619
40342,0.57,Very Good,I,VS2,62.2,55.0,5.33,5.34,3.32,1267
40343,1.01,Very Good,F,IF,59.6,62.0,6.47,6.56,3.88,9965


In [3]:
df = df.drop(df[df["x"]==0].index)
df = df.drop(df[df["y"]==0].index)
df = df.drop(df[df["z"]==0].index)

In [4]:
df = df[(df["depth"]<75)&(df["depth"]>45)]
df = df[(df["table"]<80)&(df["table"]>40)]
df = df[(df["x"]<30)]
df = df[(df["y"]<30)]
df = df[(df["z"]<30)&(df["z"]>2)]

In [5]:
object_cols = ['cut', 'color', 'clarity']

In [6]:
label_df = df.copy()
label_encoder = LabelEncoder()
for col in object_cols:
    label_df[col] = label_encoder.fit_transform(label_df[col])

In [3]:
from pycaret.regression import *

In [9]:
exp_reg101 = setup(data = df, target = 'price', session_id=123)
lightgbm = create_model('lightgbm')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,299.4753,337201.3985,580.6904,0.9784,0.1147,0.0879
1,297.8996,325855.9057,570.8379,0.9797,0.1109,0.0861
2,301.8761,321174.4249,566.7225,0.9803,0.1091,0.0841
3,289.4923,288147.889,536.7941,0.9803,0.1144,0.0877
4,282.7795,268439.8562,518.1118,0.9829,0.1096,0.0848
5,295.9139,312576.6004,559.0855,0.9814,0.1105,0.0859
6,299.2888,321918.693,567.3788,0.9806,0.1115,0.085
7,288.5127,290223.5302,538.724,0.9813,0.1105,0.0856
8,292.3246,316785.1957,562.8367,0.9805,0.1171,0.0896
9,299.8691,306247.3882,553.3962,0.9805,0.1138,0.0876


In [10]:
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)
                        }

In [11]:
tuned_lightgbm = tune_model(lightgbm, custom_grid = lgbm_params)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,294.9104,345005.2282,587.3715,0.9779,0.1051,0.081
1,292.4938,327371.3059,572.1637,0.9796,0.103,0.0794
2,294.6055,329242.2746,573.7964,0.9798,0.1002,0.0772
3,279.646,298259.0887,546.131,0.9797,0.1047,0.08
4,275.7224,275705.5801,525.0767,0.9824,0.1012,0.0785
5,290.199,319398.7757,565.1538,0.981,0.1017,0.0784
6,291.0201,317724.3835,563.6705,0.9809,0.1026,0.0772
7,277.9097,283773.5284,532.704,0.9818,0.1006,0.0776
8,285.2187,315667.5759,561.843,0.9806,0.1102,0.0835
9,290.2734,295395.6504,543.5031,0.9812,0.1069,0.0817


In [12]:
evaluate_model(tuned_lightgbm)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [13]:
predict_model(tuned_lightgbm)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,278.9196,297012.7778,544.9888,0.981,0.1018,0.0787


Unnamed: 0,carat,depth,table,x,z,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,...,clarity_I1,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2,price,Label
0,0.40,61.099998,58.0,4.73,2.90,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,666.0,768.625715
1,0.32,61.700001,58.0,4.38,2.71,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,645.0,737.319992
2,0.71,62.599998,57.0,5.64,3.55,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1626.0,1903.888304
3,0.46,62.000000,58.0,4.95,3.08,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1294.0,1362.778787
4,1.55,62.099998,56.0,7.36,4.59,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,11869.0,11166.831757
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12091,0.32,61.299999,55.0,4.41,2.71,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,846.0,779.584026
12092,0.33,61.099998,59.0,4.45,2.71,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,631.0,611.786234
12093,0.40,62.900002,58.0,4.71,2.95,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1050.0,1036.323912
12094,0.73,62.200001,56.0,5.79,3.58,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4252.0,4643.475899


In [14]:
final_lightgbm = finalize_model(tuned_lightgbm)

In [15]:
predict_model(final_lightgbm)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,220.2115,141855.9239,376.6377,0.9909,0.0932,0.072


Unnamed: 0,carat,depth,table,x,z,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,...,clarity_I1,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2,price,Label
0,0.40,61.099998,58.0,4.73,2.90,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,666.0,777.179537
1,0.32,61.700001,58.0,4.38,2.71,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,645.0,720.725481
2,0.71,62.599998,57.0,5.64,3.55,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1626.0,1817.120224
3,0.46,62.000000,58.0,4.95,3.08,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1294.0,1370.838697
4,1.55,62.099998,56.0,7.36,4.59,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,11869.0,11343.930153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12091,0.32,61.299999,55.0,4.41,2.71,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,846.0,768.791904
12092,0.33,61.099998,59.0,4.45,2.71,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,631.0,611.546545
12093,0.40,62.900002,58.0,4.71,2.95,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1050.0,1029.686216
12094,0.73,62.200001,56.0,5.79,3.58,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4252.0,4667.277902


In [21]:
save_model(final_lightgbm,'Final LightGBM Model 19Jun2021')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=[], ml_usecase='regression',
                                       numerical_features=[], target='price',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 numeric_strategy='...
                  LGBMRegressor(boosting_type='gbdt', class_weight=None,
                                colsample_bytree=1.0, importance_type='split',
                                learning_rate=0.1, max_depth=60,
                                min_child_samples=20, min_child_weight=0.001,
                  

In [39]:
saved_final_lightgbm = load_model('Final LightGBM Model 19Jun2021')

Transformation Pipeline and Model Successfully Loaded


In [40]:
evaluate_model(saved_final_lightgbm)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [10]:
unseen_predictions = predict_model(saved_final_lightgbm, data=X_pred)
unseen_predictions.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,Label
0,0.3,Ideal,H,SI2,60.0,56.0,4.41,4.43,2.65,474.506973
1,0.34,Ideal,D,IF,62.1,57.0,4.52,4.46,2.79,1778.493492
2,1.57,Very Good,I,VS2,60.3,58.0,7.58,7.55,4.56,9580.096575
3,0.31,Ideal,H,VS2,61.8,57.0,4.32,4.36,2.68,578.936847
4,1.51,Good,I,VVS1,64.0,60.0,7.26,7.21,4.63,9739.261787


In [22]:
sample

Unnamed: 0,id,price
0,0,12132
1,1,11786
2,2,14684
3,3,15425
4,4,6724
...,...,...
13444,13444,8197
13445,13445,10757
13446,13446,8084
13447,13447,1948


In [11]:
respuesta2 = unseen_predictions['Label'].values

In [12]:
respuesta2

array([  474.506973  ,  1778.49349206,  9580.09657537, ...,
       14626.55779381, 10531.47553068,   726.91700216])

In [15]:
submission2

Unnamed: 0,id,price
0,0,474.506973
1,1,1778.493492
2,2,9580.096575
3,3,578.936847
4,4,9739.261787
...,...,...
13444,13444,4587.898948
13445,13445,465.256228
13446,13446,14626.557794
13447,13447,10531.475531


In [16]:
chequeator(submission2, sample)

You're ready to submit!


In [17]:
submission2.to_csv('resultado_2.csv', index=False)

In [14]:
saved_final_rf = load_model('Final RF Model 19Jun2021')

Transformation Pipeline and Model Successfully Loaded


In [28]:
rf_tuned = tune_model(rf)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,415.4551,573565.5536,757.3411,0.9632,0.148,0.1157
1,405.7234,559425.3239,747.9474,0.9651,0.1414,0.1103
2,420.1695,602490.4191,776.2026,0.9631,0.1478,0.1141
3,400.5149,526034.9215,725.2826,0.9641,0.1491,0.1153
4,385.8574,478992.9462,692.0932,0.9694,0.1434,0.1114
5,393.0545,516217.1704,718.4825,0.9693,0.1379,0.1084
6,416.092,571132.5262,755.7331,0.9656,0.1452,0.1134
7,394.2237,505148.1433,710.7377,0.9675,0.1431,0.1127
8,405.2956,553809.2083,744.1836,0.9659,0.1491,0.1164
9,414.5354,564374.8803,751.2489,0.9641,0.1497,0.1174


In [15]:
evaluate_model(saved_final_rf)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [33]:
predict_model(rf_tuned)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Random Forest Regressor,392.5546,515163.0414,717.7486,0.967,0.1435,0.1127


Unnamed: 0,carat,depth,table,x,z,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,...,clarity_I1,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2,price,Label
0,0.40,61.099998,58.0,4.73,2.90,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,666.0,770.316799
1,0.32,61.700001,58.0,4.38,2.71,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,645.0,711.367103
2,0.71,62.599998,57.0,5.64,3.55,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1626.0,2205.959955
3,0.46,62.000000,58.0,4.95,3.08,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1294.0,1277.452454
4,1.55,62.099998,56.0,7.36,4.59,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,11869.0,9906.029950
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12091,0.32,61.299999,55.0,4.41,2.71,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,846.0,710.146476
12092,0.33,61.099998,59.0,4.45,2.71,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,631.0,612.720883
12093,0.40,62.900002,58.0,4.71,2.95,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1050.0,991.849024
12094,0.73,62.200001,56.0,5.79,3.58,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4252.0,3369.547055


In [34]:
final_rf = finalize_model(rf_tuned)

In [37]:
save_model(final_rf,'Final RF Model 19Jun2021')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=[], ml_usecase='regression',
                                       numerical_features=[], target='price',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 numeric_strategy='...
                  RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                        criterion='mse', max_depth=9,
                                        max_features=1.0, max_leaf_nodes=None,
                                        max_samples=None,
                                   

In [4]:
data_modified = pd.read_csv('./data_modified.csv')
data_pred_modified = pd.read_csv('./data_pred_modified.csv')

In [19]:
exp_reg104 = setup(data = data_modified, target = 'price', session_id=7632, numeric_features = ['cut', 'color', 'clarity'],)
lightgbm2 = create_model('lightgbm')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,309.1817,336282.5068,579.8987,0.98,0.112,0.0874
1,312.992,337159.7909,580.6546,0.9805,0.1134,0.087
2,287.1673,296743.3715,544.7416,0.9802,0.1111,0.0861
3,290.011,316782.8545,562.8347,0.9788,0.1167,0.0886
4,290.2251,279742.7153,528.9071,0.9832,0.1145,0.0877
5,285.9553,287148.1369,535.8621,0.9817,0.1063,0.0824
6,279.9448,294850.2454,543.0011,0.9796,0.1151,0.0872
7,305.667,384482.8202,620.0668,0.9767,0.1158,0.0879
8,283.3923,261130.9604,511.0097,0.9836,0.1085,0.0837
9,283.8411,285914.8479,534.7101,0.9813,0.1129,0.0872


In [20]:
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)
                        }

In [22]:
tuned_lightgbm2 = tune_model(lightgbm2, custom_grid = lgbm_params)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,297.7038,349761.4529,591.4063,0.9792,0.0991,0.076
1,301.893,325187.3026,570.252,0.9812,0.0992,0.076
2,279.4453,298077.345,545.9646,0.9801,0.0968,0.0738
3,281.3264,317670.962,563.6231,0.9787,0.1014,0.0765
4,275.8417,278639.9605,527.8636,0.9832,0.1,0.0751
5,280.2617,295994.6314,544.0539,0.9811,0.0956,0.0731
6,270.4561,298332.1397,546.1979,0.9793,0.1004,0.0752
7,290.2205,362717.0324,602.2599,0.9781,0.1013,0.0757
8,266.2466,253932.5396,503.9172,0.9841,0.0939,0.0717
9,272.7726,293194.674,541.4745,0.9808,0.099,0.0747


In [23]:
evaluate_model(tuned_lightgbm)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [25]:
predict_model(tuned_lightgbm2)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,278.046,313369.1271,559.7938,0.9802,0.0989,0.0744


Unnamed: 0,carat,depth,table,x,y,z,shape_ratio,cut_0,cut_1,cut_2,...,clarity_0,clarity_1,clarity_2,clarity_3,clarity_4,clarity_5,clarity_6,clarity_7,price,Label
0,0.34,62.500000,57.000000,4.49,4.47,2.80,0.912000,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,530.0,522.877057
1,0.71,58.099998,58.000000,5.92,5.88,3.43,0.998279,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3347.0,3554.680419
2,0.50,61.599998,56.000000,5.08,5.11,3.14,0.909091,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2052.0,2083.461957
3,0.38,58.900002,58.000000,4.69,4.81,2.80,0.984720,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1064.0,1035.359528
4,0.34,62.700001,55.000000,4.49,4.48,2.81,0.877193,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,765.0,723.979566
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12099,1.22,62.200001,56.000000,6.84,6.89,4.27,0.900322,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6541.0,6612.299797
12100,0.92,58.700001,61.000000,6.34,6.43,3.75,1.039182,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2880.0,3121.794546
12101,0.90,61.900002,55.000000,6.21,6.19,3.84,0.888530,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4101.0,3679.027892
12102,0.56,61.799999,54.099998,5.31,5.34,3.29,0.875405,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1576.0,1517.285227


In [27]:
final_lightgbm2 = finalize_model(tuned_lightgbm2)

In [28]:
save_model(final_lightgbm2,'Final lightgbm2 Model 19Jun2021')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=['Unnamed: 0'],
                                       ml_usecase='regression',
                                       numerical_features=[], target='price',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 numeric...
                  LGBMRegressor(boosting_type='gbdt', class_weight=None,
                                colsample_bytree=1.0, importance_type='split',
                                learning_rate=0.1, max_depth=90,
                                min_child_samples=20, mi

In [32]:
unseen_predictions = predict_model(final_lightgbm2, data=data_pred_modified)
unseen_predictions.head()

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,shape_ratio,Label
0,0,0.3,4,2,1,60.0,56.0,4.41,4.43,2.65,0.933333,457.673413
1,1,0.34,4,6,7,62.1,57.0,4.52,4.46,2.79,0.917874,1803.326939
2,2,1.57,2,1,3,60.3,58.0,7.58,7.55,4.56,0.961857,9227.28552
3,3,0.31,4,2,3,61.8,57.0,4.32,4.36,2.68,0.92233,570.923453
4,4,1.51,1,1,6,64.0,60.0,7.26,7.21,4.63,0.9375,10021.526522


In [51]:
respuesta3 = unseen_predictions['Label'].values
submission3 = pd.DataFrame({"id": range(len(respuesta3)), "price": respuesta3})
chequeator(submission3, sample)
submission3.to_csv('resultado_3.csv', index=False)

You're ready to submit!


In [45]:
exp_reg104 = setup(data = data_modified, target = 'price', session_id=7632, numeric_features = ['cut', 'color', 'clarity'],)
lightgbm4 = create_model('lightgbm')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,298.2854,314751.0691,561.0268,0.9813,0.1063,0.0827
1,302.5005,316350.1803,562.4502,0.9817,0.1085,0.0833
2,279.6268,278953.1907,528.1602,0.9813,0.105,0.0817
3,277.825,294303.5833,542.4975,0.9803,0.1084,0.0834
4,279.8434,266478.8198,516.2159,0.984,0.109,0.0831
5,282.084,282551.9714,531.5562,0.982,0.1036,0.0805
6,275.8587,291034.2123,539.4759,0.9798,0.1128,0.0844
7,301.0756,366116.388,605.0755,0.9779,0.1112,0.0848
8,275.4558,246908.6305,496.899,0.9845,0.104,0.0806
9,274.2956,279596.7688,528.7691,0.9817,0.1072,0.0821


In [46]:
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)
                        }
tuned_lightgbm4 = tune_model(lightgbm2, custom_grid = lgbm_params)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,287.6059,319172.684,564.9537,0.981,0.0961,0.0736
1,293.3432,316836.8127,562.8826,0.9817,0.0963,0.0728
2,274.6188,300802.3279,548.4545,0.9799,0.095,0.0725
3,269.6978,297426.3764,545.3681,0.98,0.097,0.0736
4,269.9496,269435.619,519.0719,0.9838,0.0976,0.0724
5,275.2445,281965.8523,531.0046,0.982,0.0925,0.0712
6,267.8414,297861.2799,545.7667,0.9794,0.0983,0.0728
7,288.2303,357264.6258,597.7162,0.9784,0.0999,0.0747
8,262.5876,244725.6226,494.6975,0.9846,0.0931,0.0701
9,265.3748,280458.1158,529.583,0.9817,0.0955,0.0719


In [47]:
evaluate_model(tuned_lightgbm4)
predict_model(tuned_lightgbm4)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,272.0567,299168.606,546.9631,0.9811,0.0957,0.0721


Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,shape_ratio,price,Label
0,0.34,4.0,6.0,0.0,62.500000,57.000000,4.49,4.47,2.80,0.912000,530.0,584.583655
1,0.71,3.0,4.0,6.0,58.099998,58.000000,5.92,5.88,3.43,0.998279,3347.0,3599.633504
2,0.50,4.0,4.0,5.0,61.599998,56.000000,5.08,5.11,3.14,0.909091,2052.0,2107.319204
3,0.38,4.0,2.0,7.0,58.900002,58.000000,4.69,4.81,2.80,0.984720,1064.0,1059.139953
4,0.34,4.0,3.0,3.0,62.700001,55.000000,4.49,4.48,2.81,0.877193,765.0,734.309044
...,...,...,...,...,...,...,...,...,...,...,...,...
12099,1.22,4.0,2.0,2.0,62.200001,56.000000,6.84,6.89,4.27,0.900322,6541.0,6815.644890
12100,0.92,2.0,0.0,1.0,58.700001,61.000000,6.34,6.43,3.75,1.039182,2880.0,2992.536801
12101,0.90,4.0,2.0,1.0,61.900002,55.000000,6.21,6.19,3.84,0.888530,4101.0,3593.827268
12102,0.56,2.0,2.0,3.0,61.799999,54.099998,5.31,5.34,3.29,0.875405,1576.0,1438.463764


In [48]:
final_lightgbm4 = finalize_model(tuned_lightgbm4)

In [49]:
save_model(final_lightgbm4,'Final lightgbm4 Model 19Jun2021')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=['Unnamed: 0'],
                                       ml_usecase='regression',
                                       numerical_features=['cut', 'color',
                                                           'clarity'],
                                       target='price', time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value...
                  LGBMRegressor(boosting_type='gbdt', class_weight=None,
                                colsample_bytree=1.0, importance_type='split',
                                learning_rate=0.1, max_depth=90,
                             

In [52]:
unseen_predictions4 = predict_model(final_lightgbm4, data=data_pred_modified)
respuesta4 = unseen_predictions['Label'].values
submission4 = pd.DataFrame({"id": range(len(respuesta4)), "price": respuesta4})
chequeator(submission4, sample)
submission4.to_csv('resultado_4.csv', index=False)

You're ready to submit!


In [73]:
data_modified2 = data_modified.copy()

In [74]:
data_modified2['cut'] = data_modified2['cut'] ** 2

In [59]:
exp_reg105 = setup(data = data_modified, target = 'price', session_id=72048, numeric_features = ['cut', 'color', 'clarity'],)
lightgbm5 = create_model('lightgbm')
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)}
tuned_lightgbm5 = tune_model(lightgbm5, custom_grid = lgbm_params)
evaluate_model(tuned_lightgbm5)
predict_model(tuned_lightgbm5)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,269.0352,277967.0662,527.2258,0.9821,0.0922,0.0703
1,281.6131,313139.9976,559.5891,0.9799,0.0961,0.0729
2,276.9009,293385.6357,541.6508,0.9819,0.0926,0.0698
3,256.3607,264480.6663,514.2768,0.9828,0.0915,0.0683
4,267.6498,281658.1959,530.7148,0.9822,0.0917,0.0705
5,281.0159,312264.4066,558.8062,0.9816,0.1015,0.0734
6,259.1195,255704.0544,505.6719,0.9837,0.0962,0.0702
7,260.1925,256172.4819,506.1348,0.9838,0.097,0.0711
8,268.7275,288122.2633,536.7702,0.9822,0.0942,0.0713
9,262.8858,271437.6537,520.9968,0.9816,0.0933,0.0712


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,280.0466,310816.0322,557.5088,0.9806,0.0952,0.0728


Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,shape_ratio,price,Label
0,1.21,3.0,4.0,1.0,59.000000,60.000000,6.99,6.94,4.11,1.016949,6092.0,5363.525891
1,1.33,4.0,2.0,4.0,59.299999,57.000000,7.22,7.18,4.27,0.961214,8714.0,8013.095774
2,1.42,2.0,4.0,6.0,58.400002,62.000000,7.35,7.38,4.30,1.061644,15665.0,13754.733765
3,0.38,4.0,2.0,5.0,62.299999,53.299999,4.65,4.69,2.91,0.855538,832.0,866.684766
4,1.01,1.0,3.0,1.0,64.000000,59.000000,6.34,6.31,4.05,0.921875,3461.0,3841.628653
...,...,...,...,...,...,...,...,...,...,...,...,...
12099,1.13,4.0,4.0,2.0,59.599998,57.000000,6.83,6.79,4.06,0.956376,6391.0,5883.224387
12100,0.76,3.0,5.0,3.0,60.299999,60.000000,5.97,5.94,3.59,0.995025,3107.0,3145.318183
12101,0.54,2.0,5.0,2.0,60.200001,58.000000,5.23,5.37,3.19,0.963455,1358.0,1417.771684
12102,0.53,3.0,5.0,1.0,60.500000,60.000000,5.21,5.24,3.16,0.991736,1141.0,1216.062583


In [62]:
final_lightgbm5 = finalize_model(tuned_lightgbm5)
save_model(final_lightgbm5,'Final lightgbm5 Model 19Jun2021')
unseen_predictions5 = predict_model(final_lightgbm5, data=data_pred_modified)
respuesta5 = unseen_predictions5['Label'].values
submission5 = pd.DataFrame({"id": range(len(respuesta5)), "price": respuesta5})
chequeator(submission5, sample)
submission5.to_csv('resultado_5.csv', index=False)

Transformation Pipeline and Model Succesfully Saved
You're ready to submit!


In [None]:
data_modified2 = data_modified.copy()
data_modified2['cut'] = data_modified2['cut'] ** 2

In [75]:
exp_reg105 = setup(data = data_modified2, target = 'price', session_id=72048, numeric_features = ['cut', 'color', 'clarity'])
lightgbm6 = create_model('lightgbm')
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)}
tuned_lightgbm6 = tune_model(lightgbm6, custom_grid = lgbm_params)
evaluate_model(tuned_lightgbm6)
predict_model(tuned_lightgbm6)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,269.0352,277967.0662,527.2258,0.9821,0.0922,0.0703
1,281.6131,313139.9976,559.5891,0.9799,0.0961,0.0729
2,276.9009,293385.6357,541.6508,0.9819,0.0926,0.0698
3,256.3607,264480.6663,514.2768,0.9828,0.0915,0.0683
4,267.6498,281658.1959,530.7148,0.9822,0.0917,0.0705
5,281.0159,312264.4066,558.8062,0.9816,0.1015,0.0734
6,259.1195,255704.0544,505.6719,0.9837,0.0962,0.0702
7,260.1925,256172.4819,506.1348,0.9838,0.097,0.0711
8,268.7275,288122.2633,536.7702,0.9822,0.0942,0.0713
9,262.8858,271437.6537,520.9968,0.9816,0.0933,0.0712


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,280.0466,310816.0322,557.5088,0.9806,0.0952,0.0728


Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,shape_ratio,price,Label
0,1.21,9.0,4.0,1.0,59.000000,60.000000,6.99,6.94,4.11,1.016949,6092.0,5363.525891
1,1.33,16.0,2.0,4.0,59.299999,57.000000,7.22,7.18,4.27,0.961214,8714.0,8013.095774
2,1.42,4.0,4.0,6.0,58.400002,62.000000,7.35,7.38,4.30,1.061644,15665.0,13754.733765
3,0.38,16.0,2.0,5.0,62.299999,53.299999,4.65,4.69,2.91,0.855538,832.0,866.684766
4,1.01,1.0,3.0,1.0,64.000000,59.000000,6.34,6.31,4.05,0.921875,3461.0,3841.628653
...,...,...,...,...,...,...,...,...,...,...,...,...
12099,1.13,16.0,4.0,2.0,59.599998,57.000000,6.83,6.79,4.06,0.956376,6391.0,5883.224387
12100,0.76,9.0,5.0,3.0,60.299999,60.000000,5.97,5.94,3.59,0.995025,3107.0,3145.318183
12101,0.54,4.0,5.0,2.0,60.200001,58.000000,5.23,5.37,3.19,0.963455,1358.0,1417.771684
12102,0.53,9.0,5.0,1.0,60.500000,60.000000,5.21,5.24,3.16,0.991736,1141.0,1216.062583


In [93]:
data_modified2 = data_modified.copy()
# data_modified2['cut'] = data_modified2['cut'] ** 2
scaler = StandardScaler()
scaler.fit(data_modified2['cut'].values.reshape(-1, 1))
data_modified2['cut'] = scaler.transform(data_modified2['cut'].values.reshape(-1, 1))


In [95]:
df = data_modified.copy()

In [15]:
df = data_modified.copy()
df = df.drop(df[df["x"]==0].index)
df = df.drop(df[df["y"]==0].index)
df = df.drop(df[df["z"]==0].index)
df = df[(df["depth"]<75)&(df["depth"]>45)]
df = df[(df["table"]<80)&(df["table"]>40)]
df = df[(df["x"]<30)]
df = df[(df["y"]<30)]
df = df[(df["z"]<30)&(df["z"]>2)]

In [16]:
exp_reg106 = setup(data = df, target = 'price', session_id=72048, numeric_features = ['cut', 'color', 'clarity'])
lightgbm6 = create_model('lightgbm')
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)}
tuned_lightgbm6 = tune_model(lightgbm6, custom_grid = lgbm_params)
evaluate_model(tuned_lightgbm6)
predict_model(tuned_lightgbm6)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,277.2871,292751.7617,541.0654,0.9806,0.1038,0.0798
1,272.5791,272606.5123,522.1173,0.9821,0.1022,0.0774
2,288.6826,308818.1283,555.7141,0.9812,0.0988,0.0778
3,283.8115,300828.3237,548.4782,0.9812,0.1042,0.0783
4,278.3653,263835.3438,513.649,0.9839,0.0984,0.0772
5,279.11,287373.8953,536.0727,0.9816,0.103,0.0798
6,274.6286,307626.4007,554.6408,0.9807,0.0997,0.0762
7,272.2009,267079.3913,516.7972,0.9823,0.0991,0.0763
8,280.5065,299180.1372,546.9736,0.9807,0.1019,0.0801
9,293.6132,357898.271,598.246,0.9773,0.1041,0.0792


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,280.0233,294408.9473,542.5946,0.9818,0.1025,0.08


Unnamed: 0,Unnamed 0,carat,cut,color,clarity,depth,table,y,z,shape_ratio,price,Label
0,32910.0,0.73,4.0,1.0,3.0,61.599998,54.0,5.84,3.59,0.876623,2327.0,2425.379003
1,5708.0,2.01,1.0,1.0,3.0,63.599998,57.0,7.95,5.03,0.896226,16003.0,15382.172542
2,38936.0,0.31,4.0,4.0,3.0,61.299999,57.0,4.34,2.65,0.929853,625.0,727.472938
3,25088.0,0.61,2.0,3.0,3.0,62.000000,57.0,5.47,3.38,0.919355,1861.0,1830.478399
4,32406.0,1.18,1.0,6.0,1.0,61.700001,63.0,6.72,4.13,1.021070,5315.0,5208.796097
...,...,...,...,...,...,...,...,...,...,...,...,...
12091,26670.0,0.90,3.0,1.0,4.0,61.200001,61.0,6.15,3.78,0.996732,3484.0,3463.920880
12092,10173.0,2.14,2.0,0.0,2.0,62.299999,59.0,8.26,5.14,0.947030,14556.0,14171.940941
12093,37734.0,0.72,4.0,2.0,4.0,62.299999,55.0,5.74,3.57,0.882825,2949.0,2862.178284
12094,3371.0,0.93,1.0,3.0,2.0,63.799999,58.0,6.09,3.90,0.909091,4375.0,3760.584207


In [100]:
final_lightgbm6 = finalize_model(tuned_lightgbm6)
save_model(final_lightgbm6,'Final lightgbm6 Model 19Jun2021')
unseen_predictions6 = predict_model(final_lightgbm6, data=data_pred_modified)
respuesta6 = unseen_predictions6['Label'].values
submission6 = pd.DataFrame({"id": range(len(respuesta6)), "price": respuesta6})
chequeator(submission6, sample)
submission6.to_csv('resultado_6.csv', index=False)

Transformation Pipeline and Model Succesfully Saved
You're ready to submit!


In [104]:
exp_reg107 = setup(data = df, target = 'price', session_id=65, numeric_features = ['cut', 'color', 'clarity'])
lightgbm7 = create_model('lightgbm')
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)}
tuned_lightgbm7 = tune_model(lightgbm7, custom_grid = lgbm_params)
evaluate_model(tuned_lightgbm7)
predict_model(tuned_lightgbm7)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,293.3085,319311.2233,565.0763,0.9808,0.1089,0.0839
1,279.2883,279547.3293,528.7224,0.9822,0.1095,0.0831
2,278.6666,256898.3548,506.8514,0.9839,0.1042,0.0804
3,274.5182,273134.6741,522.6229,0.9826,0.1053,0.0823
4,297.6205,378433.4339,615.1694,0.9759,0.1087,0.0826
5,278.2426,262910.5156,512.748,0.9838,0.1048,0.0823
6,290.0015,310413.2956,557.1475,0.9802,0.109,0.0842
7,286.9162,294947.7634,543.0909,0.982,0.1134,0.0856
8,270.7615,270541.129,520.1357,0.9826,0.1045,0.0813
9,293.0947,316347.4072,562.4477,0.9807,0.1051,0.0819


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,275.4722,266385.8196,516.1258,0.9828,0.1069,0.0836


Unnamed: 0,Unnamed 0,carat,cut,color,clarity,depth,table,y,z,shape_ratio,price,Label
0,23113.0,1.74,4.0,5.0,1.0,62.299999,57.0,7.70,4.83,0.914928,10085.0,12144.808296
1,14246.0,1.18,3.0,5.0,1.0,62.500000,60.0,6.70,4.20,0.960000,5181.0,5069.231569
2,20539.0,0.53,4.0,5.0,3.0,61.000000,57.0,5.23,3.20,0.934426,1727.0,1741.572599
3,22851.0,0.82,4.0,3.0,4.0,61.500000,56.0,6.05,3.71,0.910569,3652.0,3807.412740
4,3922.0,0.41,0.0,3.0,5.0,64.699997,56.0,4.72,3.04,0.865533,833.0,1025.744165
...,...,...,...,...,...,...,...,...,...,...,...,...
12091,906.0,1.00,1.0,0.0,6.0,63.500000,59.0,6.34,4.01,0.929134,4633.0,4475.986591
12092,12475.0,0.56,4.0,2.0,2.0,61.900002,58.0,5.30,3.27,0.936995,1343.0,1358.353947
12093,40081.0,0.55,1.0,0.0,3.0,64.099998,54.0,5.21,3.33,0.842434,1034.0,1144.096684
12094,1024.0,2.10,0.0,3.0,0.0,67.400002,59.0,7.76,5.24,0.875371,6597.0,6834.566538


In [105]:
final_lightgbm7 = finalize_model(tuned_lightgbm7)
save_model(final_lightgbm7,'Final lightgbm7 Model 19Jun2021')
unseen_predictions7 = predict_model(final_lightgbm7, data=data_pred_modified)
respuesta7 = unseen_predictions7['Label'].values
submission7 = pd.DataFrame({"id": range(len(respuesta7)), "price": respuesta7})
chequeator(submission7, sample)
submission7.to_csv('resultado_7.csv', index=False)

Transformation Pipeline and Model Succesfully Saved
You're ready to submit!


In [106]:
df

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,price,shape_ratio
0,0,1.21,4,2,3,63.0,57.0,6.73,6.70,4.23,6134,0.904762
1,1,0.28,2,6,5,64.0,56.0,4.14,4.17,2.66,532,0.875000
2,2,0.42,3,4,4,61.2,58.0,4.86,4.82,2.96,1103,0.947712
3,3,0.26,4,2,7,61.1,57.0,4.16,4.12,2.53,600,0.932897
4,4,1.10,1,3,2,63.4,57.0,6.52,6.55,4.14,4997,0.899054
...,...,...,...,...,...,...,...,...,...,...,...,...
40340,40340,1.55,3,2,3,61.3,61.0,7.46,7.39,4.55,11708,0.995106
40341,40341,0.36,4,6,2,60.6,56.0,4.58,4.63,2.79,619,0.924092
40342,40342,0.57,2,1,3,62.2,55.0,5.33,5.34,3.32,1267,0.884244
40343,40343,1.01,2,4,7,59.6,62.0,6.47,6.56,3.88,9965,1.040268


In [20]:
df = data_modified.copy()
df = df.drop(df[df["x"]==0].index)
df = df.drop(df[df["y"]==0].index)
df = df.drop(df[df["z"]==0].index)
df = df[(df["depth"]<75)&(df["depth"]>45)]
df = df[(df["table"]<80)&(df["table"]>40)]
df = df[(df["x"]<30)]
df = df[(df["y"]<30)]
df = df[(df["z"]<30)&(df["z"]>2)]
df['peso_esp'] = df['carat'] / (df['x'] * df['y'] * df['z'])

In [10]:
exp_reg108 = setup(data = df, target = 'price', session_id=72048, numeric_features = ['cut', 'color', 'clarity'])
lightgbm8 = create_model('lightgbm')
lgbm_params = {'num_leaves': np.arange(10,200,10),
                        'max_depth': [int(x) for x in np.linspace(10, 110, num = 11)],
                        'learning_rate': np.arange(0.1,1,0.1)}
tuned_lightgbm8 = tune_model(lightgbm8, custom_grid = lgbm_params)
evaluate_model(tuned_lightgbm8)
predict_model(tuned_lightgbm8)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,281.2034,298508.8305,546.3596,0.9802,0.1051,0.0809
1,271.9961,276258.004,525.6025,0.9819,0.1034,0.0782
2,287.0079,307994.4091,554.9724,0.9813,0.0993,0.078
3,281.9845,302664.1365,550.1492,0.9811,0.105,0.0792
4,276.4356,268513.6503,518.183,0.9836,0.0999,0.0781
5,277.9205,282857.3771,531.8434,0.9819,0.1036,0.0803
6,273.8413,296460.8828,544.4822,0.9814,0.1014,0.0775
7,275.5288,265862.2856,515.6184,0.9824,0.1006,0.0778
8,279.3553,293314.9493,541.5856,0.981,0.1016,0.0796
9,292.3112,340594.6118,583.6048,0.9784,0.1045,0.0794


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,276.9673,287862.176,536.5279,0.9822,0.103,0.0802


Unnamed: 0,Unnamed 0,carat,cut,color,clarity,depth,table,y,z,shape_ratio,peso_esp,price,Label
0,32910.0,0.73,4.0,1.0,3.0,61.599998,54.0,5.84,3.59,0.876623,0.005993,2327.0,2408.171167
1,5708.0,2.01,1.0,1.0,3.0,63.599998,57.0,7.95,5.03,0.896226,0.006387,16003.0,15332.489228
2,38936.0,0.31,4.0,4.0,3.0,61.299999,57.0,4.34,2.65,0.929853,0.006268,625.0,722.589863
3,25088.0,0.61,2.0,3.0,3.0,62.000000,57.0,5.47,3.38,0.919355,0.006065,1861.0,1823.777586
4,32406.0,1.18,1.0,6.0,1.0,61.700001,63.0,6.72,4.13,1.021070,0.006374,5315.0,5321.294511
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12091,26670.0,0.90,3.0,1.0,4.0,61.200001,61.0,6.15,3.78,0.996732,0.006244,3484.0,3466.475003
12092,10173.0,2.14,2.0,0.0,2.0,62.299999,59.0,8.26,5.14,0.947030,0.006117,14556.0,14373.858819
12093,37734.0,0.72,4.0,2.0,4.0,62.299999,55.0,5.74,3.57,0.882825,0.006143,2949.0,2854.792441
12094,3371.0,0.93,1.0,3.0,2.0,63.799999,58.0,6.09,3.90,0.909091,0.006377,4375.0,3846.929837


In [14]:
final_lightgbm8 = finalize_model(tuned_lightgbm8)
save_model(final_lightgbm8,'Final lightgbm8 Model 19Jun2021')
unseen_predictions8 = predict_model(final_lightgbm8, data=df_pred)
respuesta8 = unseen_predictions8['Label'].values
submission8 = pd.DataFrame({"id": range(len(respuesta8)), "price": respuesta8})
chequeator(submission8, sample)
submission8.to_csv('resultado_8.csv', index=False)

Transformation Pipeline and Model Succesfully Saved
You're ready to submit!
