In [38]:
import pandas as pd
import numpy as np
import time
from tqdm import tqdm
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.preprocessing import PowerTransformer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer
from sklearn.compose import TransformedTargetRegressor
from sklearn.cluster import KMeans

# import regressors
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor, StackingRegressor
from sklearn.neighbors import KNeighborsRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor, XGBRFRegressor
from catboost import CatBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, Ridge, SGDRegressor, PassiveAggressiveRegressor, Perceptron, RidgeClassifier, LogisticRegression
from sklearn.linear_model import Lasso, ElasticNet, Lars, BayesianRidge, ARDRegression, OrthogonalMatchingPursuit, HuberRegressor, TheilSenRegressor, RANSACRegressor
from sklearn.linear_model import LassoLars, LassoLarsIC
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

# pandas deactivate future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# --- Run configuration ---
# SUBMIT: when False, a hold-out split of train.csv replaces `test` so that
#         predictions can be scored locally (y_test is available).
SUBMIT = False
# USE_ORIGINAL: when True, the rows of cubic_zirconia.csv are appended to the training rows.
USE_ORIGINAL = True
# SEED: random_state used for the hold-out split and the optional subsampling below.
SEED = 15
# SAMPLE: fraction of training rows to keep; only values below 1 trigger subsampling.
SAMPLE = 1

train = pd.read_csv('datasets/train.csv')
test = pd.read_csv('datasets/test.csv')
orig = pd.read_csv('datasets/cubic_zirconia.csv')

# Column names captured before any column is dropped, so this still contains
# 'id' and 'price'.
# NOTE(review): only referenced from commented-out code further down.
base_cols = train.columns

# Both calls use inplace=True, so the three frames are modified directly.
# All three files must contain an 'id' column, otherwise drop() raises KeyError.
# NOTE(review): duplicates are also removed from `test`; when SUBMIT is True
# that may drop rows that still need a prediction -- confirm this is intended.
for i, df in enumerate([train, test, orig]):
    df.drop(['id'], axis=1, inplace=True)
    df.drop_duplicates(inplace=True)
    # df['dataset'] = i

# Define test set
# Local evaluation: the loaded `test` frame is discarded and replaced by a
# 20% hold-out taken from `train`.
if not SUBMIT:
    train, test = train_test_split(train, test_size=0.2, random_state=SEED) 

# The extra rows are added after the split, so they only ever end up in `train`.
if USE_ORIGINAL:
    train = pd.concat([train, orig], axis=0)
    train.reset_index(inplace=True, drop=True)

# Sampling for faster training
if SAMPLE < 1:
    train = train.sample(frac=SAMPLE, random_state=SEED)

# `orig` is not needed once it has been (optionally) merged into `train`.
del orig

# set training data
# Work on copies so `train` / `test` keep their 'price' column.
X_train = train.copy()
y_train = X_train.pop('price')
X_test = test.copy()

# A target for the test rows only exists in local-evaluation mode.
if not SUBMIT:
    y_test = X_test.pop('price')
else:
    y_test = None

# transform categorical features
def transform_categorical(df):
    """Replace the ``cut``, ``color`` and ``clarity`` labels by integer ranks.

    Each label is mapped to its position in the tuples below (0 for the
    first label). The frame is modified in place and also returned. Labels
    that are not listed come out of ``Series.map`` as NaN.
    """
    ordered_labels = {
        'cut': ('Fair', 'Good', 'Very Good', 'Premium', 'Ideal'),
        'color': ('J', 'I', 'H', 'G', 'F', 'E', 'D'),
        'clarity': ('I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF'),
    }
    for column, labels in ordered_labels.items():
        rank_of = {label: rank for rank, label in enumerate(labels)}
        df[column] = df[column].map(rank_of)
    return df

def remove_outliers(df, lower=2, upper=20):
    """Return the rows whose ``x``, ``y`` and ``z`` all lie strictly inside the bounds.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``x``, ``y`` and ``z`` columns. It is not modified.
    lower, upper : float, optional
        Exclusive bounds; a row is kept only if ``lower < value < upper`` holds
        for all three columns. The defaults reproduce the previously
        hard-coded limits.

    Returns
    -------
    pandas.DataFrame
        The selected rows with their original index labels. Rows with a
        missing dimension are dropped because comparisons with NaN are False.
    """
    # Named `lower`/`upper` rather than `min`/`max` to avoid shadowing builtins.
    dims = df[['x', 'y', 'z']]
    keep = dims.gt(lower).all(axis=1) & dims.lt(upper).all(axis=1)
    return df[keep]

def add_volume_ratio(df, reference=None):
    """Add ratio features built from the ``x``, ``y`` and ``z`` columns.

    ``volume_ratio1`` .. ``volume_ratio9`` are row-wise ratios of the three
    dimensions. ``volume_ratio10``, ``11``, ``13`` and ``14`` divide the
    row-wise product ``x * y * z`` by the product of a column statistic
    (mean, max, median and standard deviation respectively).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend; it is modified in place.
    reference : pandas.DataFrame, optional
        Frame whose ``x``/``y``/``z`` statistics are used for ratios 10-14.
        Defaults to ``df`` itself, which is the original behaviour. When the
        function is applied separately to a training and a test frame, pass
        the training frame here for both calls; otherwise the same stone gets
        a different feature value depending on which frame it sits in.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new columns appended.
    """
    stats = df if reference is None else reference
    x, y, z = df['x'], df['y'], df['z']

    # Row-wise ratios of the three dimensions.
    df['volume_ratio1'] = (x * y) / (z * z)
    df['volume_ratio2'] = (x * z) / (y * y)
    df['volume_ratio3'] = (y * z) / (x * x)
    df['volume_ratio4'] = x / z
    df['volume_ratio5'] = y / z
    df['volume_ratio6'] = x / y
    df['volume_ratio7'] = (x + y) / z
    df['volume_ratio8'] = (x + z) / y
    df['volume_ratio9'] = (y + z) / x

    # Volume scaled by frame-level statistics; the product is computed once.
    volume = x * y * z
    ref_x, ref_y, ref_z = stats['x'], stats['y'], stats['z']
    df['volume_ratio10'] = volume / (ref_x.mean() * ref_y.mean() * ref_z.mean())
    df['volume_ratio11'] = volume / (ref_x.max() * ref_y.max() * ref_z.max())
    # No volume_ratio12: the min-based variant is intentionally left out.
    df['volume_ratio13'] = volume / (ref_x.median() * ref_y.median() * ref_z.median())
    df['volume_ratio14'] = volume / (ref_x.std() * ref_y.std() * ref_z.std())
    return df

def feature_engineering(df):
    """Add geometric and proportion features derived from the raw columns.

    Reads ``x``, ``y``, ``z``, ``carat``, ``depth`` and ``table`` and appends,
    in this order: ``volume``, ``surface_area``, ``aspect_ratio_xy``,
    ``aspect_ratio_yz``, ``aspect_ratio_zx``, ``diagonal_distance``,
    ``sphericity``, ``compactness``, ``density``, ``table_percentage``,
    ``depth_percentage``, ``symmetry``, ``depth_to_table_ratio``,
    ``girdle_diameter``, ``girdle_thickness`` and ``girdle_ratio``.

    The frame is modified in place and also returned. Divisions by zero are
    not guarded, so rows with a zero dimension yield inf/NaN features.
    """
    x, y, z = df["x"], df["y"], df["z"]

    # Box-like geometry of the stone.
    df["volume"] = x * y * z
    # Computed once; the original assigned this same quantity a second time.
    df["surface_area"] = 2 * (x * y + y * z + z * x)
    df["aspect_ratio_xy"] = x / y
    df["aspect_ratio_yz"] = y / z
    df["aspect_ratio_zx"] = z / x
    df["diagonal_distance"] = np.sqrt(x ** 2 + y ** 2 + z ** 2)

    # NOTE(review): 1.4641 presumably stands in for pi ** (1/3) (~1.4646) from
    # the sphericity formula; kept as is so feature values do not change.
    df["sphericity"] = 1.4641 * (6 * df["volume"]) ** (2 / 3) / df["surface_area"]
    df["compactness"] = df["volume"] ** (1 / 3) / x
    df["density"] = df["carat"] / df["volume"]

    # Proportions relative to the mean of x and y.
    mean_xy = (x + y) / 2
    df["table_percentage"] = (df["table"] / mean_xy) * 100
    df["depth_percentage"] = (df["depth"] / mean_xy) * 100
    df["symmetry"] = ((x - z).abs() + (y - z).abs()) / (x + y + z)
    df["depth_to_table_ratio"] = df["depth"] / df["table"]

    # Same three columns that add_girdle_parameters() produces.
    df["girdle_diameter"] = 100 * z / df["depth"]
    df["girdle_thickness"] = 100 * z / df["table"]
    df["girdle_ratio"] = df["girdle_diameter"] / df["girdle_thickness"]
    return df

def target_transform(serie):
    """Return ``log(1 + serie)`` element-wise (forward target transform)."""
    return np.log1p(serie)

def inverse_target_transform(serie):
    """Return ``exp(serie) - 1`` element-wise (undoes ``target_transform``)."""
    return np.expm1(serie)

def set_categorical(df):
    """Cast ``cut``, ``color`` and ``clarity`` to pandas ``category`` dtype.

    The frame is modified in place and also returned.
    """
    for column in ('cut', 'color', 'clarity'):
        df[column] = df[column].astype('category')
    return df

def add_girdle_parameters(df):
    """Add ``girdle_diameter``, ``girdle_thickness`` and ``girdle_ratio``.

    ``girdle_diameter`` is ``100 * z / depth``, ``girdle_thickness`` is
    ``100 * z / table`` and ``girdle_ratio`` is the first divided by the
    second. The frame is modified in place and also returned.
    """
    scaled_z = 100 * df['z']
    df['girdle_diameter'] = scaled_z / df['depth']
    df['girdle_thickness'] = scaled_z / df['table']
    df['girdle_ratio'] = df['girdle_diameter'] / df['girdle_thickness']
    return df

def impute_x_y_z(df):
    """Fill missing ``x``, ``y`` and ``z`` values from the girdle diameter.

    The frame must already contain a ``girdle_diameter`` column (as created
    by ``add_girdle_parameters``); a KeyError is raised otherwise.

    Steps, in order:

    1. ``is_imputed`` is set to 1 for every row that has a NaN in any column.
    2. Missing ``girdle_diameter`` is filled with the mean of ``x`` and ``y``.
    3. Missing ``x`` is filled with ``2 * girdle_diameter - y``, then missing
       ``y`` with ``2 * girdle_diameter - x``.
    4. Missing ``z`` is filled with ``girdle_diameter * depth / 100``.
    5. The girdle columns are recomputed from the filled values.

    The frame is modified in place and also returned.
    """
    df['is_imputed'] = df.isna().any(axis=1).astype(int)

    # Assign the filled Series back to the column. The previous chained
    # ``df[col].fillna(..., inplace=True)`` operates on a temporary object and
    # does not update ``df`` when pandas Copy-on-Write is active.
    df['girdle_diameter'] = df['girdle_diameter'].fillna((df['x'] + df['y']) / 2)
    df['x'] = df['x'].fillna(2 * df['girdle_diameter'] - df['y'])
    df['y'] = df['y'].fillna(2 * df['girdle_diameter'] - df['x'])
    df['z'] = df['z'].fillna(df['girdle_diameter'] * df['depth'] / 100)

    df = add_girdle_parameters(df)
    return df

def set_nan(df):
    """Mark zero-valued ``x``, ``y`` and ``z`` entries as missing (NaN).

    The frame is modified in place and also returned; other columns are left
    untouched.
    """
    for col in ['x', 'y', 'z']:
        # Assign back instead of the chained ``df[col].replace(..., inplace=True)``,
        # which acts on a temporary object and leaves ``df`` unchanged when
        # pandas Copy-on-Write is active.
        df[col] = df[col].replace(0, np.nan)
    return df

def drop_girdle_parameters(df):
    """Remove the three girdle columns from ``df`` in place and return it.

    Raises KeyError if any of the columns is absent.
    """
    girdle_columns = ['girdle_diameter', 'girdle_thickness', 'girdle_ratio']
    df.drop(columns=girdle_columns, inplace=True)
    return df

# Make data preparation pipeline
def data_prepation(X_train, X_test):
    """Run the feature pipeline on the training and test frames.

    Both frames are modified in place by the helper functions (each helper
    assigns new columns on the frame it receives) and are returned as a
    ``(X_train, X_test)`` tuple.

    Active stages: ordinal encoding of the categorical columns, then the
    volume-ratio features and the engineered geometry features. The other
    stages are kept below as disabled experiments.

    NOTE(review): add_volume_ratio() is called on each frame separately, so
    the features it normalises by column statistics are scaled differently
    for the two frames.
    """
    frames = (X_train, X_test)

    # Stage 1: per-frame cleaning / encoding.
    for frame in frames:
        # set_nan(frame)
        transform_categorical(frame)
        # set_categorical(frame)
        # add_girdle_parameters(frame)
        # impute_x_y_z(frame)
        # drop_girdle_parameters(frame)

    # Stage 2 (disabled): imputation fitted on the training frame only.
    # imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    # imputer = IterativeImputer(max_iter=10, random_state=0)
    # imputer = KNNImputer(n_neighbors=1, weights="uniform")
    # X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
    # X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns, index=X_test.index)

    # Optional column whitelist used by the disabled drop() in stage 3.
    # selected_cols = base_cols
    # selected_cols = ['surface_area', 'clarity', 'color', 'cut', 'carat', 'depth_percentage', 'depth', 'compactness', 'depth_to_table_ratio']

    # Stage 3: derived features.
    for frame in frames:
        add_volume_ratio(frame)
        feature_engineering(frame)
        # frame.fillna(0, inplace=True)
        # frame.drop([col for col in frame.columns if col not in selected_cols], axis=1, inplace=True)

    # Stage 4 (disabled): scaling.
    # scaler = PowerTransformer()
    # X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
    # X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    # Stage 5 (disabled): clustering features.
    # model = KMeans(n_clusters=20, random_state=42)
    # X_train['cluster'] = model.fit_predict(X_train)
    # X_test['cluster'] = model.predict(X_test)

    return X_train, X_test
            
# True when data_prepation() contains steps that are fitted on the training
# rows (imputer, scaler, clustering). The cross-validation cells then re-run
# the preparation inside every fold instead of reusing prepared frames.
data_prep_has_fit_method = False

if data_prep_has_fit_method:
    # Keep X_train / X_test raw; only the *_prep copies are transformed.
    X_train_prep, X_test_prep = data_prepation(X_train.copy(), X_test.copy())
else:
    # Stateless preparation: transform once and snapshot the result.
    X_train, X_test = data_prepation(X_train, X_test)
    X_train_prep, X_test_prep = X_train.copy(), X_test.copy()

# Missing-value check for both prepared frames (disabled):
# X_train_prep, X_test_prep = data_prepation(X_train.copy(), X_test.copy())
# pd.DataFrame(X_train_prep.isna().sum(), columns=['train']).join(pd.DataFrame(X_test_prep.isna().sum(), columns=['test']))
X_train.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z,volume_ratio1,...,sphericity,compactness,density,table_percentage,depth_percentage,symmetry,depth_to_table_ratio,girdle_diameter,girdle_thickness,girdle_ratio
0,1.38,4,5,1,61.7,56.0,7.11,7.17,4.4,2.633197,...,0.784066,0.854563,0.006152,784.313725,864.145658,0.293362,1.101786,7.13128,7.857143,0.907618
1,1.56,4,2,4,61.5,56.0,7.51,7.49,4.61,2.646793,...,0.783838,0.849117,0.006016,746.666667,820.0,0.294748,1.098214,7.495935,8.232143,0.910569
2,0.36,4,6,1,60.9,56.0,4.59,4.61,2.81,2.679791,...,0.783274,0.850343,0.006055,1217.391304,1323.913043,0.298085,1.0875,4.614122,5.017857,0.91954
3,0.35,3,5,2,62.2,59.0,4.51,4.48,2.79,2.59565,...,0.784708,0.850178,0.006209,1312.569522,1383.759733,0.289474,1.054237,4.485531,4.728814,0.948553
4,0.39,2,6,2,61.3,56.0,4.7,4.74,2.89,2.667353,...,0.783483,0.85276,0.006057,1186.440678,1298.728814,0.296837,1.094643,4.714519,5.160714,0.91354


In [22]:
# 5-fold splitter with shuffling; seeded so that fold membership is reproducible.
cv = KFold(n_splits=5, shuffle=True, random_state=SEED)

# Set categorical features for catboost
# Columns of the prepared training frame that have pandas 'category' dtype.
# Only referenced by the commented-out CatBoostRegressor entry below.
cat_features = [col for col in X_train_prep.columns if X_train_prep[col].dtype == 'category']

# Candidate models, keyed by the label printed in the results.
# Un-comment an entry to include it in the comparison loop that follows.
regressors = {
    'LGBMRegressor1': LGBMRegressor(random_state=SEED, n_jobs=-1, boosting_type='gbdt'),
    # 'LGBMRegressor2': LGBMRegressor(random_state=SEED, n_jobs=-1, boosting_type='dart'),
    # 'LGBMRegressor3': LGBMRegressor(random_state=SEED, n_jobs=-1, boosting_type='goss'),
    # 'LGBMRegressor4': LGBMRegressor(random_state=SEED, n_jobs=-1, boosting_type='rf', subsample=.632, subsample_freq=1),
    # 'LGBMRegressor5': LGBMRegressor(random_state=SEED, n_jobs=-1, class_weight='balanced'),
    # 'LGBMRegressor6': LGBMRegressor(random_state=SEED, n_jobs=-1, subsample=0.7),
    # 'LGBMRegressor7': LGBMRegressor(random_state=SEED, n_jobs=-1, colsample_bytree=0.7),
    # 'LGBMRegressor8': LGBMRegressor(random_state=SEED, n_jobs=-1, subsample=0.7, colsample_bytree=0.7),
    # 'LGBMRegressor9': LGBMRegressor(random_state=SEED, n_jobs=-1, boosting_type='dart', colsample_bytree=0.7),
    # 'XGBRegressor1': XGBRegressor(random_state=SEED, n_jobs=-1),
    # 'XGBRegressor2': XGBRegressor(random_state=SEED, n_jobs=-1, booster='dart'),
    # 'XGBRegressor3': XGBRegressor(random_state=SEED, n_jobs=-1, booster='gblinear'),
    # 'XGBRegressor4': XGBRegressor(random_state=SEED, n_jobs=-1, colsample_bytree=0.7),
    # 'XGBRegressor5': XGBRegressor(random_state=SEED, n_jobs=-1, subsample=0.7),
    # 'XGBRFRegressor6': XGBRegressor(random_state=SEED, n_jobs=-1, objective='reg:squarederror'),
    # 'XGBRandomForestRegressor': XGBRFRegressor(random_state=SEED, n_jobs=-1),
    # 'CatBoostRegressor': CatBoostRegressor(random_state=SEED, silent=True, cat_features=cat_features), # Promising but fails on the cv
    # 'RandomForestRegressor': RandomForestRegressor(random_state=SEED, n_jobs=-1),
    # 'ExtraTreesRegressor': ExtraTreesRegressor(random_state=SEED, n_jobs=-1),
    # 'AdaBoostRegressor': AdaBoostRegressor(random_state=SEED),
    # 'GradientBoostingRegressor': GradientBoostingRegressor(random_state=SEED),
    # 'BaggingRegressor': BaggingRegressor(random_state=SEED, n_jobs=-1),
    # 'KNeighborsRegressor': KNeighborsRegressor(n_jobs=-1),
    # 'DecisionTreeRegressor': DecisionTreeRegressor(random_state=SEED),
    # 'GaussianProcessRegressor': GaussianProcessRegressor(random_state=SEED),
    # 'MLPRegressor1': MLPRegressor(random_state=SEED, max_iter=1000, activation='relu', solver='adam'),
    # 'MLPRegressor2': MLPRegressor(random_state=SEED, max_iter=1000, activation='relu', solver='lbfgs'), # promising but long to train
    # 'MLPRegressor3': MLPRegressor(random_state=SEED, max_iter=5000, activation='tanh', solver='adam'),
    # 'MLPRegressor4': MLPRegressor(random_state=SEED, max_iter=1000, activation='tanh', solver='lbfgs'),  # promising but long to train
    # 'MLPRegressor5': MLPRegressor(random_state=SEED, max_iter=1000, activation='logistic', solver='adam'),
    # 'MLPRegressor6': MLPRegressor(random_state=SEED, max_iter=1000, activation='logistic', solver='lbfgs'),
    # 'MLPRegressor7': MLPRegressor(random_state=SEED, max_iter=1000, activation='identity', solver='adam'),
    # 'MLPRegressor8': MLPRegressor(random_state=SEED, max_iter=1000, activation='identity', solver='lbfgs'),
    # 'Ridge': Ridge(random_state=SEED),
    # 'SGDRegressor': SGDRegressor(random_state=SEED, max_iter=1000, tol=1e-3),
    # 'PassiveAggressiveRegressor': PassiveAggressiveRegressor(random_state=SEED, max_iter=1000, tol=1e-3),
    # 'Perceptron': Perceptron(random_state=SEED, max_iter=1000, tol=1e-3),
    # 'LinearRegression': LinearRegression(),
    # 'Lasso': Lasso(random_state=SEED),
    # 'ElasticNet': ElasticNet(random_state=SEED, max_iter=1e6),
    # 'HuberRegressor': HuberRegressor(max_iter=1000),
    # 'BayesianRidge': BayesianRidge(),
    # 'ARDRegression': ARDRegression(),
    # 'TheilSenRegressor': TheilSenRegressor(random_state=SEED),
    # 'RANSACRegressor': RANSACRegressor(random_state=SEED),
    # 'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(normalize=False),
    # 'Lars': Lars(),
    # 'LassoLars': LassoLars(),
    # 'LassoLarsIC': LassoLarsIC(normalize=False),
    # 'StackingRegressor': StackingRegressor(
    #         estimators=[
    #             ('LGBMRandomForestRegressor', LGBMRegressor(random_state=SEED, n_jobs=-1, boosting_type='rf', subsample=.632, subsample_freq=1)),
    #             ('XGBRandomForestRegressor', XGBRFRegressor(random_state=SEED, n_jobs=-1)),
    #             ('RandomForestRegressor', RandomForestRegressor(random_state=SEED, n_jobs=-1)),
    #             # ('ExtraTreesRegressor', ExtraTreesRegressor(random_state=SEED, n_jobs=-1))
    #             ],
    #         final_estimator=Ridge(random_state=SEED),
    #         # cv=cv,
    #         # n_jobs=-1,
    #         )
}

# Cross-validate every candidate model, then refit it on the full training
# frame and (in local-evaluation mode) score it on the hold-out set.
for model_name, regressor in regressors.items():
    t0 = time.time()
    scores = []
    fold_importances = []
    # Fit on log1p(price); predictions are mapped back with expm1, so every
    # RMSE below is expressed in the original price units.
    ttr = TransformedTargetRegressor(regressor=regressor, func=target_transform, inverse_func=inverse_target_transform, check_inverse=False)

    for i, (train_index, test_index) in tqdm(enumerate(cv.split(X_train)), total=cv.get_n_splits()):

        X_train_cv, X_test_cv = X_train.iloc[train_index].copy(), X_train.iloc[test_index].copy()
        y_train_cv, y_test_cv = y_train.iloc[train_index].copy(), y_train.iloc[test_index].copy()

        # Preparation steps that learn from data are re-run per fold so the
        # validation rows never influence them.
        if data_prep_has_fit_method:
            X_train_cv, X_test_cv = data_prepation(X_train_cv, X_test_cv)

        ttr.fit(X_train_cv, y_train_cv)
        y_pred = ttr.predict(X_test_cv)
        # sqrt(MSE) instead of mean_squared_error(..., squared=False): the
        # `squared` argument is deprecated and absent from recent scikit-learn.
        scores.append(np.sqrt(mean_squared_error(y_test_cv, y_pred)))

        # Tree models expose feature_importances_, linear models coef_.
        # Models with neither are simply left out of the importance table
        # instead of aborting the whole comparison.
        fitted = ttr.regressor_
        importance = getattr(fitted, 'feature_importances_', None)
        if importance is None:
            importance = getattr(fitted, 'coef_', None)
        if importance is not None:
            fold_importances.append(pd.Series(importance, index=X_train_cv.columns, name=f'fold{i}'))

    # One column per fold plus their row-wise mean.
    feature_importances = pd.concat(fold_importances, axis=1) if fold_importances else pd.DataFrame()
    feature_importances['mean'] = feature_importances.mean(axis=1)

    ttr.fit(X_train_prep, y_train)
    y_pred = ttr.predict(X_test_prep)

    # Hold-out RMSE only exists when the test target is known. Use NaN in
    # submit mode rather than printing the last fold score under that label.
    if not SUBMIT:
        score_eval = np.sqrt(mean_squared_error(y_test, y_pred))
    else:
        score_eval = float('nan')

    print(f'{model_name}: {np.mean(scores):.4f} ± {np.std(scores):.4f}, Time: {time.time() - t0:.2f} seconds, RMSE: {score_eval:.4f}')
    print(feature_importances.sort_values('mean', ascending=False))

5it [00:13,  2.80s/it]


LGBMRegressor1: 581.0963 ± 0.6164, Time: 16.96 seconds, RMSE: 571.6996
         fold0  fold1  fold2  fold3  fold4   mean
clarity    680    698    701    731    687  699.4
color      577    557    571    570    580  571.0
y          464    450    436    446    443  447.8
x          400    377    389    398    439  400.6
carat      339    358    350    338    309  338.8
z          172    182    179    168    172  174.6
depth      181    170    171    146    171  167.8
cut        106    120    116    116    123  116.2
table       81     88     87     87     76   83.8


In [39]:
# Backward feature elimination: cross-validate on the current feature set,
# then drop the feature with the lowest mean importance and repeat until a
# single feature has been evaluated.
regressor = LGBMRegressor(random_state=SEED, n_jobs=-1, colsample_bytree=.632, subsample=.632, subsample_freq=1)
ttr = TransformedTargetRegressor(regressor=regressor, func=target_transform, inverse_func=inverse_target_transform, check_inverse=False)

# Label derived from the estimator used here, instead of relying on the
# `model_name` variable left behind by the model-comparison cell.
run_label = type(regressor).__name__

selected_cols = X_train.columns
len_cols = len(selected_cols)

# drop one feature at a time based on feature importance
for _ in range(len_cols):
    t0 = time.time()
    scores = []
    fold_importances = []
    n_features = len(selected_cols)
    for i, (train_index, test_index) in tqdm(enumerate(cv.split(X_train)), total=cv.get_n_splits()):

        X_train_cv, X_test_cv = X_train.iloc[train_index][selected_cols].copy(), X_train.iloc[test_index][selected_cols].copy()
        y_train_cv, y_test_cv = y_train.iloc[train_index].copy(), y_train.iloc[test_index].copy()

        if data_prep_has_fit_method:
            X_train_cv, X_test_cv = data_prepation(X_train_cv, X_test_cv)

        ttr.fit(X_train_cv, y_train_cv)
        y_pred = ttr.predict(X_test_cv)
        # sqrt(MSE) instead of mean_squared_error(..., squared=False): the
        # `squared` argument is deprecated and absent from recent scikit-learn.
        scores.append(np.sqrt(mean_squared_error(y_test_cv, y_pred)))

        # Prefer feature_importances_, fall back to coef_; catching every
        # exception here would also hide unrelated errors.
        fitted = ttr.regressor_
        importance = getattr(fitted, 'feature_importances_', None)
        if importance is None:
            importance = getattr(fitted, 'coef_', None)
        if importance is not None:
            fold_importances.append(pd.Series(importance, index=X_train_cv.columns, name=f'fold{i}'))

    # One column per fold plus their row-wise mean.
    feature_importances = pd.concat(fold_importances, axis=1) if fold_importances else pd.DataFrame()
    feature_importances['mean'] = feature_importances.mean(axis=1)

    ttr.fit(X_train_prep[selected_cols], y_train)
    y_pred = ttr.predict(X_test_prep[selected_cols])

    # Next round: everything except the least important feature. The columns
    # are re-ordered by ascending importance as a side effect.
    selected_cols = feature_importances.sort_values('mean', ascending=True).index[1:]

    # Hold-out RMSE only exists when the test target is known. Use NaN in
    # submit mode rather than printing the last fold score under that label.
    if not SUBMIT:
        score_eval = np.sqrt(mean_squared_error(y_test, y_pred))
    else:
        score_eval = float('nan')

    print(f'{run_label} [{n_features} features]: {np.mean(scores):.4f} ± {np.std(scores):.4f}, Time: {time.time() - t0:.2f} seconds, RMSE: {score_eval:.4f}')
    print(feature_importances.sort_values('mean', ascending=False))
    # print(f'Selected columns: {selected_cols}')
    # print(f'Selected columns: {selected_cols}')

5it [00:25,  5.19s/it]


LGBMRegressor1: 582.2221 ± 1.3039, Time: 38.59 seconds, RMSE: 574.5906
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 525    535    529    541    529  531.8
color                   425    422    425    419    405  419.2
carat                   242    237    246    242    228  239.0
y                       156    147    151    149    162  153.0
density                 134    133    132    136    132  133.4
z                       118    128    103    115    132  119.2
x                       126    107    114     97    133  115.4
volume_ratio10          131    113    102     98     99  108.6
cut                      89    100     98    102     94   96.6
surface_area             81     81     81     82     80   81.0
volume_ratio6            83     81     84     77     74   79.8
diagonal_distance        75     77     75     79     93   79.8
table_percentage         74     86     83     77     76   79.2
depth                    76     67     72     5

5it [00:17,  3.50s/it]


LGBMRegressor1: 582.7259 ± 1.6420, Time: 20.94 seconds, RMSE: 575.9152
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 496    500    495    509    500  500.0
color                   435    437    422    430    444  433.6
carat                   283    305    272    287    269  283.2
y                       141    137    128    148    136  138.0
volume_ratio14          132    123    103    118    130  121.2
x                       109    122    121    114    114  116.0
density                 124    109    121    103    100  111.4
z                       107    111    118    109    111  111.2
cut                      95     98     98     99     93   96.6
diagonal_distance        92     87     98     81    100   91.6
table_percentage         84     83     84     74     87   82.4
aspect_ratio_xy          75     80     83     82     89   81.8
girdle_diameter          72     71     67     58     69   67.4
depth_percentage         63     58     68     7

5it [00:16,  3.27s/it]


LGBMRegressor1: 581.0719 ± 0.8640, Time: 22.81 seconds, RMSE: 728.4849
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 563    557    567    547    571  561.0
color                   418    419    404    416    409  413.2
carat                   259    246    251    226    241  244.6
z                       133    123    146    153    129  136.8
y                       129    141    127    131    130  131.6
density                 130    123    121    131    125  126.0
volume_ratio11          132    107    111     98    122  114.0
cut                     110    121     94    109    111  109.0
x                        93    105     94    103    104   99.8
surface_area             70     80     85     91     90   83.2
diagonal_distance        88     80     81     75     76   80.0
volume_ratio6            75     67     85     77     84   77.6
table_percentage         69     72     88     79     75   76.6
depth_percentage         68     79     54     6

5it [00:23,  4.69s/it]


LGBMRegressor1: 581.3686 ± 1.1244, Time: 27.74 seconds, RMSE: 573.9160
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 590    576    563    566    568  572.6
color                   411    417    429    430    413  420.0
carat                   263    279    259    264    241  261.2
y                       148    137    134    142    142  140.6
z                       139    128    130    107    122  125.2
density                 108    117    125    120    115  117.0
x                       118    100     96    104    121  107.8
cut                      97    109    104    106    119  107.0
volume_ratio14          117     97    107    100    108  105.8
aspect_ratio_xy          80     99     92    105     89   93.0
table_percentage         80     72     88     74     87   80.2
diagonal_distance        62     82     95     72     79   78.0
girdle_diameter          78     75     63     68     66   70.0
surface_area             52     62     54     6

5it [00:17,  3.50s/it]


LGBMRegressor1: 582.2402 ± 0.6077, Time: 20.83 seconds, RMSE: 596.7582
                      fold0  fold1  fold2  fold3  fold4   mean
color                   533    520    524    509    518  520.8
clarity                 470    474    469    472    456  468.2
carat                   249    261    252    255    228  249.0
y                       142    140    149    141    137  141.8
x                       124    128    125    139    115  126.2
z                       132    112    128    112    135  123.8
density                  93    103    123    108    117  108.8
volume_ratio11          131     92     91    111    110  107.0
cut                      97    119    107     99    105  105.4
volume_ratio6           106     93     95    113    103  102.0
table_percentage         89     87    103     86    100   93.0
surface_area             70     67     69     89     69   72.8
depth_percentage         72     69     62     71     66   68.0
diagonal_distance        60     68     64     6

5it [00:13,  2.78s/it]


LGBMRegressor1: 581.4560 ± 0.8472, Time: 17.15 seconds, RMSE: 575.6916
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 582    577    562    573    584  575.6
color                   433    452    445    455    448  446.6
carat                   257    256    260    268    241  256.4
y                       164    153    142    128    143  146.0
volume_ratio13          145    117    140    131    121  130.8
z                       114    130    121    116    137  123.6
x                       121    106    114    114    113  113.6
density                 103    108    107    113    104  107.0
cut                      95     93     92     88     89   91.4
table_percentage         80     82     89     77     98   85.2
aspect_ratio_xy          83     82     86     86     87   84.8
depth_percentage         67     83     70     70     66   71.2
diagonal_distance        77     76     65     58     77   70.6
girdle_thickness         66     62     65     6

5it [00:16,  3.29s/it]


LGBMRegressor1: 580.8111 ± 1.2987, Time: 19.56 seconds, RMSE: 682.3577
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 599    595    578    583    583  587.6
color                   378    389    395    390    387  387.8
carat                   242    264    266    259    255  257.2
y                       159    149    149    136    149  148.4
density                 120    121    141    130    119  126.2
volume_ratio11          115    117    111    121    137  120.2
z                       121    121    120    103    124  117.8
x                       118    103    114    113    117  113.0
cut                     101    109    116    106    109  108.2
volume_ratio6           116     88    102     98    103  101.4
table_percentage         76     83     79     89     69   79.2
depth_percentage         68     74     65     84     82   74.6
diagonal_distance        73     68     88     59     71   71.8
surface_area             73     67     61     6

5it [00:12,  2.58s/it]


LGBMRegressor1: 582.1272 ± 1.2332, Time: 15.89 seconds, RMSE: 574.4111
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 589    585    584    595    580  586.6
color                   445    433    437    417    432  432.8
carat                   244    251    239    256    245  247.0
y                       131    150    149    139    152  144.2
density                 134    128    130    114    129  127.0
z                       117    122    124     96    122  116.2
volume_ratio13          124     98    115    119    116  114.4
aspect_ratio_xy         111    107    104    106    103  106.2
x                        97    104     96    104    110  102.2
cut                      99    107     95    100     93   98.8
table_percentage         88    103     89     92     94   93.2
diagonal_distance        79     81     86     79     84   81.8
girdle_diameter          77     62     69     80     72   72.0
depth                    74     77     66     6

5it [00:14,  2.90s/it]


LGBMRegressor1: 581.3291 ± 1.2504, Time: 18.15 seconds, RMSE: 571.2174
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 573    571    564    585    565  571.6
color                   439    427    457    432    438  438.6
carat                   259    277    279    257    235  261.4
density                 133    139    135    147    138  138.4
x                       135    126    123    141    130  131.0
z                       114    125    123    122    130  122.8
volume_ratio13          124    127    110    102    115  115.6
y                       111    121     99    108    121  112.0
cut                     117    116     98    109    117  111.4
volume_ratio6           114     92    111     96    119  106.4
diagonal_distance        88     76     75     74     88   80.2
table_percentage         90     76     79     74     75   78.8
surface_area             81     79     75     83     74   78.4
depth_percentage         73     52     69     5

5it [00:13,  2.68s/it]


LGBMRegressor1: 582.2591 ± 1.7110, Time: 26.58 seconds, RMSE: 570.6061
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 542    535    533    538    537  537.0
color                   497    507    497    492    485  495.6
carat                   253    248    239    252    219  242.2
y                       136    129    138    147    154  140.8
x                       128    129    106    105    120  117.6
volume_ratio13          110    117    108    122    107  112.8
density                 107    122    114    108    109  112.0
z                       111     98    114     97    119  107.8
cut                     109    106     99     98    106  103.6
surface_area             83     80     99     76    109   89.4
aspect_ratio_xy          84     81     78    100     98   88.2
diagonal_distance        76     78     81     84     84   80.6
girdle_diameter          85     80     67     85     66   76.6
table_percentage         54     84     79     7

5it [00:12,  2.47s/it]


LGBMRegressor1: 581.6727 ± 1.1879, Time: 15.39 seconds, RMSE: 569.3570
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 529    536    522    549    565  540.2
color                   477    471    477    472    461  471.6
carat                   257    245    216    253    215  237.2
z                       161    148    173    133    159  154.8
y                       141    142    140    118    132  134.6
x                       107    126    121    150    142  129.2
density                 108    115    119    114    113  113.8
cut                     104    114    110     99    105  106.4
diagonal_distance        95     98     85     98     90   93.2
volume_ratio6            96     93     85     91     89   90.8
table_percentage         82     87     93     79     70   82.2
surface_area             73     72     92     84     82   80.6
depth_percentage         84     86     79     85     67   80.2
volume_ratio13           90     71     75     7

5it [00:14,  2.99s/it]


LGBMRegressor1: 581.8540 ± 1.1754, Time: 18.59 seconds, RMSE: 570.3681
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 602    597    609    610    617  607.0
color                   435    422    419    417    417  422.0
carat                   237    246    258    252    241  246.8
y                       156    164    146    138    144  149.6
z                       139    133    134    123    102  126.2
density                 108    112    126    101    132  115.8
x                       103    116    109    120    112  112.0
volume_ratio13          107    108     99    109    110  106.6
cut                     102     95     97    107     90   98.2
aspect_ratio_xy         107     94    106     85     80   94.4
table_percentage         95     81     93     93    104   93.2
diagonal_distance        92     90     80     96    101   91.8
surface_area             77     83     95     92     84   86.2
depth                    87     65     85     7

5it [00:11,  2.36s/it]


LGBMRegressor1: 580.7451 ± 1.2730, Time: 14.62 seconds, RMSE: 571.0478
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 548    560    554    542    541  549.0
color                   457    457    445    450    464  454.6
carat                   236    242    249    241    240  241.6
z                       138    121    153    137    140  137.8
y                       127    132    125    132    130  129.2
density                 118    120    109    122    127  119.2
aspect_ratio_xy         124    123    107    124    117  119.0
x                       121    111    112    126    106  115.2
surface_area            111     91    109    109    116  107.2
cut                     104    107    106    106    110  106.6
volume_ratio13          104     95     94    116     99  101.6
table_percentage         79     92     93     79     73   83.2
diagonal_distance        87     85     79     79     74   80.8
volume_ratio8            68     81     66     6

5it [00:11,  2.23s/it]


LGBMRegressor1: 581.2420 ± 0.8197, Time: 14.04 seconds, RMSE: 571.3111
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 518    533    526    523    520  524.0
color                   437    409    418    424    427  423.0
carat                   287    299    289    273    269  283.4
z                       148    139    157    143    141  145.6
density                 133    156    138    149    144  144.0
y                       137    138    139    137    135  137.2
x                       113    103    109    115    129  113.8
cut                     103    122    103    102    102  106.4
aspect_ratio_xy         110    105     97    101    104  103.4
volume_ratio13           95     86     97    103    105   97.2
table_percentage         88     83     97     91     85   88.8
surface_area             83     85     81     86     75   82.0
diagonal_distance        83     76     78     74     94   81.0
volume_ratio8            68     70     64     9

5it [00:12,  2.50s/it]


LGBMRegressor1: 581.4321 ± 1.2011, Time: 15.12 seconds, RMSE: 571.6996
                      fold0  fold1  fold2  fold3  fold4   mean
clarity                 520    533    532    519    531  527.0
color                   436    424    429    410    426  425.0
carat                   273    272    284    300    247  275.2
z                       143    156    154    165    148  153.2
y                       156    152    159    148    143  151.6
density                 139    143    139    152    151  144.8
x                       121    137    108    124    138  125.6
aspect_ratio_xy         133    118    117    125    116  121.8
cut                     116    108    117    115    108  112.8
surface_area            113    115    101    104    115  109.6
volume_ratio13           98     99     90     80    102   93.8
table_percentage         75     81     79     76     70   76.2
depth                    81     68     66     74     69   71.6
diagonal_distance        80     66     67     6

5it [00:13,  2.69s/it]


LGBMRegressor1: 582.6114 ± 1.5226, Time: 16.13 seconds, RMSE: 572.5558
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              494    490    480    495    493  490.4
color                476    502    475    494    482  485.8
carat                302    291    300    292    274  291.8
y                    156    165    161    148    155  157.0
z                    147    149    149    157    162  152.8
aspect_ratio_xy      129    112    113    117    124  119.0
cut                  110    122    117    124    114  117.4
x                    114    114    111    111    112  112.4
density              110     99    112    116    106  108.6
surface_area          93     87    120     94     85   95.8
table_percentage      88     81     99     98     97   92.6
volume_ratio13        95     97     75     68     89   84.8
depth                 76     76     74     80     77   76.6
diagonal_distance     63     74     80     70     94   76.2
volume_ratio8         72     

5it [00:14,  2.95s/it]


LGBMRegressor1: 580.4304 ± 1.3391, Time: 18.31 seconds, RMSE: 571.5005
                   fold0  fold1  fold2  fold3  fold4   mean
color                507    540    508    529    525  521.8
clarity              486    496    491    489    484  489.2
carat                278    282    283    257    250  270.0
density              121    121    127    136    122  125.4
y                    134    104    136    126    117  123.4
x                    128    115    109    126    125  120.6
z                    125    122    128     99    122  119.2
cut                  112    118    112    114    115  114.2
aspect_ratio_xy      112    111    111    104    111  109.8
volume_ratio13       106    111    104     98    106  105.0
surface_area          89    103     90     95    104   96.2
girdle_ratio          88     95     94     90     92   91.8
diagonal_distance    102     81     79     84     95   88.2
table_percentage      84     79     89    104     81   87.4
volume_ratio8         81     

5it [00:15,  3.11s/it]


LGBMRegressor1: 580.7901 ± 1.7745, Time: 18.94 seconds, RMSE: 571.4353
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              666    659    664    651    656  659.2
color                386    404    382    390    390  390.4
carat                263    298    277    280    244  272.4
z                    127    138    151    123    159  139.6
y                    141    135    135    128    127  133.2
x                    125    121    131    141    125  128.6
aspect_ratio_xy      110    107    105    107    115  108.8
surface_area         108     92     91    114    113  103.6
table_percentage     113    102    100     99     98  102.4
cut                   97    103    101    105    105  102.2
density               89     97    105     98     99   97.6
girdle_ratio          84     98    100     99     85   93.2
volume_ratio13        95     77     81     74     97   84.8
diagonal_distance     89     87     77     77     88   83.6
depth                 83     

5it [00:14,  2.94s/it]


LGBMRegressor1: 582.3861 ± 0.7172, Time: 19.58 seconds, RMSE: 571.7007
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              529    535    527    538    532  532.2
color                455    453    449    449    452  451.6
carat                265    285    283    263    254  270.0
y                    151    152    160    165    149  155.4
x                    137    170    147    143    160  151.4
z                    155    134    159    142    166  151.2
density              128    124    117    131    121  124.2
table_percentage     132    119    125    122    121  123.8
cut                  121    118    108    107    106  112.0
girdle_ratio          92    118    102    114    117  108.6
aspect_ratio_xy      107    123    101    104    106  108.2
volume_ratio13       108    101    103     99    115  105.2
girdle_diameter      111     94     95     88     88   95.2
depth                 75     76     88     88     90   83.4
surface_area          78     

5it [00:12,  2.44s/it]


LGBMRegressor1: 581.3872 ± 1.5570, Time: 14.53 seconds, RMSE: 571.9128
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              521    505    492    496    494  501.6
color                481    504    508    501    504  499.6
carat                304    314    284    284    312  299.6
y                    164    173    171    190    175  174.6
z                    172    143    149    145    149  151.6
density              127    147    134    151    123  136.4
x                    130    122    135    126    132  129.0
volume_ratio13       125    111    125    111    108  116.0
table_percentage     110    106    117    112    112  111.4
aspect_ratio_xy      122    113    100    103    117  111.0
cut                   98    107    113    107    111  107.2
surface_area         105    112    112    103     97  105.8
depth                102     98    119    104    106  105.8
diagonal_distance    100     94     83     95     98   94.0
girdle_diameter       73     

5it [00:13,  2.77s/it]


LGBMRegressor1: 582.1518 ± 1.3323, Time: 29.05 seconds, RMSE: 572.5887
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              573    558    555    551    563  560.0
color                418    412    415    414    421  416.0
carat                278    282    287    276    259  276.4
y                    163    154    153    159    153  156.4
z                    139    163    152    145    142  148.2
aspect_ratio_xy      128    141    137    147    145  139.6
density              125    133    137    131    133  131.8
volume_ratio13       144    127    121    136    130  131.6
x                    120    134    111    131    136  126.4
depth                130    104    130    134    120  123.6
table_percentage     116    120    122    123    115  119.2
girdle_ratio         105    111    119    106    120  112.2
surface_area          94    107    108    102    108  103.8
diagonal_distance     94    103     94     89    107   97.4
girdle_diameter      113     

5it [00:13,  2.65s/it]


LGBMRegressor1: 580.8168 ± 1.5001, Time: 16.51 seconds, RMSE: 572.0801
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              541    535    552    549    553  546.0
color                468    468    445    452    458  458.2
carat                253    280    268    277    245  264.6
z                    207    199    200    178    203  197.4
y                    196    183    171    193    160  180.6
aspect_ratio_xy      142    148    160    152    166  153.6
x                    143    128    121    139    126  131.4
table_percentage     124    123    137    124    119  125.4
depth                128    120    124    128    126  125.2
volume_ratio13       130    106    124    113    127  120.0
density              106    117    129    124    117  118.6
cut                  116    106    104    106    111  108.6
surface_area         101    110     91    107    107  103.2
diagonal_distance    102    109     98     99    104  102.4
girdle_ratio          92     

5it [00:09,  1.92s/it]


LGBMRegressor1: 581.4900 ± 1.3481, Time: 12.54 seconds, RMSE: 571.9485
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              537    530    525    524    530  529.2
color                464    466    468    496    460  470.8
carat                292    266    280    254    272  272.8
z                    178    188    204    182    181  186.6
y                    192    176    175    181    187  182.2
x                    173    177    147    170    176  168.6
aspect_ratio_xy      148    151    148    153    151  150.2
density              137    144    142    134    141  139.6
surface_area         125    116    126    132    125  124.8
depth                135    114    135    113    123  124.0
volume_ratio13       116    120    106    124    111  115.4
table_percentage     123    110    117    114    105  113.8
cut                   99    122    111    107    112  110.2
girdle_ratio          95    117    108    112    114  109.2
diagonal_distance     97    1

5it [00:16,  3.21s/it]


LGBMRegressor1: 580.9576 ± 0.7274, Time: 18.21 seconds, RMSE: 570.9902
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              593    574    600    590    589  589.2
color                367    379    377    381    391  379.0
carat                271    281    267    258    249  265.2
z                    163    195    181    174    177  178.0
x                    166    163    180    180    200  177.8
y                    199    173    173    166    174  177.0
aspect_ratio_xy      157    162    161    153    158  158.2
depth                157    154    156    158    155  156.0
density              148    150    150    148    147  148.6
table_percentage     153    140    155    163    131  148.4
volume_ratio13       166    142    152    120    147  145.4
girdle_ratio         111    128    123    148    128  127.6
diagonal_distance    123    118    106    118    124  117.8
cut                  120    122    114    113    118  117.4
surface_area         106    1

5it [00:11,  2.28s/it]


LGBMRegressor1: 582.7217 ± 1.5318, Time: 13.76 seconds, RMSE: 571.3349
                   fold0  fold1  fold2  fold3  fold4   mean
color                505    521    515    515    526  516.4
clarity              515    515    505    515    499  509.8
carat                278    267    292    273    245  271.0
y                    212    212    205    209    219  211.4
z                    201    202    188    195    203  197.8
depth                171    163    168    180    174  171.2
aspect_ratio_xy      170    175    148    169    165  165.4
x                    148    171    147    152    158  155.2
diagonal_distance    149    143    156    152    147  149.4
volume_ratio13       148    140    157    131    161  147.4
density              140    126    143    133    140  136.4
table_percentage     125    130    137    137    122  130.2
girdle_ratio         122    110    127    124    121  120.8
cut                  116    125    112    115    120  117.6


5it [00:15,  3.01s/it]


LGBMRegressor1: 583.1933 ± 1.9796, Time: 16.91 seconds, RMSE: 573.4601
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              613    626    612    596    601  609.6
color                377    378    385    390    382  382.4
carat                298    290    300    315    291  298.8
x                    204    191    200    209    230  206.8
z                    195    206    208    198    201  201.6
depth                178    177    192    203    192  188.4
y                    202    190    181    190    179  188.4
aspect_ratio_xy      190    190    179    173    187  183.8
table_percentage     170    171    171    169    162  168.6
diagonal_distance    128    151    162    150    168  151.8
volume_ratio13       167    156    144    148    142  151.4
density              141    140    138    129    138  137.2
girdle_ratio         137    134    128    130    127  131.2


5it [00:08,  1.66s/it]


LGBMRegressor1: 582.5984 ± 1.4794, Time: 11.13 seconds, RMSE: 574.4446
                   fold0  fold1  fold2  fold3  fold4   mean
clarity              598    599    602    620    612  606.2
color                426    418    418    413    398  414.6
carat                302    305    340    317    312  315.2
depth                239    233    245    231    230  235.6
y                    247    213    219    209    210  219.6
z                    190    193    187    198    212  196.0
aspect_ratio_xy      190    193    181    203    188  191.0
x                    181    179    170    175    183  177.6
table_percentage     170    181    180    171    172  174.8
volume_ratio13       182    177    160    169    178  173.2
density              160    176    163    162    161  164.4
diagonal_distance    115    133    135    132    144  131.8


5it [00:09,  1.86s/it]


LGBMRegressor1: 583.3548 ± 1.6393, Time: 11.74 seconds, RMSE: 573.4294
                  fold0  fold1  fold2  fold3  fold4   mean
color               530    538    512    533    538  530.2
clarity             522    512    525    525    496  516.0
carat               335    332    339    335    316  331.4
depth               267    250    256    268    261  260.4
volume_ratio13      247    250    252    233    245  245.4
x                   203    221    209    219    229  216.2
y                   215    198    201    203    192  201.8
aspect_ratio_xy     205    201    198    194    203  200.2
z                   171    190    201    170    212  188.8
table_percentage    173    171    167    188    166  173.0
density             132    137    140    132    142  136.6


5it [00:11,  2.22s/it]


LGBMRegressor1: 583.4028 ± 1.2147, Time: 13.35 seconds, RMSE: 574.3988
                  fold0  fold1  fold2  fold3  fold4   mean
clarity             636    634    634    623    645  634.4
color               418    440    422    438    441  431.8
carat               397    390    383    375    336  376.2
y                   288    259    279    276    284  277.2
x                   246    257    246    269    229  249.4
depth               246    236    246    254    253  247.0
z                   212    203    231    209    241  219.2
table_percentage    212    214    207    202    212  209.4
aspect_ratio_xy     165    186    182    172    184  177.8
volume_ratio13      180    181    170    182    175  177.6


5it [00:08,  1.62s/it]


LGBMRegressor1: 582.4616 ± 1.9388, Time: 9.86 seconds, RMSE: 572.0307
                  fold0  fold1  fold2  fold3  fold4   mean
clarity             623    629    612    616    614  618.8
color               447    442    446    465    464  452.8
carat               401    395    427    387    377  397.4
y                   367    350    339    350    333  347.8
x                   273    259    272    247    268  263.8
depth               241    272    235    261    252  252.2
z                   234    225    243    224    262  237.6
aspect_ratio_xy     209    217    212    223    228  217.8
table_percentage    205    211    214    227    202  211.8


5it [00:09,  1.91s/it]


LGBMRegressor1: 584.5356 ± 1.7502, Time: 11.61 seconds, RMSE: 574.8884
                 fold0  fold1  fold2  fold3  fold4   mean
clarity            572    577    577    545    543  562.8
color              485    469    477    496    489  483.2
y                  427    403    396    418    410  410.8
z                  362    347    382    371    378  368.0
carat              362    392    362    342    331  357.8
x                  307    323    316    338    352  327.2
depth              259    257    263    262    262  260.6
aspect_ratio_xy    226    232    227    228    235  229.6


5it [00:07,  1.48s/it]


LGBMRegressor1: 587.4387 ± 2.0577, Time: 8.93 seconds, RMSE: 575.4723
         fold0  fold1  fold2  fold3  fold4   mean
y          558    567    558    563    553  559.8
color      528    525    541    536    535  533.0
x          490    490    477    475    507  487.8
clarity    465    462    444    465    458  458.8
carat      355    357    361    360    334  353.4
z          343    345    353    326    346  342.6
depth      261    254    266    275    267  264.6


5it [00:07,  1.48s/it]


LGBMRegressor1: 587.0267 ± 1.6382, Time: 9.05 seconds, RMSE: 576.6399
         fold0  fold1  fold2  fold3  fold4   mean
clarity    651    625    644    633    643  639.2
y          529    523    520    525    538  527.0
x          507    518    494    520    522  512.2
color      507    501    502    512    509  506.2
carat      416    443    453    403    379  418.8
z          390    390    387    407    409  396.6


5it [00:07,  1.41s/it]


LGBMRegressor1: 589.7247 ± 2.6382, Time: 9.01 seconds, RMSE: 578.5290
         fold0  fold1  fold2  fold3  fold4   mean
carat      744    736    764    735    706  737.0
clarity    628    646    615    640    644  634.6
y          586    570    604    590    588  587.6
color      574    572    540    554    569  561.8
x          468    476    477    481    493  479.0


5it [00:06,  1.25s/it]


LGBMRegressor1: 591.6567 ± 2.5517, Time: 7.59 seconds, RMSE: 581.5495
         fold0  fold1  fold2  fold3  fold4   mean
carat      863    864    876    863    854  864.0
y          789    799    764    788    805  789.0
clarity    756    755    747    748    733  747.8
color      592    582    613    601    608  599.2


5it [00:16,  3.23s/it]


LGBMRegressor1: 919.1052 ± 9.9074, Time: 18.14 seconds, RMSE: 885.0564
         fold0  fold1  fold2  fold3  fold4    mean
carat     1842   1824   1856   1820   1826  1833.6
clarity   1158   1176   1144   1180   1174  1166.4
y            0      0      0      0      0     0.0


5it [00:04,  1.17it/s]


LGBMRegressor1: 4228.2854 ± 33.2994, Time: 5.17 seconds, RMSE: 4216.9971
         fold0  fold1  fold2  fold3  fold4   mean
clarity    700    700    700    700    700  700.0
carat        0      0      0      0      0    0.0


5it [00:03,  1.30it/s]


LGBMRegressor1: 4228.2854 ± 33.2994, Time: 4.69 seconds, RMSE: 4216.9971
         fold0  fold1  fold2  fold3  fold4   mean
clarity    700    700    700    700    700  700.0


In [None]:
# y_pred_test = test_predictions.mean(axis=1).round().astype(int)

# sub = pd.read_csv('submissions/sample_submission.csv')
# sub['quality'] = y_pred_test
# now = time.strftime("%Y-%m-%d %H_%M_%S")
# sub.to_csv(f'submissions/submission{now}.csv', index=False)
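# # Copy the leaked values from the original dataset into the predictions before submitting
# # Transform the price column back to its original scale
# # NOTE(review): the disabled lines above write to sub['quality'] and round to int, but this note refers to price — confirm the target column and dtype before re-enabling this cell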