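This notebook trains an XGBoost regressor with 10-fold cross-validation, keeps the fold model with the lowest validation RMSE, and uses it to generate the test-set predictions written to `./output/xgb_pred.csv`.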

In [18]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import KFold
from xgboost import XGBRegressor

from xgb_params import xgb_params as params, num_top_features as top_n

In [19]:
# Echo the hyperparameter dict imported from xgb_params so the configuration
# used for this run is recorded in the notebook output.
params

{'n_estimators': 200,
 'device': 'cuda',
 'objective': 'reg:squarederror',
 'eval_metric': 'rmsle',
 'enable_categorical': True,
 'early_stopping_rounds': 20,
 'colsample_bytree': 0.7,
 'eta': 0.1,
 'gamma': 0,
 'max_depth': 6,
 'min_child_weight': 3.0,
 'reg_alpha': 93.0,
 'reg_lambda': 0.8685796539747039,
 'n_jobs': 4}

In [20]:
# File paths (all relative to the directory the notebook is run from).

# Inputs read by the loading cell: the CSV holding the feature names
# (column `col_name`) and the pre-processed train / test tables.
feats_path = "./best_feats/price_index_feats.csv"
process_train_path = "./processed/processed_train_2.csv"
processed_test_path = "./processed/processed_test_2.csv"

# Original test file; its `id` column labels the saved predictions.
test_df_path = "../../Dataset/test.csv/test.csv"

# Destination of the prediction CSV.
output_path = "./output/xgb_pred.csv"


In [21]:
# Feature selection: keep the first `top_n` names listed in the `col_name`
# column of the feature file, in file order.
feats_df = pd.read_csv(feats_path)
feats = feats_df['col_name'].iloc[:top_n].tolist()

# Training table restricted to the selected features plus the target column.
processed_df = pd.read_csv(process_train_path).loc[:, feats + ['price_doc']]
y = processed_df['price_doc']
X = processed_df.drop(columns=['price_doc'])[feats]

# Test data: the original file (read for its `id` column) and the
# pre-processed table that is fed to the model.
processed_test_df = pd.read_csv(processed_test_path)
test_df = pd.read_csv(test_df_path)
processed_test_df.head()

Unnamed: 0,id,full_sq,life_sq,floor,max_floor,build_year,num_room,kitch_sq,state,area_m,...,big_road1_1line_yes,railroad_1line_no,railroad_1line_yes,material_1.0,material_2.0,material_3.0,material_4.0,material_5.0,material_6.0,material_nan
0,30474,39.0,20.7,2,9,1998.0,1,8.9,3.0,26155140.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30475,79.2,34.404467,8,17,0.0,3,1.0,1.0,25536300.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,30476,40.5,25.1,3,5,1960.0,2,4.8,2.0,9946335.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,30477,62.8,36.0,17,17,2016.0,2,62.8,3.0,21494090.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,30478,40.0,40.0,17,17,0.0,1,1.0,1.0,25536300.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
def train(X, y, evalset=None):
    """Fit an ``XGBRegressor`` configured from the module-level ``params`` dict.

    Parameters
    ----------
    X
        Feature matrix, passed unchanged to ``XGBRegressor.fit``.
    y
        Target values, passed unchanged to ``XGBRegressor.fit``.
    evalset
        Optional list of ``(X_val, y_val)`` tuples. When truthy it is forwarded
        to ``fit`` as ``eval_set`` and early stopping is enabled with
        ``params['early_stopping_rounds']``; when falsy the model is fitted
        without an evaluation set and early stopping is turned off.

    Returns
    -------
    The fitted ``XGBRegressor`` instance.
    """
    # Keys copied verbatim from `params`. Note that `reg_alpha` and
    # `reg_lambda` are not forwarded to the model by this function.
    forwarded_keys = (
        'n_estimators',
        'device',
        'objective',
        'eval_metric',
        'enable_categorical',
        'eta',
        'max_depth',
        'gamma',
        'min_child_weight',
        'colsample_bytree',
        'n_jobs',
    )
    model_kwargs = {key: params[key] for key in forwarded_keys}

    # Early stopping needs an evaluation set, so it is only requested when one
    # has been supplied.
    model_kwargs['early_stopping_rounds'] = (
        params['early_stopping_rounds'] if evalset else None
    )

    model = XGBRegressor(**model_kwargs)

    fit_kwargs = {'verbose': True}
    if evalset:
        fit_kwargs['eval_set'] = evalset
    model.fit(X, y, **fit_kwargs)

    return model

def find_best_model(processed_df):
    """Train one model per CV fold and return the one with the lowest validation RMSE.

    The frame is split with a shuffled 10-fold ``KFold`` (``random_state=42``).
    For every fold a model is trained via ``train`` on the training part, with
    the held-out part passed as the early-stopping evaluation set, and then
    scored on that same held-out part.

    Parameters
    ----------
    processed_df : pandas.DataFrame
        Training table containing the feature columns and the target column
        ``price_doc``. It is not modified.

    Returns
    -------
    (best_model, min_loss)
        ``best_model`` is the fitted model of the best-scoring fold and
        ``min_loss`` its RMSE on that fold's held-out rows, in the units of
        ``price_doc``. ``best_model`` is ``None`` and ``min_loss`` is ``inf``
        only if no fold produced a finite loss.

    Notes
    -----
    The returned loss is measured on the same rows that drove early stopping
    and the fold selection, so it is an optimistic estimate of the error on
    unseen data.
    """
    target_col = "price_doc"
    best_model = None
    min_loss = float('inf')
    cv = KFold(n_splits=10, shuffle=True, random_state=42)

    for train_idx, val_idx in cv.split(processed_df):
        train_fold = processed_df.iloc[train_idx]
        val_fold = processed_df.iloc[val_idx]

        # `drop` without `inplace` returns a new frame, so the fold slices are
        # never mutated and pandas no longer emits SettingWithCopyWarning.
        X_train = train_fold.drop(columns=[target_col])
        y_train = train_fold[target_col]
        X_val = val_fold.drop(columns=[target_col])
        y_val = val_fold[target_col]

        model = train(X_train, y_train, [(X_val, y_val)])

        pred = model.predict(X_val)
        # RMSE computed explicitly: the `squared=False` argument of
        # `mean_squared_error` is deprecated in scikit-learn 1.4 and removed
        # in 1.6.
        loss = float(np.sqrt(mean_squared_error(y_val, pred)))

        if loss < min_loss:
            min_loss = loss
            best_model = model

    return best_model, min_loss

In [23]:
# Run the 10-fold search. `model` is the fold model with the lowest validation
# RMSE and `loss` is that RMSE. Training is verbose, so this cell prints one
# line per boosting round for every fold.
model, loss = find_best_model(processed_df)

[0]	validation_0-rmsle:0.61009
[1]	validation_0-rmsle:0.59282



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[2]	validation_0-rmsle:0.57619
[3]	validation_0-rmsle:0.56244
[4]	validation_0-rmsle:0.55155
[5]	validation_0-rmsle:0.54386
[6]	validation_0-rmsle:0.53548
[7]	validation_0-rmsle:0.52847
[8]	validation_0-rmsle:0.52230
[9]	validation_0-rmsle:0.51799
[10]	validation_0-rmsle:0.51355
[11]	validation_0-rmsle:0.50959
[12]	validation_0-rmsle:0.50627
[13]	validation_0-rmsle:0.50337
[14]	validation_0-rmsle:0.50099
[15]	validation_0-rmsle:0.49919
[16]	validation_0-rmsle:0.49772
[17]	validation_0-rmsle:0.49577
[18]	validation_0-rmsle:0.49408
[19]	validation_0-rmsle:0.49260
[20]	validation_0-rmsle:0.49171
[21]	validation_0-rmsle:0.49036
[22]	validation_0-rmsle:0.48970
[23]	validation_0-rmsle:0.48852
[24]	validation_0-rmsle:0.48764
[25]	validation_0-rmsle:0.48673
[26]	validation_0-rmsle:0.48627
[27]	validation_0-rmsle:0.48581
[28]	validation_0-rmsle:0.48530
[29]	validation_0-rmsle:0.48480
[30]	validation_0-rmsle:0.48416
[31]	validation_0-rmsle:0.48375
[32]	validation_0-rmsle:0.48307
[33]	validation_

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[0]	validation_0-rmsle:0.59034
[1]	validation_0-rmsle:0.57371
[2]	validation_0-rmsle:0.55700
[3]	validation_0-rmsle:0.54342
[4]	validation_0-rmsle:0.53172
[5]	validation_0-rmsle:0.52348
[6]	validation_0-rmsle:0.51529
[7]	validation_0-rmsle:0.50805
[8]	validation_0-rmsle:0.50190
[9]	validation_0-rmsle:0.49729
[10]	validation_0-rmsle:0.49273
[11]	validation_0-rmsle:0.48875
[12]	validation_0-rmsle:0.48537
[13]	validation_0-rmsle:0.48243
[14]	validation_0-rmsle:0.48020
[15]	validation_0-rmsle:0.47806
[16]	validation_0-rmsle:0.47623
[17]	validation_0-rmsle:0.47418
[18]	validation_0-rmsle:0.47229
[19]	validation_0-rmsle:0.47091
[20]	validation_0-rmsle:0.46991
[21]	validation_0-rmsle:0.46873
[22]	validation_0-rmsle:0.46781
[23]	validation_0-rmsle:0.46687
[24]	validation_0-rmsle:0.46595
[25]	validation_0-rmsle:0.46517
[26]	validation_0-rmsle:0.46444
[27]	validation_0-rmsle:0.46383
[28]	validation_0-rmsle:0.46334
[29]	validation_0-rmsle:0.46299
[30]	validation_0-rmsle:0.46265
[31]	validation_0-

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[0]	validation_0-rmsle:0.58732
[1]	validation_0-rmsle:0.57077
[2]	validation_0-rmsle:0.55417
[3]	validation_0-rmsle:0.54065
[4]	validation_0-rmsle:0.52957
[5]	validation_0-rmsle:0.52174
[6]	validation_0-rmsle:0.51374
[7]	validation_0-rmsle:0.50703
[8]	validation_0-rmsle:0.50092
[9]	validation_0-rmsle:0.49697
[10]	validation_0-rmsle:0.49315
[11]	validation_0-rmsle:0.48974
[12]	validation_0-rmsle:0.48657
[13]	validation_0-rmsle:0.48324
[14]	validation_0-rmsle:0.48132
[15]	validation_0-rmsle:0.47945
[16]	validation_0-rmsle:0.47784
[17]	validation_0-rmsle:0.47607
[18]	validation_0-rmsle:0.47454
[19]	validation_0-rmsle:0.47317
[20]	validation_0-rmsle:0.47216
[21]	validation_0-rmsle:0.47092
[22]	validation_0-rmsle:0.47021
[23]	validation_0-rmsle:0.46924
[24]	validation_0-rmsle:0.46846
[25]	validation_0-rmsle:0.46770
[26]	validation_0-rmsle:0.46699
[27]	validation_0-rmsle:0.46664
[28]	validation_0-rmsle:0.46589
[29]	validation_0-rmsle:0.46551
[30]	validation_0-rmsle:0.46514
[31]	validation_0-

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[5]	validation_0-rmsle:0.53468
[6]	validation_0-rmsle:0.52684
[7]	validation_0-rmsle:0.51993
[8]	validation_0-rmsle:0.51387
[9]	validation_0-rmsle:0.50918
[10]	validation_0-rmsle:0.50481
[11]	validation_0-rmsle:0.50103
[12]	validation_0-rmsle:0.49774
[13]	validation_0-rmsle:0.49490
[14]	validation_0-rmsle:0.49293
[15]	validation_0-rmsle:0.49135
[16]	validation_0-rmsle:0.48966
[17]	validation_0-rmsle:0.48772
[18]	validation_0-rmsle:0.48655
[19]	validation_0-rmsle:0.48511
[20]	validation_0-rmsle:0.48393
[21]	validation_0-rmsle:0.48282
[22]	validation_0-rmsle:0.48202
[23]	validation_0-rmsle:0.48088
[24]	validation_0-rmsle:0.48014
[25]	validation_0-rmsle:0.47928
[26]	validation_0-rmsle:0.47878
[27]	validation_0-rmsle:0.47818
[28]	validation_0-rmsle:0.47764
[29]	validation_0-rmsle:0.47710
[30]	validation_0-rmsle:0.47675
[31]	validation_0-rmsle:0.47639
[32]	validation_0-rmsle:0.47606
[33]	validation_0-rmsle:0.47553
[34]	validation_0-rmsle:0.47512
[35]	validation_0-rmsle:0.47462
[36]	validati

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[4]	validation_0-rmsle:0.54046
[5]	validation_0-rmsle:0.53197
[6]	validation_0-rmsle:0.52264
[7]	validation_0-rmsle:0.51505
[8]	validation_0-rmsle:0.50863
[9]	validation_0-rmsle:0.50390
[10]	validation_0-rmsle:0.49895
[11]	validation_0-rmsle:0.49494
[12]	validation_0-rmsle:0.49135
[13]	validation_0-rmsle:0.48814
[14]	validation_0-rmsle:0.48570
[15]	validation_0-rmsle:0.48371
[16]	validation_0-rmsle:0.48195
[17]	validation_0-rmsle:0.47968
[18]	validation_0-rmsle:0.47785
[19]	validation_0-rmsle:0.47628
[20]	validation_0-rmsle:0.47516
[21]	validation_0-rmsle:0.47399
[22]	validation_0-rmsle:0.47292
[23]	validation_0-rmsle:0.47174
[24]	validation_0-rmsle:0.47068
[25]	validation_0-rmsle:0.46996
[26]	validation_0-rmsle:0.46921
[27]	validation_0-rmsle:0.46876
[28]	validation_0-rmsle:0.46826
[29]	validation_0-rmsle:0.46786
[30]	validation_0-rmsle:0.46746
[31]	validation_0-rmsle:0.46706
[32]	validation_0-rmsle:0.46644
[33]	validation_0-rmsle:0.46586
[34]	validation_0-rmsle:0.46544
[35]	validatio

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[4]	validation_0-rmsle:0.54488
[5]	validation_0-rmsle:0.53622
[6]	validation_0-rmsle:0.52766
[7]	validation_0-rmsle:0.52016
[8]	validation_0-rmsle:0.51395
[9]	validation_0-rmsle:0.50918
[10]	validation_0-rmsle:0.50454
[11]	validation_0-rmsle:0.50058
[12]	validation_0-rmsle:0.49679
[13]	validation_0-rmsle:0.49357
[14]	validation_0-rmsle:0.49131
[15]	validation_0-rmsle:0.48896
[16]	validation_0-rmsle:0.48739
[17]	validation_0-rmsle:0.48514
[18]	validation_0-rmsle:0.48354
[19]	validation_0-rmsle:0.48205
[20]	validation_0-rmsle:0.48090
[21]	validation_0-rmsle:0.47959
[22]	validation_0-rmsle:0.47865
[23]	validation_0-rmsle:0.47753
[24]	validation_0-rmsle:0.47656
[25]	validation_0-rmsle:0.47578
[26]	validation_0-rmsle:0.47502
[27]	validation_0-rmsle:0.47429
[28]	validation_0-rmsle:0.47371
[29]	validation_0-rmsle:0.47310
[30]	validation_0-rmsle:0.47265
[31]	validation_0-rmsle:0.47208
[32]	validation_0-rmsle:0.47158
[33]	validation_0-rmsle:0.47115
[34]	validation_0-rmsle:0.47078
[35]	validatio

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[5]	validation_0-rmsle:0.54671
[6]	validation_0-rmsle:0.53977
[7]	validation_0-rmsle:0.53364
[8]	validation_0-rmsle:0.52851
[9]	validation_0-rmsle:0.52464
[10]	validation_0-rmsle:0.52112
[11]	validation_0-rmsle:0.51823
[12]	validation_0-rmsle:0.51529
[13]	validation_0-rmsle:0.51292
[14]	validation_0-rmsle:0.51082
[15]	validation_0-rmsle:0.50897
[16]	validation_0-rmsle:0.50791
[17]	validation_0-rmsle:0.50653
[18]	validation_0-rmsle:0.50514
[19]	validation_0-rmsle:0.50406
[20]	validation_0-rmsle:0.50331
[21]	validation_0-rmsle:0.50267
[22]	validation_0-rmsle:0.50217
[23]	validation_0-rmsle:0.50150
[24]	validation_0-rmsle:0.50085
[25]	validation_0-rmsle:0.50039
[26]	validation_0-rmsle:0.49993
[27]	validation_0-rmsle:0.49940
[28]	validation_0-rmsle:0.49904
[29]	validation_0-rmsle:0.49873
[30]	validation_0-rmsle:0.49845
[31]	validation_0-rmsle:0.49789
[32]	validation_0-rmsle:0.49757
[33]	validation_0-rmsle:0.49735
[34]	validation_0-rmsle:0.49705
[35]	validation_0-rmsle:0.49683
[36]	validati

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[5]	validation_0-rmsle:0.54463
[6]	validation_0-rmsle:0.53751
[7]	validation_0-rmsle:0.53159
[8]	validation_0-rmsle:0.52652
[9]	validation_0-rmsle:0.52259
[10]	validation_0-rmsle:0.51869
[11]	validation_0-rmsle:0.51517
[12]	validation_0-rmsle:0.51243
[13]	validation_0-rmsle:0.51002
[14]	validation_0-rmsle:0.50758
[15]	validation_0-rmsle:0.50617
[16]	validation_0-rmsle:0.50489
[17]	validation_0-rmsle:0.50336
[18]	validation_0-rmsle:0.50185
[19]	validation_0-rmsle:0.50086
[20]	validation_0-rmsle:0.49998
[21]	validation_0-rmsle:0.49887
[22]	validation_0-rmsle:0.49783
[23]	validation_0-rmsle:0.49725
[24]	validation_0-rmsle:0.49670
[25]	validation_0-rmsle:0.49604
[26]	validation_0-rmsle:0.49557
[27]	validation_0-rmsle:0.49522
[28]	validation_0-rmsle:0.49460
[29]	validation_0-rmsle:0.49422
[30]	validation_0-rmsle:0.49379
[31]	validation_0-rmsle:0.49333
[32]	validation_0-rmsle:0.49288
[33]	validation_0-rmsle:0.49263
[34]	validation_0-rmsle:0.49223
[35]	validation_0-rmsle:0.49197
[36]	validati

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[6]	validation_0-rmsle:0.55116
[7]	validation_0-rmsle:0.54428
[8]	validation_0-rmsle:0.53827
[9]	validation_0-rmsle:0.53352
[10]	validation_0-rmsle:0.52904
[11]	validation_0-rmsle:0.52532
[12]	validation_0-rmsle:0.52209
[13]	validation_0-rmsle:0.51902
[14]	validation_0-rmsle:0.51697
[15]	validation_0-rmsle:0.51511
[16]	validation_0-rmsle:0.51338
[17]	validation_0-rmsle:0.51163
[18]	validation_0-rmsle:0.51012
[19]	validation_0-rmsle:0.50901
[20]	validation_0-rmsle:0.50802
[21]	validation_0-rmsle:0.50696
[22]	validation_0-rmsle:0.50641
[23]	validation_0-rmsle:0.50567
[24]	validation_0-rmsle:0.50490
[25]	validation_0-rmsle:0.50429
[26]	validation_0-rmsle:0.50363
[27]	validation_0-rmsle:0.50336
[28]	validation_0-rmsle:0.50281
[29]	validation_0-rmsle:0.50242
[30]	validation_0-rmsle:0.50208
[31]	validation_0-rmsle:0.50154
[32]	validation_0-rmsle:0.50132
[33]	validation_0-rmsle:0.50098
[34]	validation_0-rmsle:0.50083
[35]	validation_0-rmsle:0.50057
[36]	validation_0-rmsle:0.50028
[37]	validat

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[4]	validation_0-rmsle:0.54208
[5]	validation_0-rmsle:0.53376
[6]	validation_0-rmsle:0.52543
[7]	validation_0-rmsle:0.51831
[8]	validation_0-rmsle:0.51208
[9]	validation_0-rmsle:0.50729
[10]	validation_0-rmsle:0.50278
[11]	validation_0-rmsle:0.49861
[12]	validation_0-rmsle:0.49510
[13]	validation_0-rmsle:0.49217
[14]	validation_0-rmsle:0.48975
[15]	validation_0-rmsle:0.48747
[16]	validation_0-rmsle:0.48575
[17]	validation_0-rmsle:0.48383
[18]	validation_0-rmsle:0.48199
[19]	validation_0-rmsle:0.48049
[20]	validation_0-rmsle:0.47936
[21]	validation_0-rmsle:0.47814
[22]	validation_0-rmsle:0.47740
[23]	validation_0-rmsle:0.47630
[24]	validation_0-rmsle:0.47509
[25]	validation_0-rmsle:0.47419
[26]	validation_0-rmsle:0.47336
[27]	validation_0-rmsle:0.47281
[28]	validation_0-rmsle:0.47200
[29]	validation_0-rmsle:0.47136
[30]	validation_0-rmsle:0.47067
[31]	validation_0-rmsle:0.47031
[32]	validation_0-rmsle:0.46989
[33]	validation_0-rmsle:0.46945
[34]	validation_0-rmsle:0.46914
[35]	validatio

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead


In [24]:
# Report the validation RMSE of the selected fold model.
print("Loss: ", loss)

# Predict on the pre-processed test table, restricted to the feature columns
# the model was trained on.
pred = model.predict(processed_test_df.loc[:, feats])

Loss:  2293061.899287826


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead


In [25]:
# Save predictions
# The predictions were computed from `processed_test_df`, while the ids come
# from the original `test_df`. Fail loudly if the two tables are not
# row-aligned instead of silently pairing prices with the wrong ids.
if not np.array_equal(processed_test_df['id'].to_numpy(), test_df['id'].to_numpy()):
    raise ValueError(
        "Row mismatch between test_df and processed_test_df: "
        "the 'id' columns differ, so predictions cannot be matched to ids."
    )

prediction_df = pd.DataFrame({
    'id': test_df['id'],
    'price_doc': pred
})

# Create the output directory if needed so the write does not fail on a fresh
# checkout.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
prediction_df.to_csv(output_path, index=False)