In [17]:
import xgboost
from xgboost import XGBRegressor, XGBClassifier

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_log_error, r2_score, mean_squared_error

from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder
from category_encoders import MEstimateEncoder, TargetEncoder
from sklearn.inspection import permutation_importance

import seaborn as sns
import shap

from tqdm import tqdm


from xgb_utils import  process_train, process_test, one_hot_cols, cal_mean_errors
from xgb_params import xgb_params_inv, xgb_params_own, num_inv_top_features, num_own_top_features

In [18]:
# Train datasets
invesment_train_path = 'processed/processed_train_inv.csv'
owner_train_path = 'processed/processed_train_own.csv'

# Test
investment_test_path = 'processed/processed_test_inv.csv'
owner_test_path = 'processed/processed_test_own.csv'

# Feats
investment_feat_path = './best_feats/Inv_Onehot_only_mean_NANs_modified_prices.csv'
owner_feat_path = './best_feats/Own_Onehot_only_mean_NANs_modified_prices.csv'

output_path = './output/xgb_split_pred.csv'

In [19]:
def train(X, y, params, evalset=None):    
    model = XGBRegressor(
                    # verbosity=0,
                    n_estimators=params['n_estimators'],
                    device=params['device'],
                    objective=params['objective'],
                    eval_metric=params['eval_metric'],
                    enable_categorical=params['enable_categorical'],
                    early_stopping_rounds=params['early_stopping_rounds'] if evalset else None,

                    eta=params['eta'],
                    max_depth = params['max_depth'],
                    # gamma = params['gamma'],
                    # reg_alpha = params['reg_alpha'],
                    # min_child_weight=params['min_child_weight'],
                    colsample_bytree=params['colsample_bytree'],
                    n_jobs=params['n_jobs']
                )
    if not evalset:
        model.fit(X, y, verbose=True)
    else:
        model.fit(X, y, eval_set=evalset, verbose=True)

    return model

def find_best_model(processed_df, params):
    # Return best model from kfolds
    best_model = None
    min_loss = float('inf')
    cv = KFold(n_splits=10, shuffle=True)
    for fold, (train_idx, test_idx) in enumerate(cv.split(processed_df)):
        X_train = processed_df.iloc[train_idx]
        y_train = X_train["price_doc"]
        X_train.drop(["price_doc"], axis=1, inplace=True)

        X_val = processed_df.iloc[test_idx]
        y_val = X_val["price_doc"]
        X_val.drop(["price_doc"], axis=1, inplace=True)

        evalset = [(X_val, y_val)]
        model = train(X_train, y_train, params, evalset)

        pred = model.predict(X_val)
        loss = mean_squared_error(y_val, pred)

        if loss < min_loss:
            min_loss = loss
            best_model = model
    
    return best_model

# Investment Dataset

In [20]:
inv_feats_df = pd.read_csv(investment_feat_path)
feats = inv_feats_df['col_name'].values.tolist()[:num_inv_top_features]
processed_df = pd.read_csv(invesment_train_path)
processed_df = processed_df[feats+['price_doc']]

processed_inv_test_df = pd.read_csv(investment_test_path)
processed_inv_test_df_copy = processed_inv_test_df.copy(deep=True)
processed_inv_test_df.head()

Unnamed: 0,id,full_sq,life_sq,floor,max_floor,build_year,num_room,kitch_sq,state,area_m,...,big_road1_1line_no,big_road1_1line_yes,railroad_1line_no,railroad_1line_yes,material_1.0,material_2.0,material_4.0,material_5.0,material_6.0,material_nan
0,30474,39.0,20.7,2,9,1998.0,1,8.9,3.0,26155140.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,30476,40.5,25.1,3,5,1960.0,2,4.8,2.0,9946335.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,30482,45.4,28.5,9,12,1972.0,2,6.0,2.0,15319900.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,30487,39.8,18.9,4,17,2013.0,1,9.9,3.0,11391680.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,30491,94.5,61.9,2,12,2000.0,4,10.3,2.480988,5704502.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [21]:
model = find_best_model(processed_df, xgb_params_inv)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[0]	validation_0-rmsle:0.66173
[1]	validation_0-rmsle:0.65282
[2]	validation_0-rmsle:0.64541
[3]	validation_0-rmsle:0.63855
[4]	validation_0-rmsle:0.63256
[5]	validation_0-rmsle:0.62630
[6]	validation_0-rmsle:0.62056
[7]	validation_0-rmsle:0.61531
[8]	validation_0-rmsle:0.61069
[9]	validation_0-rmsle:0.60632
[10]	validation_0-rmsle:0.60228
[11]	validation_0-rmsle:0.59858
[12]	validation_0-rmsle:0.59582
[13]	validation_0-rmsle:0.59247
[14]	validation_0-rmsle:0.58956
[15]	validation_0-rmsle:0.58726
[16]	validation_0-rmsle:0.58463
[17]	validation_0-rmsle:0.58240
[18]	validation_0-rmsle:0.58036
[19]	validation_0-rmsle:0.57837
[20]	validation_0-rmsle:0.57691
[21]	validation_0-rmsle:0.57546
[22]	validation_0-rmsle:0.57379
[23]	validation_0-rmsle:0.57244
[24]	validation_0-rmsle:0.57123
[25]	validation_0-rmsle:0.57009
[26]	validation_0-rmsle:0.56892
[27]	validation_0-rmsle:0.56771
[28]	validation_0-rmsle:0.56646
[29]	validation_0-rmsle:0.56541
[30]	validation_0-rmsle:0.56445
[31]	validation_0-

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[2]	validation_0-rmsle:0.66345
[3]	validation_0-rmsle:0.65666
[4]	validation_0-rmsle:0.65095
[5]	validation_0-rmsle:0.64469
[6]	validation_0-rmsle:0.63929
[7]	validation_0-rmsle:0.63411
[8]	validation_0-rmsle:0.62941
[9]	validation_0-rmsle:0.62513
[10]	validation_0-rmsle:0.62145
[11]	validation_0-rmsle:0.61769
[12]	validation_0-rmsle:0.61497
[13]	validation_0-rmsle:0.61193
[14]	validation_0-rmsle:0.60920
[15]	validation_0-rmsle:0.60696
[16]	validation_0-rmsle:0.60449
[17]	validation_0-rmsle:0.60221
[18]	validation_0-rmsle:0.59998
[19]	validation_0-rmsle:0.59793
[20]	validation_0-rmsle:0.59638
[21]	validation_0-rmsle:0.59491
[22]	validation_0-rmsle:0.59337
[23]	validation_0-rmsle:0.59217
[24]	validation_0-rmsle:0.59086
[25]	validation_0-rmsle:0.58972
[26]	validation_0-rmsle:0.58873
[27]	validation_0-rmsle:0.58758
[28]	validation_0-rmsle:0.58642
[29]	validation_0-rmsle:0.58521
[30]	validation_0-rmsle:0.58423
[31]	validation_0-rmsle:0.58334
[32]	validation_0-rmsle:0.58235
[33]	validation_

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[5]	validation_0-rmsle:0.66162
[6]	validation_0-rmsle:0.65559
[7]	validation_0-rmsle:0.65019
[8]	validation_0-rmsle:0.64511
[9]	validation_0-rmsle:0.64047
[10]	validation_0-rmsle:0.63636
[11]	validation_0-rmsle:0.63226
[12]	validation_0-rmsle:0.62941
[13]	validation_0-rmsle:0.62597
[14]	validation_0-rmsle:0.62290
[15]	validation_0-rmsle:0.62043
[16]	validation_0-rmsle:0.61780
[17]	validation_0-rmsle:0.61535
[18]	validation_0-rmsle:0.61336
[19]	validation_0-rmsle:0.61144
[20]	validation_0-rmsle:0.60982
[21]	validation_0-rmsle:0.60876
[22]	validation_0-rmsle:0.60695
[23]	validation_0-rmsle:0.60558
[24]	validation_0-rmsle:0.60456
[25]	validation_0-rmsle:0.60330
[26]	validation_0-rmsle:0.60225
[27]	validation_0-rmsle:0.60108
[28]	validation_0-rmsle:0.59993
[29]	validation_0-rmsle:0.59879
[30]	validation_0-rmsle:0.59790
[31]	validation_0-rmsle:0.59718
[32]	validation_0-rmsle:0.59644
[33]	validation_0-rmsle:0.59563
[34]	validation_0-rmsle:0.59489
[35]	validation_0-rmsle:0.59414
[36]	validati

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[2]	validation_0-rmsle:0.66881
[3]	validation_0-rmsle:0.66197
[4]	validation_0-rmsle:0.65605
[5]	validation_0-rmsle:0.65003
[6]	validation_0-rmsle:0.64405
[7]	validation_0-rmsle:0.63892
[8]	validation_0-rmsle:0.63431
[9]	validation_0-rmsle:0.63009
[10]	validation_0-rmsle:0.62623
[11]	validation_0-rmsle:0.62254
[12]	validation_0-rmsle:0.61977
[13]	validation_0-rmsle:0.61678
[14]	validation_0-rmsle:0.61385
[15]	validation_0-rmsle:0.61157
[16]	validation_0-rmsle:0.60912
[17]	validation_0-rmsle:0.60683
[18]	validation_0-rmsle:0.60467
[19]	validation_0-rmsle:0.60270
[20]	validation_0-rmsle:0.60115
[21]	validation_0-rmsle:0.59986
[22]	validation_0-rmsle:0.59850
[23]	validation_0-rmsle:0.59736
[24]	validation_0-rmsle:0.59607
[25]	validation_0-rmsle:0.59496
[26]	validation_0-rmsle:0.59407
[27]	validation_0-rmsle:0.59330
[28]	validation_0-rmsle:0.59237
[29]	validation_0-rmsle:0.59122
[30]	validation_0-rmsle:0.59027
[31]	validation_0-rmsle:0.58953
[32]	validation_0-rmsle:0.58874
[33]	validation_

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[1]	validation_0-rmsle:0.67449
[2]	validation_0-rmsle:0.66625
[3]	validation_0-rmsle:0.65912
[4]	validation_0-rmsle:0.65255
[5]	validation_0-rmsle:0.64568
[6]	validation_0-rmsle:0.63975
[7]	validation_0-rmsle:0.63424
[8]	validation_0-rmsle:0.62937
[9]	validation_0-rmsle:0.62465
[10]	validation_0-rmsle:0.62031
[11]	validation_0-rmsle:0.61631
[12]	validation_0-rmsle:0.61337
[13]	validation_0-rmsle:0.61010
[14]	validation_0-rmsle:0.60706
[15]	validation_0-rmsle:0.60433
[16]	validation_0-rmsle:0.60150
[17]	validation_0-rmsle:0.59929
[18]	validation_0-rmsle:0.59690
[19]	validation_0-rmsle:0.59466
[20]	validation_0-rmsle:0.59291
[21]	validation_0-rmsle:0.59157
[22]	validation_0-rmsle:0.58988
[23]	validation_0-rmsle:0.58837
[24]	validation_0-rmsle:0.58698
[25]	validation_0-rmsle:0.58574
[26]	validation_0-rmsle:0.58446
[27]	validation_0-rmsle:0.58316
[28]	validation_0-rmsle:0.58209
[29]	validation_0-rmsle:0.58090
[30]	validation_0-rmsle:0.57982
[31]	validation_0-rmsle:0.57907
[32]	validation_0

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[0]	validation_0-rmsle:0.69441
[1]	validation_0-rmsle:0.68433
[2]	validation_0-rmsle:0.67561
[3]	validation_0-rmsle:0.66789
[4]	validation_0-rmsle:0.66138
[5]	validation_0-rmsle:0.65495
[6]	validation_0-rmsle:0.64890
[7]	validation_0-rmsle:0.64335
[8]	validation_0-rmsle:0.63838
[9]	validation_0-rmsle:0.63385
[10]	validation_0-rmsle:0.62962
[11]	validation_0-rmsle:0.62589
[12]	validation_0-rmsle:0.62277
[13]	validation_0-rmsle:0.61983
[14]	validation_0-rmsle:0.61685
[15]	validation_0-rmsle:0.61456
[16]	validation_0-rmsle:0.61204
[17]	validation_0-rmsle:0.60997
[18]	validation_0-rmsle:0.60778
[19]	validation_0-rmsle:0.60578
[20]	validation_0-rmsle:0.60408
[21]	validation_0-rmsle:0.60281
[22]	validation_0-rmsle:0.60131
[23]	validation_0-rmsle:0.59993
[24]	validation_0-rmsle:0.59868
[25]	validation_0-rmsle:0.59747
[26]	validation_0-rmsle:0.59618
[27]	validation_0-rmsle:0.59508
[28]	validation_0-rmsle:0.59400
[29]	validation_0-rmsle:0.59311
[30]	validation_0-rmsle:0.59220
[31]	validation_0-

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[6]	validation_0-rmsle:0.67177
[7]	validation_0-rmsle:0.66699
[8]	validation_0-rmsle:0.66281
[9]	validation_0-rmsle:0.65884
[10]	validation_0-rmsle:0.65520
[11]	validation_0-rmsle:0.65179
[12]	validation_0-rmsle:0.64904
[13]	validation_0-rmsle:0.64615
[14]	validation_0-rmsle:0.64366
[15]	validation_0-rmsle:0.64144
[16]	validation_0-rmsle:0.63901
[17]	validation_0-rmsle:0.63708
[18]	validation_0-rmsle:0.63538
[19]	validation_0-rmsle:0.63349
[20]	validation_0-rmsle:0.63208
[21]	validation_0-rmsle:0.63086
[22]	validation_0-rmsle:0.62944
[23]	validation_0-rmsle:0.62831
[24]	validation_0-rmsle:0.62718
[25]	validation_0-rmsle:0.62619
[26]	validation_0-rmsle:0.62509
[27]	validation_0-rmsle:0.62425
[28]	validation_0-rmsle:0.62317
[29]	validation_0-rmsle:0.62217
[30]	validation_0-rmsle:0.62139
[31]	validation_0-rmsle:0.62060
[32]	validation_0-rmsle:0.61996
[33]	validation_0-rmsle:0.61911
[34]	validation_0-rmsle:0.61846
[35]	validation_0-rmsle:0.61779
[36]	validation_0-rmsle:0.61715
[37]	validat

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[4]	validation_0-rmsle:0.65434
[5]	validation_0-rmsle:0.64812
[6]	validation_0-rmsle:0.64273
[7]	validation_0-rmsle:0.63806
[8]	validation_0-rmsle:0.63382
[9]	validation_0-rmsle:0.62993
[10]	validation_0-rmsle:0.62633
[11]	validation_0-rmsle:0.62307
[12]	validation_0-rmsle:0.62021
[13]	validation_0-rmsle:0.61737
[14]	validation_0-rmsle:0.61469
[15]	validation_0-rmsle:0.61235
[16]	validation_0-rmsle:0.61007
[17]	validation_0-rmsle:0.60811
[18]	validation_0-rmsle:0.60616
[19]	validation_0-rmsle:0.60446
[20]	validation_0-rmsle:0.60300
[21]	validation_0-rmsle:0.60179
[22]	validation_0-rmsle:0.60033
[23]	validation_0-rmsle:0.59911
[24]	validation_0-rmsle:0.59795
[25]	validation_0-rmsle:0.59685
[26]	validation_0-rmsle:0.59594
[27]	validation_0-rmsle:0.59506
[28]	validation_0-rmsle:0.59383
[29]	validation_0-rmsle:0.59278
[30]	validation_0-rmsle:0.59184
[31]	validation_0-rmsle:0.59104
[32]	validation_0-rmsle:0.59031
[33]	validation_0-rmsle:0.58961
[34]	validation_0-rmsle:0.58901
[35]	validatio

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[6]	validation_0-rmsle:0.65946
[7]	validation_0-rmsle:0.65443
[8]	validation_0-rmsle:0.64991
[9]	validation_0-rmsle:0.64559
[10]	validation_0-rmsle:0.64136
[11]	validation_0-rmsle:0.63749
[12]	validation_0-rmsle:0.63476
[13]	validation_0-rmsle:0.63157
[14]	validation_0-rmsle:0.62882
[15]	validation_0-rmsle:0.62634
[16]	validation_0-rmsle:0.62383
[17]	validation_0-rmsle:0.62151
[18]	validation_0-rmsle:0.61927
[19]	validation_0-rmsle:0.61735
[20]	validation_0-rmsle:0.61586
[21]	validation_0-rmsle:0.61424
[22]	validation_0-rmsle:0.61256
[23]	validation_0-rmsle:0.61138
[24]	validation_0-rmsle:0.61012
[25]	validation_0-rmsle:0.60906
[26]	validation_0-rmsle:0.60796
[27]	validation_0-rmsle:0.60721
[28]	validation_0-rmsle:0.60601
[29]	validation_0-rmsle:0.60498
[30]	validation_0-rmsle:0.60396
[31]	validation_0-rmsle:0.60321
[32]	validation_0-rmsle:0.60217
[33]	validation_0-rmsle:0.60163
[34]	validation_0-rmsle:0.60075
[35]	validation_0-rmsle:0.59997
[36]	validation_0-rmsle:0.59927
[37]	validat

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[7]	validation_0-rmsle:0.66526
[8]	validation_0-rmsle:0.66086
[9]	validation_0-rmsle:0.65663
[10]	validation_0-rmsle:0.65301
[11]	validation_0-rmsle:0.64944
[12]	validation_0-rmsle:0.64727
[13]	validation_0-rmsle:0.64433
[14]	validation_0-rmsle:0.64149
[15]	validation_0-rmsle:0.63926
[16]	validation_0-rmsle:0.63699
[17]	validation_0-rmsle:0.63503
[18]	validation_0-rmsle:0.63307
[19]	validation_0-rmsle:0.63122
[20]	validation_0-rmsle:0.62978
[21]	validation_0-rmsle:0.62881
[22]	validation_0-rmsle:0.62733
[23]	validation_0-rmsle:0.62610
[24]	validation_0-rmsle:0.62497
[25]	validation_0-rmsle:0.62376
[26]	validation_0-rmsle:0.62289
[27]	validation_0-rmsle:0.62198
[28]	validation_0-rmsle:0.62111
[29]	validation_0-rmsle:0.62024
[30]	validation_0-rmsle:0.61940
[31]	validation_0-rmsle:0.61876
[32]	validation_0-rmsle:0.61804
[33]	validation_0-rmsle:0.61718
[34]	validation_0-rmsle:0.61662
[35]	validation_0-rmsle:0.61593
[36]	validation_0-rmsle:0.61552
[37]	validation_0-rmsle:0.61500
[38]	valida

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead


In [22]:
pred = model.predict(processed_inv_test_df[feats])
inv_prediction_df = pd.DataFrame({
    'id': processed_inv_test_df_copy['id'],
    'price_doc': pred
})

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead


# Owner Dataset

In [23]:
own_feats_df = pd.read_csv(owner_feat_path)
feats = own_feats_df['col_name'].values.tolist()[:num_own_top_features]
processed_df = pd.read_csv(owner_train_path)
processed_df = processed_df[feats+['price_doc']]

processed_own_test_df = pd.read_csv(owner_test_path)
processed_own_test_df_copy = processed_own_test_df.copy(deep=True)
processed_own_test_df.head()

Unnamed: 0,id,full_sq,life_sq,floor,max_floor,build_year,num_room,kitch_sq,state,area_m,...,big_road1_1line_yes,railroad_1line_no,railroad_1line_yes,material_1.0,material_2.0,material_3.0,material_4.0,material_5.0,material_6.0,material_nan
0,30475,79.2,49.632838,8,17,0.0,3,1.0,1.0,25536300.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30477,62.8,36.0,17,17,2016.0,2,62.8,3.0,21494090.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,30478,40.0,40.0,17,17,0.0,1,1.0,1.0,25536300.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,30479,48.43,49.632838,21,1,2015.0,1,1.0,1.0,9629358.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,30480,38.8,49.632838,15,17,1493.225347,1,1.0,1.0,11324090.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
model = find_best_model(processed_df, xgb_params_own)

[0]	validation_0-rmsle:0.44507
[1]	validation_0-rmsle:0.42675
[2]	validation_0-rmsle:0.40992
[3]	validation_0-rmsle:0.39714
[4]	validation_0-rmsle:0.38374
[5]	validation_0-rmsle:0.36946
[6]	validation_0-rmsle:0.35657
[7]	validation_0-rmsle:0.34533
[8]	validation_0-rmsle:0.33375
[9]	validation_0-rmsle:0.32260
[10]	validation_0-rmsle:0.31209
[11]	validation_0-rmsle:0.30216
[12]	validation_0-rmsle:0.29412
[13]	validation_0-rmsle:0.28514
[14]	validation_0-rmsle:0.27662
[15]	validation_0-rmsle:0.26955
[16]	validation_0-rmsle:0.26190
[17]	validation_0-rmsle:0.25572



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[18]	validation_0-rmsle:0.24870
[19]	validation_0-rmsle:0.24216
[20]	validation_0-rmsle:0.23565
[21]	validation_0-rmsle:0.22966
[22]	validation_0-rmsle:0.22407
[23]	validation_0-rmsle:0.21856
[24]	validation_0-rmsle:0.21338
[25]	validation_0-rmsle:0.20859
[26]	validation_0-rmsle:0.20425
[27]	validation_0-rmsle:0.19985
[28]	validation_0-rmsle:0.19548
[29]	validation_0-rmsle:0.19184
[30]	validation_0-rmsle:0.18919
[31]	validation_0-rmsle:0.18540
[32]	validation_0-rmsle:0.18195
[33]	validation_0-rmsle:0.17893
[34]	validation_0-rmsle:0.17626
[35]	validation_0-rmsle:0.17309
[36]	validation_0-rmsle:0.17018
[37]	validation_0-rmsle:0.16728
[38]	validation_0-rmsle:0.16496
[39]	validation_0-rmsle:0.16283
[40]	validation_0-rmsle:0.16056
[41]	validation_0-rmsle:0.15818
[42]	validation_0-rmsle:0.15627
[43]	validation_0-rmsle:0.15419
[44]	validation_0-rmsle:0.15248
[45]	validation_0-rmsle:0.15069
[46]	validation_0-rmsle:0.14915
[47]	validation_0-rmsle:0.14763
[48]	validation_0-rmsle:0.14641
[49]	val

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[15]	validation_0-rmsle:0.27659
[16]	validation_0-rmsle:0.26968
[17]	validation_0-rmsle:0.26349
[18]	validation_0-rmsle:0.25737
[19]	validation_0-rmsle:0.25180
[20]	validation_0-rmsle:0.24701
[21]	validation_0-rmsle:0.24190
[22]	validation_0-rmsle:0.23675
[23]	validation_0-rmsle:0.23250
[24]	validation_0-rmsle:0.22830
[25]	validation_0-rmsle:0.22431
[26]	validation_0-rmsle:0.22074
[27]	validation_0-rmsle:0.21730
[28]	validation_0-rmsle:0.21403
[29]	validation_0-rmsle:0.21081
[30]	validation_0-rmsle:0.20795
[31]	validation_0-rmsle:0.20512
[32]	validation_0-rmsle:0.20267
[33]	validation_0-rmsle:0.20018
[34]	validation_0-rmsle:0.19821
[35]	validation_0-rmsle:0.19598
[36]	validation_0-rmsle:0.19396
[37]	validation_0-rmsle:0.19207
[38]	validation_0-rmsle:0.19054
[39]	validation_0-rmsle:0.18915
[40]	validation_0-rmsle:0.18776
[41]	validation_0-rmsle:0.18619
[42]	validation_0-rmsle:0.18486
[43]	validation_0-rmsle:0.18352
[44]	validation_0-rmsle:0.18265
[45]	validation_0-rmsle:0.18128
[46]	val

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[13]	validation_0-rmsle:0.27962
[14]	validation_0-rmsle:0.27153
[15]	validation_0-rmsle:0.26475
[16]	validation_0-rmsle:0.25756
[17]	validation_0-rmsle:0.25167
[18]	validation_0-rmsle:0.24513
[19]	validation_0-rmsle:0.23898
[20]	validation_0-rmsle:0.23303
[21]	validation_0-rmsle:0.22731
[22]	validation_0-rmsle:0.22179
[23]	validation_0-rmsle:0.21644
[24]	validation_0-rmsle:0.21129
[25]	validation_0-rmsle:0.20676
[26]	validation_0-rmsle:0.20325
[27]	validation_0-rmsle:0.19884
[28]	validation_0-rmsle:0.19496
[29]	validation_0-rmsle:0.19112
[30]	validation_0-rmsle:0.18871
[31]	validation_0-rmsle:0.18529
[32]	validation_0-rmsle:0.18203
[33]	validation_0-rmsle:0.18025
[34]	validation_0-rmsle:0.17805
[35]	validation_0-rmsle:0.17532
[36]	validation_0-rmsle:0.17310
[37]	validation_0-rmsle:0.17091
[38]	validation_0-rmsle:0.16934
[39]	validation_0-rmsle:0.16737
[40]	validation_0-rmsle:0.16519
[41]	validation_0-rmsle:0.16325
[42]	validation_0-rmsle:0.16189
[43]	validation_0-rmsle:0.16031
[44]	val

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[9]	validation_0-rmsle:0.31111
[10]	validation_0-rmsle:0.30064
[11]	validation_0-rmsle:0.29098
[12]	validation_0-rmsle:0.28315
[13]	validation_0-rmsle:0.27396
[14]	validation_0-rmsle:0.26544
[15]	validation_0-rmsle:0.25858
[16]	validation_0-rmsle:0.25100
[17]	validation_0-rmsle:0.24443
[18]	validation_0-rmsle:0.23746
[19]	validation_0-rmsle:0.23059
[20]	validation_0-rmsle:0.22432
[21]	validation_0-rmsle:0.21816
[22]	validation_0-rmsle:0.21251
[23]	validation_0-rmsle:0.20754
[24]	validation_0-rmsle:0.20203
[25]	validation_0-rmsle:0.19692
[26]	validation_0-rmsle:0.19307
[27]	validation_0-rmsle:0.18843
[28]	validation_0-rmsle:0.18421
[29]	validation_0-rmsle:0.18006
[30]	validation_0-rmsle:0.17735
[31]	validation_0-rmsle:0.17366
[32]	validation_0-rmsle:0.17002
[33]	validation_0-rmsle:0.16755
[34]	validation_0-rmsle:0.16509
[35]	validation_0-rmsle:0.16230
[36]	validation_0-rmsle:0.15940
[37]	validation_0-rmsle:0.15660
[38]	validation_0-rmsle:0.15506
[39]	validation_0-rmsle:0.15310
[40]	vali

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[12]	validation_0-rmsle:0.29294
[13]	validation_0-rmsle:0.28456
[14]	validation_0-rmsle:0.27648
[15]	validation_0-rmsle:0.26993
[16]	validation_0-rmsle:0.26290
[17]	validation_0-rmsle:0.25701
[18]	validation_0-rmsle:0.25089
[19]	validation_0-rmsle:0.24513
[20]	validation_0-rmsle:0.24040
[21]	validation_0-rmsle:0.23497
[22]	validation_0-rmsle:0.22952
[23]	validation_0-rmsle:0.22486
[24]	validation_0-rmsle:0.22015
[25]	validation_0-rmsle:0.21648
[26]	validation_0-rmsle:0.21298
[27]	validation_0-rmsle:0.20936
[28]	validation_0-rmsle:0.20595
[29]	validation_0-rmsle:0.20259
[30]	validation_0-rmsle:0.20000
[31]	validation_0-rmsle:0.19717
[32]	validation_0-rmsle:0.19434
[33]	validation_0-rmsle:0.19215
[34]	validation_0-rmsle:0.19012
[35]	validation_0-rmsle:0.18748
[36]	validation_0-rmsle:0.18528
[37]	validation_0-rmsle:0.18325
[38]	validation_0-rmsle:0.18174
[39]	validation_0-rmsle:0.18024
[40]	validation_0-rmsle:0.17814
[41]	validation_0-rmsle:0.17684
[42]	validation_0-rmsle:0.17535
[43]	val

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[5]	validation_0-rmsle:0.37546
[6]	validation_0-rmsle:0.36291
[7]	validation_0-rmsle:0.35231
[8]	validation_0-rmsle:0.34106
[9]	validation_0-rmsle:0.33034
[10]	validation_0-rmsle:0.32023
[11]	validation_0-rmsle:0.31115
[12]	validation_0-rmsle:0.30328
[13]	validation_0-rmsle:0.29494
[14]	validation_0-rmsle:0.28664
[15]	validation_0-rmsle:0.27998
[16]	validation_0-rmsle:0.27323
[17]	validation_0-rmsle:0.26772
[18]	validation_0-rmsle:0.26143
[19]	validation_0-rmsle:0.25605
[20]	validation_0-rmsle:0.25088
[21]	validation_0-rmsle:0.24604
[22]	validation_0-rmsle:0.24124
[23]	validation_0-rmsle:0.23695
[24]	validation_0-rmsle:0.23298
[25]	validation_0-rmsle:0.22886
[26]	validation_0-rmsle:0.22519
[27]	validation_0-rmsle:0.22171
[28]	validation_0-rmsle:0.21861
[29]	validation_0-rmsle:0.21561
[30]	validation_0-rmsle:0.21374
[31]	validation_0-rmsle:0.21071
[32]	validation_0-rmsle:0.20812
[33]	validation_0-rmsle:0.20596
[34]	validation_0-rmsle:0.20341
[35]	validation_0-rmsle:0.20090
[36]	validati

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[8]	validation_0-rmsle:0.32356
[9]	validation_0-rmsle:0.31232
[10]	validation_0-rmsle:0.30197
[11]	validation_0-rmsle:0.29231
[12]	validation_0-rmsle:0.28414
[13]	validation_0-rmsle:0.27529
[14]	validation_0-rmsle:0.26734
[15]	validation_0-rmsle:0.26017
[16]	validation_0-rmsle:0.25310
[17]	validation_0-rmsle:0.24672
[18]	validation_0-rmsle:0.24039
[19]	validation_0-rmsle:0.23402
[20]	validation_0-rmsle:0.22817
[21]	validation_0-rmsle:0.22252
[22]	validation_0-rmsle:0.21718
[23]	validation_0-rmsle:0.21220
[24]	validation_0-rmsle:0.20720
[25]	validation_0-rmsle:0.20302
[26]	validation_0-rmsle:0.19910
[27]	validation_0-rmsle:0.19491
[28]	validation_0-rmsle:0.19107
[29]	validation_0-rmsle:0.18787
[30]	validation_0-rmsle:0.18466
[31]	validation_0-rmsle:0.18124
[32]	validation_0-rmsle:0.17819
[33]	validation_0-rmsle:0.17581
[34]	validation_0-rmsle:0.17327
[35]	validation_0-rmsle:0.17093
[36]	validation_0-rmsle:0.16847
[37]	validation_0-rmsle:0.16623
[38]	validation_0-rmsle:0.16463
[39]	valid

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[12]	validation_0-rmsle:0.28179
[13]	validation_0-rmsle:0.27276
[14]	validation_0-rmsle:0.26437
[15]	validation_0-rmsle:0.25735
[16]	validation_0-rmsle:0.25009
[17]	validation_0-rmsle:0.24390
[18]	validation_0-rmsle:0.23732
[19]	validation_0-rmsle:0.23118
[20]	validation_0-rmsle:0.22493
[21]	validation_0-rmsle:0.21927
[22]	validation_0-rmsle:0.21377
[23]	validation_0-rmsle:0.20878
[24]	validation_0-rmsle:0.20409
[25]	validation_0-rmsle:0.19954
[26]	validation_0-rmsle:0.19557
[27]	validation_0-rmsle:0.19144
[28]	validation_0-rmsle:0.18797
[29]	validation_0-rmsle:0.18437
[30]	validation_0-rmsle:0.18177
[31]	validation_0-rmsle:0.17839
[32]	validation_0-rmsle:0.17556
[33]	validation_0-rmsle:0.17348
[34]	validation_0-rmsle:0.17122
[35]	validation_0-rmsle:0.16851
[36]	validation_0-rmsle:0.16617
[37]	validation_0-rmsle:0.16379
[38]	validation_0-rmsle:0.16221
[39]	validation_0-rmsle:0.16059
[40]	validation_0-rmsle:0.15856
[41]	validation_0-rmsle:0.15666
[42]	validation_0-rmsle:0.15517
[43]	val

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[14]	validation_0-rmsle:0.29298
[15]	validation_0-rmsle:0.28661
[16]	validation_0-rmsle:0.27964
[17]	validation_0-rmsle:0.27381
[18]	validation_0-rmsle:0.26764
[19]	validation_0-rmsle:0.26166
[20]	validation_0-rmsle:0.25620
[21]	validation_0-rmsle:0.25119
[22]	validation_0-rmsle:0.24646
[23]	validation_0-rmsle:0.24187
[24]	validation_0-rmsle:0.23733
[25]	validation_0-rmsle:0.23363
[26]	validation_0-rmsle:0.23034
[27]	validation_0-rmsle:0.22665
[28]	validation_0-rmsle:0.22335
[29]	validation_0-rmsle:0.22002
[30]	validation_0-rmsle:0.21795
[31]	validation_0-rmsle:0.21498
[32]	validation_0-rmsle:0.21242
[33]	validation_0-rmsle:0.20997
[34]	validation_0-rmsle:0.20805
[35]	validation_0-rmsle:0.20580
[36]	validation_0-rmsle:0.20351
[37]	validation_0-rmsle:0.20152
[38]	validation_0-rmsle:0.20028
[39]	validation_0-rmsle:0.19846
[40]	validation_0-rmsle:0.19663
[41]	validation_0-rmsle:0.19509
[42]	validation_0-rmsle:0.19385
[43]	validation_0-rmsle:0.19245
[44]	validation_0-rmsle:0.19217
[45]	val

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.Spars

[10]	validation_0-rmsle:0.30613
[11]	validation_0-rmsle:0.29607
[12]	validation_0-rmsle:0.28772
[13]	validation_0-rmsle:0.27878
[14]	validation_0-rmsle:0.27025
[15]	validation_0-rmsle:0.26336
[16]	validation_0-rmsle:0.25579
[17]	validation_0-rmsle:0.24935
[18]	validation_0-rmsle:0.24270
[19]	validation_0-rmsle:0.23651
[20]	validation_0-rmsle:0.23022
[21]	validation_0-rmsle:0.22455
[22]	validation_0-rmsle:0.21863
[23]	validation_0-rmsle:0.21334
[24]	validation_0-rmsle:0.20786
[25]	validation_0-rmsle:0.20344
[26]	validation_0-rmsle:0.19939
[27]	validation_0-rmsle:0.19488
[28]	validation_0-rmsle:0.19080
[29]	validation_0-rmsle:0.18695
[30]	validation_0-rmsle:0.18449
[31]	validation_0-rmsle:0.18094
[32]	validation_0-rmsle:0.17764
[33]	validation_0-rmsle:0.17508
[34]	validation_0-rmsle:0.17251
[35]	validation_0-rmsle:0.16961
[36]	validation_0-rmsle:0.16701
[37]	validation_0-rmsle:0.16428
[38]	validation_0-rmsle:0.16265
[39]	validation_0-rmsle:0.16109
[40]	validation_0-rmsle:0.15895
[41]	val

is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead


In [25]:
pred = model.predict(processed_own_test_df[feats])
own_prediction_df = pd.DataFrame({
    'id': processed_own_test_df_copy['id'],
    'price_doc': pred
})


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead


## Combine both predictions and sort by id

In [26]:
overall_preds = pd.concat([inv_prediction_df, own_prediction_df])

overall_preds.sort_values(by=['id'], inplace=True)

overall_preds.head()

Unnamed: 0,id,price_doc
0,30474,5573238.0
0,30475,8062681.5
1,30476,5340796.0
1,30477,6149045.5
2,30478,5096453.0


In [27]:
overall_preds.to_csv(output_path, index=False)