In [1]:
from xgboost import XGBRegressor

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from sklearn.model_selection import KFold
from sklearn.metrics import  mean_squared_error

from xgb_params import xgb_params_inv, xgb_params_own, num_inv_top_features, num_own_top_features

In [2]:
# Train datasets
invesment_train_path = 'processed/processed_train_inv.csv'
owner_train_path = 'processed/processed_train_own.csv'

# Test
investment_test_path = 'processed/processed_test_inv.csv'
owner_test_path = 'processed/processed_test_own.csv'

# Feats
investment_feat_path = './best_feats/inv_modified_price_feats.csv'
owner_feat_path = './best_feats/own_modified_price_feats.csv'

output_path = './output/xgb_split_pred.csv'

In [3]:
def train(X, y, params, evalset=None):    
    model = XGBRegressor(
                    # verbosity=0,
                    n_estimators=params['n_estimators'],
                    device=params['device'],
                    objective=params['objective'],
                    eval_metric=params['eval_metric'],
                    enable_categorical=params['enable_categorical'],
                    early_stopping_rounds=params['early_stopping_rounds'] if evalset else None,

                    eta=params['eta'],
                    max_depth = params['max_depth'],
                    # gamma = params['gamma'],
                    # reg_alpha = params['reg_alpha'],
                    min_child_weight=params['min_child_weight'],
                    colsample_bytree=params['colsample_bytree'],
                    n_jobs=params['n_jobs']
                )
    if not evalset:
        model.fit(X, y, verbose=True)
    else:
        model.fit(X, y, eval_set=evalset, verbose=True)

    return model

def find_best_model(processed_df, params, seed=42):
    # Return best model from kfolds
    best_model = None
    min_loss = float('inf')
    cv = KFold(n_splits=10, shuffle=True, random_state=seed)
    for fold, (train_idx, test_idx) in enumerate(cv.split(processed_df)):
        X_train = processed_df.iloc[train_idx]
        y_train = X_train["price_doc"]
        X_train.drop(["price_doc"], axis=1, inplace=True)

        X_val = processed_df.iloc[test_idx]
        y_val = X_val["price_doc"]
        X_val.drop(["price_doc"], axis=1, inplace=True)

        evalset = [(X_val, y_val)]
        model = train(X_train, y_train, params, evalset)

        pred = model.predict(X_val)
        loss = mean_squared_error(y_val, pred)

        if loss < min_loss:
            min_loss = loss
            best_model = model
    
    return best_model

# Investment Dataset

In [4]:
inv_feats_df = pd.read_csv(investment_feat_path)
feats = inv_feats_df['col_name'].values.tolist()[:num_inv_top_features]
processed_df = pd.read_csv(invesment_train_path)
processed_df = processed_df[feats+['price_doc']]

processed_inv_test_df = pd.read_csv(investment_test_path)
processed_inv_test_df_copy = processed_inv_test_df.copy(deep=True)
processed_inv_test_df.head()

Unnamed: 0,id,full_sq,life_sq,floor,max_floor,build_year,num_room,kitch_sq,state,area_m,...,big_road1_1line_no,big_road1_1line_yes,railroad_1line_no,railroad_1line_yes,material_1.0,material_2.0,material_4.0,material_5.0,material_6.0,material_nan
0,30474,39.0,20.7,2,9,1998.0,1,8.9,3.0,26155140.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,30476,40.5,25.1,3,5,1960.0,2,4.8,2.0,9946335.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,30482,45.4,28.5,9,12,1972.0,2,6.0,2.0,15319900.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,30487,39.8,18.9,4,17,2013.0,1,9.9,3.0,11391680.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,30491,94.5,61.9,2,12,2000.0,4,10.3,2.480988,5704502.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [5]:
model = find_best_model(processed_df, xgb_params_inv, seed=100)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.68767
[1]	validation_0-rmsle:0.67175
[2]	validation_0-rmsle:0.65957
[3]	validation_0-rmsle:0.65008
[4]	validation_0-rmsle:0.64049
[5]	validation_0-rmsle:0.63417
[6]	validation_0-rmsle:0.62826
[7]	validation_0-rmsle:0.62147
[8]	validation_0-rmsle:0.61581
[9]	validation_0-rmsle:0.61104
[10]	validation_0-rmsle:0.60681
[11]	validation_0-rmsle:0.60336
[12]	validation_0-rmsle:0.60049
[13]	validation_0-rmsle:0.59777
[14]	validation_0-rmsle:0.59547
[15]	validation_0-rmsle:0.59358
[16]	validation_0-rmsle:0.59155
[17]	validation_0-rmsle:0.58962
[18]	validation_0-rmsle:0.58839
[19]	validation_0-rmsle:0.58705
[20]	validation_0-rmsle:0.58590
[21]	validation_0-rmsle:0.58498
[22]	validation_0-rmsle:0.58418
[23]	validation_0-rmsle:0.58322
[24]	validation_0-rmsle:0.58234
[25]	validation_0-rmsle:0.58187
[26]	validation_0-rmsle:0.58113
[27]	validation_0-rmsle:0.58063
[28]	validation_0-rmsle:0.58017
[29]	validation_0-rmsle:0.57953
[30]	validation_0-rmsle:0.57900
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is

[0]	validation_0-rmsle:0.69136
[1]	validation_0-rmsle:0.67469
[2]	validation_0-rmsle:0.66231
[3]	validation_0-rmsle:0.65230
[4]	validation_0-rmsle:0.64161
[5]	validation_0-rmsle:0.63499
[6]	validation_0-rmsle:0.62904
[7]	validation_0-rmsle:0.62286
[8]	validation_0-rmsle:0.61717
[9]	validation_0-rmsle:0.61243
[10]	validation_0-rmsle:0.60873
[11]	validation_0-rmsle:0.60545
[12]	validation_0-rmsle:0.60253
[13]	validation_0-rmsle:0.59977
[14]	validation_0-rmsle:0.59726
[15]	validation_0-rmsle:0.59507
[16]	validation_0-rmsle:0.59352
[17]	validation_0-rmsle:0.59181
[18]	validation_0-rmsle:0.59073
[19]	validation_0-rmsle:0.58945
[20]	validation_0-rmsle:0.58842
[21]	validation_0-rmsle:0.58753
[22]	validation_0-rmsle:0.58664
[23]	validation_0-rmsle:0.58597
[24]	validation_0-rmsle:0.58528
[25]	validation_0-rmsle:0.58490
[26]	validation_0-rmsle:0.58431
[27]	validation_0-rmsle:0.58379
[28]	validation_0-rmsle:0.58295
[29]	validation_0-rmsle:0.58236
[30]	validation_0-rmsle:0.58212
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[1]	validation_0-rmsle:0.66170
[2]	validation_0-rmsle:0.65045
[3]	validation_0-rmsle:0.64147
[4]	validation_0-rmsle:0.63303
[5]	validation_0-rmsle:0.62704
[6]	validation_0-rmsle:0.62179
[7]	validation_0-rmsle:0.61661
[8]	validation_0-rmsle:0.61239
[9]	validation_0-rmsle:0.60827
[10]	validation_0-rmsle:0.60537
[11]	validation_0-rmsle:0.60225
[12]	validation_0-rmsle:0.59973
[13]	validation_0-rmsle:0.59751
[14]	validation_0-rmsle:0.59567
[15]	validation_0-rmsle:0.59420
[16]	validation_0-rmsle:0.59296
[17]	validation_0-rmsle:0.59200
[18]	validation_0-rmsle:0.59091
[19]	validation_0-rmsle:0.59037
[20]	validation_0-rmsle:0.58927
[21]	validation_0-rmsle:0.58870
[22]	validation_0-rmsle:0.58780
[23]	validation_0-rmsle:0.58725
[24]	validation_0-rmsle:0.58684
[25]	validation_0-rmsle:0.58653
[26]	validation_0-rmsle:0.58608
[27]	validation_0-rmsle:0.58559
[28]	validation_0-rmsle:0.58521
[29]	validation_0-rmsle:0.58497
[30]	validation_0-rmsle:0.58478
[31]	validation_0-rmsle:0.58449
[32]	validation_0

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori


[1]	validation_0-rmsle:0.63837
[2]	validation_0-rmsle:0.62553
[3]	validation_0-rmsle:0.61464
[4]	validation_0-rmsle:0.60428
[5]	validation_0-rmsle:0.59750
[6]	validation_0-rmsle:0.59151
[7]	validation_0-rmsle:0.58494
[8]	validation_0-rmsle:0.57967
[9]	validation_0-rmsle:0.57496
[10]	validation_0-rmsle:0.57132
[11]	validation_0-rmsle:0.56803
[12]	validation_0-rmsle:0.56505
[13]	validation_0-rmsle:0.56319
[14]	validation_0-rmsle:0.56095
[15]	validation_0-rmsle:0.55924
[16]	validation_0-rmsle:0.55746
[17]	validation_0-rmsle:0.55580
[18]	validation_0-rmsle:0.55445
[19]	validation_0-rmsle:0.55326
[20]	validation_0-rmsle:0.55259
[21]	validation_0-rmsle:0.55183
[22]	validation_0-rmsle:0.55117
[23]	validation_0-rmsle:0.55055
[24]	validation_0-rmsle:0.55012
[25]	validation_0-rmsle:0.54957
[26]	validation_0-rmsle:0.54905
[27]	validation_0-rmsle:0.54865
[28]	validation_0-rmsle:0.54848
[29]	validation_0-rmsle:0.54807
[30]	validation_0-rmsle:0.54781
[31]	validation_0-rmsle:0.54750
[32]	validation_

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[2]	validation_0-rmsle:0.63091
[3]	validation_0-rmsle:0.62157
[4]	validation_0-rmsle:0.61306
[5]	validation_0-rmsle:0.60711
[6]	validation_0-rmsle:0.60231
[7]	validation_0-rmsle:0.59711
[8]	validation_0-rmsle:0.59299
[9]	validation_0-rmsle:0.58942
[10]	validation_0-rmsle:0.58623
[11]	validation_0-rmsle:0.58340
[12]	validation_0-rmsle:0.58097
[13]	validation_0-rmsle:0.57912
[14]	validation_0-rmsle:0.57744
[15]	validation_0-rmsle:0.57603
[16]	validation_0-rmsle:0.57486
[17]	validation_0-rmsle:0.57366
[18]	validation_0-rmsle:0.57262
[19]	validation_0-rmsle:0.57194
[20]	validation_0-rmsle:0.57114
[21]	validation_0-rmsle:0.57043
[22]	validation_0-rmsle:0.56963
[23]	validation_0-rmsle:0.56917
[24]	validation_0-rmsle:0.56882
[25]	validation_0-rmsle:0.56866
[26]	validation_0-rmsle:0.56820
[27]	validation_0-rmsle:0.56802
[28]	validation_0-rmsle:0.56786
[29]	validation_0-rmsle:0.56752
[30]	validation_0-rmsle:0.56725
[31]	validation_0-rmsle:0.56686
[32]	validation_0-rmsle:0.56674
[33]	validation_

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[1]	validation_0-rmsle:0.71920
[2]	validation_0-rmsle:0.70686
[3]	validation_0-rmsle:0.69558
[4]	validation_0-rmsle:0.68614
[5]	validation_0-rmsle:0.67923
[6]	validation_0-rmsle:0.67321
[7]	validation_0-rmsle:0.66676
[8]	validation_0-rmsle:0.66148
[9]	validation_0-rmsle:0.65676
[10]	validation_0-rmsle:0.65267
[11]	validation_0-rmsle:0.64946
[12]	validation_0-rmsle:0.64655
[13]	validation_0-rmsle:0.64397
[14]	validation_0-rmsle:0.64204
[15]	validation_0-rmsle:0.63976
[16]	validation_0-rmsle:0.63803
[17]	validation_0-rmsle:0.63659
[18]	validation_0-rmsle:0.63521
[19]	validation_0-rmsle:0.63457
[20]	validation_0-rmsle:0.63346
[21]	validation_0-rmsle:0.63247
[22]	validation_0-rmsle:0.63167
[23]	validation_0-rmsle:0.63083
[24]	validation_0-rmsle:0.63000
[25]	validation_0-rmsle:0.62915
[26]	validation_0-rmsle:0.62858
[27]	validation_0-rmsle:0.62826
[28]	validation_0-rmsle:0.62793
[29]	validation_0-rmsle:0.62758
[30]	validation_0-rmsle:0.62716
[31]	validation_0-rmsle:0.62696
[32]	validation_0

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[3]	validation_0-rmsle:0.64111
[4]	validation_0-rmsle:0.63277
[5]	validation_0-rmsle:0.62708
[6]	validation_0-rmsle:0.62243
[7]	validation_0-rmsle:0.61729
[8]	validation_0-rmsle:0.61267
[9]	validation_0-rmsle:0.60904
[10]	validation_0-rmsle:0.60575
[11]	validation_0-rmsle:0.60326
[12]	validation_0-rmsle:0.60123
[13]	validation_0-rmsle:0.59922
[14]	validation_0-rmsle:0.59714
[15]	validation_0-rmsle:0.59556
[16]	validation_0-rmsle:0.59434
[17]	validation_0-rmsle:0.59332
[18]	validation_0-rmsle:0.59224
[19]	validation_0-rmsle:0.59142
[20]	validation_0-rmsle:0.59057
[21]	validation_0-rmsle:0.58978
[22]	validation_0-rmsle:0.58914
[23]	validation_0-rmsle:0.58854
[24]	validation_0-rmsle:0.58775
[25]	validation_0-rmsle:0.58726
[26]	validation_0-rmsle:0.58644
[27]	validation_0-rmsle:0.58613
[28]	validation_0-rmsle:0.58581
[29]	validation_0-rmsle:0.58539
[30]	validation_0-rmsle:0.58526
[31]	validation_0-rmsle:0.58509
[32]	validation_0-rmsle:0.58500
[33]	validation_0-rmsle:0.58471
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[0]	validation_0-rmsle:0.67562
[1]	validation_0-rmsle:0.65878
[2]	validation_0-rmsle:0.64649
[3]	validation_0-rmsle:0.63707
[4]	validation_0-rmsle:0.62794
[5]	validation_0-rmsle:0.62210
[6]	validation_0-rmsle:0.61654
[7]	validation_0-rmsle:0.61051
[8]	validation_0-rmsle:0.60545
[9]	validation_0-rmsle:0.60121
[10]	validation_0-rmsle:0.59732
[11]	validation_0-rmsle:0.59420
[12]	validation_0-rmsle:0.59138
[13]	validation_0-rmsle:0.58905
[14]	validation_0-rmsle:0.58656
[15]	validation_0-rmsle:0.58478
[16]	validation_0-rmsle:0.58300
[17]	validation_0-rmsle:0.58188
[18]	validation_0-rmsle:0.58073
[19]	validation_0-rmsle:0.57967
[20]	validation_0-rmsle:0.57853
[21]	validation_0-rmsle:0.57751
[22]	validation_0-rmsle:0.57697
[23]	validation_0-rmsle:0.57610
[24]	validation_0-rmsle:0.57552
[25]	validation_0-rmsle:0.57496
[26]	validation_0-rmsle:0.57437
[27]	validation_0-rmsle:0.57408
[28]	validation_0-rmsle:0.57373
[29]	validation_0-rmsle:0.57344
[30]	validation_0-rmsle:0.57285
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[3]	validation_0-rmsle:0.65603
[4]	validation_0-rmsle:0.64755
[5]	validation_0-rmsle:0.64223
[6]	validation_0-rmsle:0.63789
[7]	validation_0-rmsle:0.63235
[8]	validation_0-rmsle:0.62761
[9]	validation_0-rmsle:0.62395
[10]	validation_0-rmsle:0.62092
[11]	validation_0-rmsle:0.61810
[12]	validation_0-rmsle:0.61564
[13]	validation_0-rmsle:0.61339
[14]	validation_0-rmsle:0.61155
[15]	validation_0-rmsle:0.61013
[16]	validation_0-rmsle:0.60877
[17]	validation_0-rmsle:0.60777
[18]	validation_0-rmsle:0.60675
[19]	validation_0-rmsle:0.60565
[20]	validation_0-rmsle:0.60496
[21]	validation_0-rmsle:0.60364
[22]	validation_0-rmsle:0.60287
[23]	validation_0-rmsle:0.60211
[24]	validation_0-rmsle:0.60140
[25]	validation_0-rmsle:0.60087
[26]	validation_0-rmsle:0.60035
[27]	validation_0-rmsle:0.59978
[28]	validation_0-rmsle:0.59943
[29]	validation_0-rmsle:0.59931
[30]	validation_0-rmsle:0.59863
[31]	validation_0-rmsle:0.59811
[32]	validation_0-rmsle:0.59758
[33]	validation_0-rmsle:0.59726
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[0]	validation_0-rmsle:0.66571
[1]	validation_0-rmsle:0.64989
[2]	validation_0-rmsle:0.63765
[3]	validation_0-rmsle:0.62755
[4]	validation_0-rmsle:0.61851
[5]	validation_0-rmsle:0.61233
[6]	validation_0-rmsle:0.60684
[7]	validation_0-rmsle:0.60063
[8]	validation_0-rmsle:0.59554
[9]	validation_0-rmsle:0.59102
[10]	validation_0-rmsle:0.58735
[11]	validation_0-rmsle:0.58432
[12]	validation_0-rmsle:0.58200
[13]	validation_0-rmsle:0.57953
[14]	validation_0-rmsle:0.57748
[15]	validation_0-rmsle:0.57568
[16]	validation_0-rmsle:0.57388
[17]	validation_0-rmsle:0.57245
[18]	validation_0-rmsle:0.57096
[19]	validation_0-rmsle:0.57024
[20]	validation_0-rmsle:0.56923
[21]	validation_0-rmsle:0.56814
[22]	validation_0-rmsle:0.56710
[23]	validation_0-rmsle:0.56609
[24]	validation_0-rmsle:0.56540
[25]	validation_0-rmsle:0.56475
[26]	validation_0-rmsle:0.56413
[27]	validation_0-rmsle:0.56343
[28]	validation_0-rmsle:0.56288
[29]	validation_0-rmsle:0.56232
[30]	validation_0-rmsle:0.56195
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [6]:
pred = model.predict(processed_inv_test_df[feats])
inv_prediction_df = pd.DataFrame({
    'id': processed_inv_test_df_copy['id'],
    'price_doc': pred
})

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


# Owner Dataset

In [7]:
own_feats_df = pd.read_csv(owner_feat_path)
feats = own_feats_df['col_name'].values.tolist()[:num_own_top_features]
processed_df = pd.read_csv(owner_train_path)
processed_df = processed_df[feats+['price_doc']]

processed_own_test_df = pd.read_csv(owner_test_path)
processed_own_test_df_copy = processed_own_test_df.copy(deep=True)
processed_own_test_df.head()

Unnamed: 0,id,full_sq,life_sq,floor,max_floor,build_year,num_room,kitch_sq,state,area_m,...,big_road1_1line_yes,railroad_1line_no,railroad_1line_yes,material_1.0,material_2.0,material_3.0,material_4.0,material_5.0,material_6.0,material_nan
0,30475,79.2,49.632838,8,17,0.0,3,1.0,1.0,25536300.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30477,62.8,36.0,17,17,2016.0,2,62.8,3.0,21494090.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,30478,40.0,40.0,17,17,0.0,1,1.0,1.0,25536300.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,30479,48.43,49.632838,21,1,2015.0,1,1.0,1.0,9629358.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,30480,38.8,49.632838,15,17,1493.225347,1,1.0,1.0,11324090.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
model = find_best_model(processed_df, xgb_params_own)

[0]	validation_0-rmsle:0.43325
[1]	validation_0-rmsle:0.39480
[2]	validation_0-rmsle:0.36265
[3]	validation_0-rmsle:0.33451
[4]	validation_0-rmsle:0.31258
[5]	validation_0-rmsle:0.29104
[6]	validation_0-rmsle:0.27216
[7]	validation_0-rmsle:0.25563
[8]	validation_0-rmsle:0.24329
[9]	validation_0-rmsle:0.22949
[10]	validation_0-rmsle:0.21774
[11]	validation_0-rmsle:0.20770
[12]	validation_0-rmsle:0.19834
[13]	validation_0-rmsle:0.19021


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[14]	validation_0-rmsle:0.18298
[15]	validation_0-rmsle:0.17658
[16]	validation_0-rmsle:0.17114
[17]	validation_0-rmsle:0.16628
[18]	validation_0-rmsle:0.16135
[19]	validation_0-rmsle:0.15733
[20]	validation_0-rmsle:0.15442
[21]	validation_0-rmsle:0.15187
[22]	validation_0-rmsle:0.14938
[23]	validation_0-rmsle:0.14714
[24]	validation_0-rmsle:0.14527
[25]	validation_0-rmsle:0.14320
[26]	validation_0-rmsle:0.14167
[27]	validation_0-rmsle:0.13983
[28]	validation_0-rmsle:0.13880
[29]	validation_0-rmsle:0.13777
[30]	validation_0-rmsle:0.13684
[31]	validation_0-rmsle:0.13633
[32]	validation_0-rmsle:0.13524
[33]	validation_0-rmsle:0.13455
[34]	validation_0-rmsle:0.13383
[35]	validation_0-rmsle:0.13296
[36]	validation_0-rmsle:0.13257
[37]	validation_0-rmsle:0.13239
[38]	validation_0-rmsle:0.13208
[39]	validation_0-rmsle:0.13124
[40]	validation_0-rmsle:0.13086
[41]	validation_0-rmsle:0.13065
[42]	validation_0-rmsle:0.13008
[43]	validation_0-rmsle:0.12975
[44]	validation_0-rmsle:0.12912
[45]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[13]	validation_0-rmsle:0.19108
[14]	validation_0-rmsle:0.18452
[15]	validation_0-rmsle:0.17871
[16]	validation_0-rmsle:0.17414
[17]	validation_0-rmsle:0.16996
[18]	validation_0-rmsle:0.16606
[19]	validation_0-rmsle:0.16307
[20]	validation_0-rmsle:0.16052
[21]	validation_0-rmsle:0.15707
[22]	validation_0-rmsle:0.15485
[23]	validation_0-rmsle:0.15293
[24]	validation_0-rmsle:0.15106
[25]	validation_0-rmsle:0.14948
[26]	validation_0-rmsle:0.14800
[27]	validation_0-rmsle:0.14708
[28]	validation_0-rmsle:0.14620
[29]	validation_0-rmsle:0.14549
[30]	validation_0-rmsle:0.14447
[31]	validation_0-rmsle:0.14399
[32]	validation_0-rmsle:0.14338
[33]	validation_0-rmsle:0.14274
[34]	validation_0-rmsle:0.14198
[35]	validation_0-rmsle:0.14119
[36]	validation_0-rmsle:0.14040
[37]	validation_0-rmsle:0.13974
[38]	validation_0-rmsle:0.13951
[39]	validation_0-rmsle:0.13926
[40]	validation_0-rmsle:0.13839
[41]	validation_0-rmsle:0.13775
[42]	validation_0-rmsle:0.13711
[43]	validation_0-rmsle:0.13621
[44]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[13]	validation_0-rmsle:0.19771
[14]	validation_0-rmsle:0.19082
[15]	validation_0-rmsle:0.18475
[16]	validation_0-rmsle:0.17914
[17]	validation_0-rmsle:0.17407
[18]	validation_0-rmsle:0.16948
[19]	validation_0-rmsle:0.16560
[20]	validation_0-rmsle:0.16130
[21]	validation_0-rmsle:0.15798
[22]	validation_0-rmsle:0.15496
[23]	validation_0-rmsle:0.15224
[24]	validation_0-rmsle:0.15014
[25]	validation_0-rmsle:0.14774
[26]	validation_0-rmsle:0.14594
[27]	validation_0-rmsle:0.14409
[28]	validation_0-rmsle:0.14264
[29]	validation_0-rmsle:0.14105
[30]	validation_0-rmsle:0.13934
[31]	validation_0-rmsle:0.13839
[32]	validation_0-rmsle:0.13734
[33]	validation_0-rmsle:0.13643
[34]	validation_0-rmsle:0.13551
[35]	validation_0-rmsle:0.13485
[36]	validation_0-rmsle:0.13439
[37]	validation_0-rmsle:0.13376
[38]	validation_0-rmsle:0.13320
[39]	validation_0-rmsle:0.13239
[40]	validation_0-rmsle:0.13187
[41]	validation_0-rmsle:0.13111
[42]	validation_0-rmsle:0.13071
[43]	validation_0-rmsle:0.13026
[44]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[12]	validation_0-rmsle:0.21301
[13]	validation_0-rmsle:0.20592
[14]	validation_0-rmsle:0.19988
[15]	validation_0-rmsle:0.19431
[16]	validation_0-rmsle:0.18973
[17]	validation_0-rmsle:0.18580
[18]	validation_0-rmsle:0.18221
[19]	validation_0-rmsle:0.17859
[20]	validation_0-rmsle:0.17566
[21]	validation_0-rmsle:0.17284
[22]	validation_0-rmsle:0.16962
[23]	validation_0-rmsle:0.16781
[24]	validation_0-rmsle:0.16602
[25]	validation_0-rmsle:0.16446
[26]	validation_0-rmsle:0.16275
[27]	validation_0-rmsle:0.16154
[28]	validation_0-rmsle:0.16043
[29]	validation_0-rmsle:0.15969
[30]	validation_0-rmsle:0.15873
[31]	validation_0-rmsle:0.15790
[32]	validation_0-rmsle:0.15700
[33]	validation_0-rmsle:0.15632
[34]	validation_0-rmsle:0.15567
[35]	validation_0-rmsle:0.15487
[36]	validation_0-rmsle:0.15402
[37]	validation_0-rmsle:0.15352
[38]	validation_0-rmsle:0.15304
[39]	validation_0-rmsle:0.15272
[40]	validation_0-rmsle:0.15240
[41]	validation_0-rmsle:0.15210
[42]	validation_0-rmsle:0.15150
[43]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[12]	validation_0-rmsle:0.20881
[13]	validation_0-rmsle:0.20137
[14]	validation_0-rmsle:0.19424
[15]	validation_0-rmsle:0.18867
[16]	validation_0-rmsle:0.18371
[17]	validation_0-rmsle:0.17862
[18]	validation_0-rmsle:0.17458
[19]	validation_0-rmsle:0.17038
[20]	validation_0-rmsle:0.16654
[21]	validation_0-rmsle:0.16357
[22]	validation_0-rmsle:0.16060
[23]	validation_0-rmsle:0.15773
[24]	validation_0-rmsle:0.15588
[25]	validation_0-rmsle:0.15441
[26]	validation_0-rmsle:0.15275
[27]	validation_0-rmsle:0.15155
[28]	validation_0-rmsle:0.14971
[29]	validation_0-rmsle:0.14861
[30]	validation_0-rmsle:0.14824
[31]	validation_0-rmsle:0.14756
[32]	validation_0-rmsle:0.14705
[33]	validation_0-rmsle:0.14634
[34]	validation_0-rmsle:0.14583
[35]	validation_0-rmsle:0.14536
[36]	validation_0-rmsle:0.14512
[37]	validation_0-rmsle:0.14440
[38]	validation_0-rmsle:0.14394
[39]	validation_0-rmsle:0.14365
[40]	validation_0-rmsle:0.14310
[41]	validation_0-rmsle:0.14279
[42]	validation_0-rmsle:0.14277
[43]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[11]	validation_0-rmsle:0.21414
[12]	validation_0-rmsle:0.20454
[13]	validation_0-rmsle:0.19629
[14]	validation_0-rmsle:0.18906
[15]	validation_0-rmsle:0.18211
[16]	validation_0-rmsle:0.17683
[17]	validation_0-rmsle:0.17137
[18]	validation_0-rmsle:0.16588
[19]	validation_0-rmsle:0.16174
[20]	validation_0-rmsle:0.15809
[21]	validation_0-rmsle:0.15459
[22]	validation_0-rmsle:0.15174
[23]	validation_0-rmsle:0.14967
[24]	validation_0-rmsle:0.14739
[25]	validation_0-rmsle:0.14506
[26]	validation_0-rmsle:0.14308
[27]	validation_0-rmsle:0.14123
[28]	validation_0-rmsle:0.13968
[29]	validation_0-rmsle:0.13839
[30]	validation_0-rmsle:0.13713
[31]	validation_0-rmsle:0.13646
[32]	validation_0-rmsle:0.13552
[33]	validation_0-rmsle:0.13431
[34]	validation_0-rmsle:0.13318
[35]	validation_0-rmsle:0.13244
[36]	validation_0-rmsle:0.13181
[37]	validation_0-rmsle:0.13140
[38]	validation_0-rmsle:0.13106
[39]	validation_0-rmsle:0.13039
[40]	validation_0-rmsle:0.12992
[41]	validation_0-rmsle:0.12964
[42]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[13]	validation_0-rmsle:0.20195
[14]	validation_0-rmsle:0.19503
[15]	validation_0-rmsle:0.18933
[16]	validation_0-rmsle:0.18418
[17]	validation_0-rmsle:0.17979
[18]	validation_0-rmsle:0.17596
[19]	validation_0-rmsle:0.17248
[20]	validation_0-rmsle:0.16904
[21]	validation_0-rmsle:0.16527
[22]	validation_0-rmsle:0.16234
[23]	validation_0-rmsle:0.16059
[24]	validation_0-rmsle:0.15895
[25]	validation_0-rmsle:0.15711
[26]	validation_0-rmsle:0.15524
[27]	validation_0-rmsle:0.15444
[28]	validation_0-rmsle:0.15353
[29]	validation_0-rmsle:0.15246
[30]	validation_0-rmsle:0.15161
[31]	validation_0-rmsle:0.15063
[32]	validation_0-rmsle:0.14982
[33]	validation_0-rmsle:0.14926
[34]	validation_0-rmsle:0.14838
[35]	validation_0-rmsle:0.14795
[36]	validation_0-rmsle:0.14740
[37]	validation_0-rmsle:0.14720
[38]	validation_0-rmsle:0.14652
[39]	validation_0-rmsle:0.14598
[40]	validation_0-rmsle:0.14507
[41]	validation_0-rmsle:0.14457
[42]	validation_0-rmsle:0.14453
[43]	validation_0-rmsle:0.14435
[44]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[10]	validation_0-rmsle:0.21939
[11]	validation_0-rmsle:0.20840
[12]	validation_0-rmsle:0.19851
[13]	validation_0-rmsle:0.19010
[14]	validation_0-rmsle:0.18190
[15]	validation_0-rmsle:0.17513
[16]	validation_0-rmsle:0.16857
[17]	validation_0-rmsle:0.16312
[18]	validation_0-rmsle:0.15808
[19]	validation_0-rmsle:0.15395
[20]	validation_0-rmsle:0.14974
[21]	validation_0-rmsle:0.14686
[22]	validation_0-rmsle:0.14382
[23]	validation_0-rmsle:0.14106
[24]	validation_0-rmsle:0.13847
[25]	validation_0-rmsle:0.13606
[26]	validation_0-rmsle:0.13455
[27]	validation_0-rmsle:0.13297
[28]	validation_0-rmsle:0.13157
[29]	validation_0-rmsle:0.12978
[30]	validation_0-rmsle:0.12845
[31]	validation_0-rmsle:0.12765
[32]	validation_0-rmsle:0.12644
[33]	validation_0-rmsle:0.12564
[34]	validation_0-rmsle:0.12431
[35]	validation_0-rmsle:0.12333
[36]	validation_0-rmsle:0.12301
[37]	validation_0-rmsle:0.12239
[38]	validation_0-rmsle:0.12169
[39]	validation_0-rmsle:0.12117
[40]	validation_0-rmsle:0.12055
[41]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[10]	validation_0-rmsle:0.25379
[11]	validation_0-rmsle:0.24492
[12]	validation_0-rmsle:0.23743
[13]	validation_0-rmsle:0.23067
[14]	validation_0-rmsle:0.22406
[15]	validation_0-rmsle:0.21926
[16]	validation_0-rmsle:0.21454
[17]	validation_0-rmsle:0.21058
[18]	validation_0-rmsle:0.20732
[19]	validation_0-rmsle:0.20439
[20]	validation_0-rmsle:0.20135
[21]	validation_0-rmsle:0.19894
[22]	validation_0-rmsle:0.19714
[23]	validation_0-rmsle:0.19543
[24]	validation_0-rmsle:0.19407
[25]	validation_0-rmsle:0.19275
[26]	validation_0-rmsle:0.19140
[27]	validation_0-rmsle:0.19015
[28]	validation_0-rmsle:0.18877
[29]	validation_0-rmsle:0.18803
[30]	validation_0-rmsle:0.18719
[31]	validation_0-rmsle:0.18644
[32]	validation_0-rmsle:0.18594
[33]	validation_0-rmsle:0.18530
[34]	validation_0-rmsle:0.18477
[35]	validation_0-rmsle:0.18439
[36]	validation_0-rmsle:0.18390
[37]	validation_0-rmsle:0.18366
[38]	validation_0-rmsle:0.18316
[39]	validation_0-rmsle:0.18266
[40]	validation_0-rmsle:0.18212
[41]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categori

[13]	validation_0-rmsle:0.20220
[14]	validation_0-rmsle:0.19427
[15]	validation_0-rmsle:0.18804
[16]	validation_0-rmsle:0.18226
[17]	validation_0-rmsle:0.17742
[18]	validation_0-rmsle:0.17330
[19]	validation_0-rmsle:0.16910
[20]	validation_0-rmsle:0.16611
[21]	validation_0-rmsle:0.16323
[22]	validation_0-rmsle:0.16048
[23]	validation_0-rmsle:0.15779
[24]	validation_0-rmsle:0.15539
[25]	validation_0-rmsle:0.15314
[26]	validation_0-rmsle:0.15056
[27]	validation_0-rmsle:0.14935
[28]	validation_0-rmsle:0.14791
[29]	validation_0-rmsle:0.14644
[30]	validation_0-rmsle:0.14542
[31]	validation_0-rmsle:0.14402
[32]	validation_0-rmsle:0.14262
[33]	validation_0-rmsle:0.14175
[34]	validation_0-rmsle:0.14084
[35]	validation_0-rmsle:0.13982
[36]	validation_0-rmsle:0.13922
[37]	validation_0-rmsle:0.13881
[38]	validation_0-rmsle:0.13807
[39]	validation_0-rmsle:0.13759
[40]	validation_0-rmsle:0.13735
[41]	validation_0-rmsle:0.13709
[42]	validation_0-rmsle:0.13646
[43]	validation_0-rmsle:0.13625
[44]	val

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [9]:
pred = model.predict(processed_own_test_df[feats])
own_prediction_df = pd.DataFrame({
    'id': processed_own_test_df_copy['id'],
    'price_doc': pred
})


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


## Combine both predictions and sort by id

In [10]:
overall_preds = pd.concat([inv_prediction_df, own_prediction_df])

overall_preds.sort_values(by=['id'], inplace=True)

overall_preds.head()

Unnamed: 0,id,price_doc
0,30474,5147662.5
0,30475,7602516.5
1,30476,5196510.5
1,30477,5875431.5
2,30478,4648424.5


In [11]:
overall_preds.to_csv(output_path, index=False)