In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

In [2]:
df = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
y = df.iloc[:, -1].values

In [4]:
df.isnull().sum()

id                  0
brand               0
model               0
model_year          0
milage              0
fuel_type        5083
engine              0
transmission        0
ext_col             0
int_col             0
accident         2452
clean_title     21419
price               0
dtype: int64

In [5]:
test.isnull().sum()

id                  0
brand               0
model               0
model_year          0
milage              0
fuel_type        3383
engine              0
transmission        0
ext_col             0
int_col             0
accident         1632
clean_title     14239
dtype: int64

In [6]:
df = df.drop(['price'], axis=1)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188533 entries, 0 to 188532
Data columns (total 12 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   id            188533 non-null  int64 
 1   brand         188533 non-null  object
 2   model         188533 non-null  object
 3   model_year    188533 non-null  int64 
 4   milage        188533 non-null  int64 
 5   fuel_type     183450 non-null  object
 6   engine        188533 non-null  object
 7   transmission  188533 non-null  object
 8   ext_col       188533 non-null  object
 9   int_col       188533 non-null  object
 10  accident      186081 non-null  object
 11  clean_title   167114 non-null  object
dtypes: int64(3), object(9)
memory usage: 17.3+ MB


In [8]:
df = pd.concat([df, test], axis=0)

In [9]:
df.shape

(314223, 12)

In [10]:
df.head(2)

Unnamed: 0,id,brand,model,model_year,milage,fuel_type,engine,transmission,ext_col,int_col,accident,clean_title
0,0,MINI,Cooper S Base,2007,213000,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T,Yellow,Gray,None reported,Yes
1,1,Lincoln,LS V8,2002,143250,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T,Silver,Beige,At least 1 accident or damage reported,Yes


# Data Preprocessing

## 1. Id

In [11]:
df1 = df.drop(['id'], axis = 1)

## 2. Brand & Model

In [12]:
len(df1['brand'].value_counts())

57

In [13]:
luxury_brands = ["Bentley", "Maserati", "Lamborghini", "Rolls-Royce",
                 "Ferrari", "McLaren", "Aston", "Maybach"]

top_10_brands = ["Tesla", "Toyota", "BYD", "Ferrari", "Mercedes-Benz",
                 "Porsche", "BMW", "Ford", "Volkswagen", "Honda", 'Xiaomi']

df1['Is_Luxury_Brand'] = df1['brand'].isin(luxury_brands).astype(int)
df1['top_10_car_brand'] = df1['brand'].isin(top_10_brands).astype(int)

In [14]:
df1['Brand_Popularity'] = df1.groupby('brand')['brand'].transform('count')
df1['Model_Popularity'] = df1.groupby('model')['model'].transform('count')

In [15]:
df2 = df1.drop(['model', 'brand'], axis=1)

## 3. Model_Year 

In [16]:
df2['car_age'] = 2024 - df2['model_year']
df3 = df2

## 4. Milage 

In [17]:
df3['mileage_age_ratio'] = df3['milage'] / (df3['car_age'] + 1)

In [18]:
df3['milage_age_ratio'] = df3['milage'] / (df3['car_age'] + 1)
# df3['mean_milage_with_age'] = df3.groupby(['car_age'])['milage'].transform('mean')

In [19]:
def segment_mileage(df):
    def popka(milage):
        if milage < 70000:
            return 'Low'
        elif milage < 150000:
            return 'Medium'
        elif milage < 190000:
            return 'High'
        else: return 'pizda'
    df['popa'] = df['milage'].apply(popka)
    return df

df3 = segment_mileage(df3)

In [20]:
df3 = pd.get_dummies(df3, columns=['popa'], drop_first=True)

## 5. Fuel_type

In [21]:
df3['fuel_type'].isnull().sum()

8466

In [22]:
df3['fuel_type'] = df3['fuel_type'].fillna('Gasoline')

In [23]:
df3['fuel_type'].value_counts()

Gasoline          284939
Hybrid             11508
E85 Flex Fuel       8929
Diesel              6641
–                   1319
Plug-In Hybrid       858
not supported         29
Name: fuel_type, dtype: int64

In [24]:
df3['fuel_type'] = df3['fuel_type'].replace('–', 'Gasoline')

In [25]:
df4 = pd.get_dummies(df3, columns=['fuel_type'])

In [26]:
df4 = df4.drop(['fuel_type_not supported'], axis=1)

## 6. Engine 

In [27]:
df4['engine_size']=df4['engine'].str.extract('(\d\.\d+L)')
df4['engine_size']=df4['engine_size'].str.extract('(\d\.\d+)').astype(float)

In [28]:
def extract_horsepower(engine):
        try:
            return float(engine.split('HP')[0])
        except:
            return None
df4['hp'] = df4['engine'].apply(extract_horsepower)

In [29]:
df4['engine_size']=df4['engine_size'].fillna(0)
df4['hp']=df4['hp'].fillna(df4['hp'].median())
df4['power_to_weight_ratio'] = df4['hp'] / (df4['engine_size']+1)

In [31]:
df5 = df4.drop(['engine'], axis=1)

## 7. Transmission  & Int_col & Ext_col

In [32]:
pattern_v = ["I4", "V6", "V8", "V10", "V12"]

pattern = [
    "A/T",
    "w/Dual",
    "M/T",
    "Automatic",
    "CVT",
    "Auto-Shift",
    "Manual",
    "Variable",
    "F",
    "Overdrive",
    "Electronically",
    "At/Mt",
    "DCT",
    "CVT-F",
    "AT",
]

In [33]:
pattern_ex = r"(?:^|\s+)(" + "|".join(pattern) + r")(?:\s+|$)"
pattern_vex = r"(?:^|\s+)(" + "|".join(pattern_v) + r")(?:\s+|$)"
df5.replace("–", np.nan, inplace=True)
df5["transm_"] = df5["transmission"].str.extract(pattern_ex, expand=False)
df5["transm_"] = (
    df5["transm_"]
    .replace("A/T", "AT", regex=True)
    .replace(
        {"A/T": "Automatic", "AT": "Automatic", "MT": "Manual", "M/T": "Manual"}
    )
)

In [34]:
df5 = df5.drop(['transmission'], axis = 1)

In [35]:
df5 = pd.get_dummies(df5, columns=['transm_'], drop_first=True)

## 8. Int_col & Ext_col

In [36]:
from sklearn.preprocessing import OrdinalEncoder

In [37]:
ordEncoder=OrdinalEncoder(handle_unknown="use_encoded_value",unknown_value=-10)
ordinal_features=['ext_col','int_col']
df5[ordinal_features]=ordEncoder.fit_transform(df5[ordinal_features])

In [39]:
df5.head(2)

Unnamed: 0,model_year,milage,ext_col,int_col,accident,clean_title,Is_Luxury_Brand,top_10_car_brand,Brand_Popularity,Model_Popularity,...,transm__Automatic,transm__CVT,transm__CVT-F,transm__DCT,transm__Electronically,transm__F,transm__Manual,transm__Overdrive,transm__Variable,transm__w/Dual
0,2007,213000,312.0,71.0,None reported,Yes,0,0,1761,961,...,1,0,0,0,0,0,0,0,0,0
1,2002,143250,263.0,10.0,At least 1 accident or damage reported,Yes,0,0,4016,45,...,1,0,0,0,0,0,0,0,0,0


## 9. Accident

In [40]:
df5['accident'] = np.where(df5['accident'] == "At least 1 accident or damage reported",1,0)

In [41]:
df5.head(2)

Unnamed: 0,model_year,milage,ext_col,int_col,accident,clean_title,Is_Luxury_Brand,top_10_car_brand,Brand_Popularity,Model_Popularity,...,transm__Automatic,transm__CVT,transm__CVT-F,transm__DCT,transm__Electronically,transm__F,transm__Manual,transm__Overdrive,transm__Variable,transm__w/Dual
0,2007,213000,312.0,71.0,0,Yes,0,0,1761,961,...,1,0,0,0,0,0,0,0,0,0
1,2002,143250,263.0,10.0,1,Yes,0,0,4016,45,...,1,0,0,0,0,0,0,0,0,0


In [42]:
df5.shape

(314223, 34)

## 10. Clean Title

In [43]:
df5['clean_title'].value_counts()

Yes    278565
Name: clean_title, dtype: int64

In [44]:
df6 = pd.get_dummies(df5, columns=['clean_title'])

In [45]:
df6.head(2)

Unnamed: 0,model_year,milage,ext_col,int_col,accident,Is_Luxury_Brand,top_10_car_brand,Brand_Popularity,Model_Popularity,car_age,...,transm__CVT,transm__CVT-F,transm__DCT,transm__Electronically,transm__F,transm__Manual,transm__Overdrive,transm__Variable,transm__w/Dual,clean_title_Yes
0,2007,213000,312.0,71.0,0,0,0,1761,961,17,...,0,0,0,0,0,0,0,0,0,1
1,2002,143250,263.0,10.0,1,0,0,4016,45,22,...,0,0,0,0,0,0,0,0,0,1


In [46]:
final_df = df6.loc[ : , ~df6.columns.duplicated()]
final_df.shape

(314223, 34)

In [47]:
final_df.head(2)

Unnamed: 0,model_year,milage,ext_col,int_col,accident,Is_Luxury_Brand,top_10_car_brand,Brand_Popularity,Model_Popularity,car_age,...,transm__CVT,transm__CVT-F,transm__DCT,transm__Electronically,transm__F,transm__Manual,transm__Overdrive,transm__Variable,transm__w/Dual,clean_title_Yes
0,2007,213000,312.0,71.0,0,0,0,1761,961,17,...,0,0,0,0,0,0,0,0,0,1
1,2002,143250,263.0,10.0,1,0,0,4016,45,22,...,0,0,0,0,0,0,0,0,0,1


In [49]:
X = final_df.iloc[:188533, :].values
final_test = final_df.iloc[188533:, :].values

In [50]:
X.shape

(188533, 34)

In [51]:
final_test.shape

(125690, 34)

In [54]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)

In [55]:
from sklearn.metrics import mean_squared_error
def rms(y_actual,y_predicted):
    return math.sqrt(mean_squared_error(y_actual, y_predicted))

# Applying Algorithms

## 1. XGBoost Regressor

In [78]:
from xgboost import XGBRegressor

In [97]:
params={
     'lambda': 0.03855829881805118,
 'alpha': 0.20425553397350257,
 'colsample_bytree': 0.9,
 'subsample': 0.8,
 'learning_rate': 0.009,
 'n_estimators': 3000,
 'max_depth': 4,
 'min_child_weight': 104
}

In [79]:
params = {
    'n_estimators': 474,
    'max_depth': 4,
    'learning_rate': 0.025169007425202783,
    'subsample': 0.940305834703151,
    'colsample_bytree': 0.5147768892744543,
    'gamma': 2.55949911605052,
    'reg_alpha': 8.373743060765753,
    'reg_lambda': 9.693295698962173
}

In [98]:
xgr = XGBRegressor(**params)
xgr.fit(X_train,y_train)

In [None]:
X_train.shape, X_test.shape

In [99]:
y_pred_x = xgr.predict(X_test)
print('Testing accuracy {:.4f}'.format(rms(y_test,y_pred_x)))

Testing accuracy 77220.4880


In [114]:
y_pred_xg = xgr.predict(final_test)

In [83]:
y_pred_xg.shape

(125690,)

In [None]:
pred = pd.DataFrame(y_pred_xg)
sub_df = pd.read_csv('sample_submission.csv')
sub_df = sub_df.drop(['price'], axis = 1)
datasets = pd.concat([sub_df['id'], pred], axis = 1)
datasets.columns = ['id', 'price']
datasets.to_csv('submission.csv', index = False)

## 2. LightGBM

In [56]:
from lightgbm import LGBMRegressor

In [95]:
params = {
    'n_estimators': 1247,
    'num_leaves': 967,
    'min_data_in_leaf': 211,
    'max_depth': 5,
    'learning_rate': 0.015721380283463496,
    'min_child_samples': 60,
    'min_split_gain': 0.33256130965761277,
    'subsample': 0.7978855361713282,
    'colsample_bytree': 0.6388509819265756,
    'reg_alpha': 9.990553424898337,
    'reg_lambda': 2.7059460269648667,
}

In [96]:
lgb_model = LGBMRegressor(**params, importance_type='gain', verbose=-1)
lgb_model.fit(X_train, y_train, eval_metric='rmse')

In [94]:
y_pred_lg = lgb_model.predict(X_test)
print('Testing accuracy {:.4f}'.format(rms(y_test,y_pred_lg)))

Testing accuracy 77559.8618


In [74]:
y_pred_lgb = lgb_model.predict(final_test)

In [75]:
y_pred = y_pred_lgb

In [76]:
y_pred_lgb.shape

(125690,)

In [77]:
pred = pd.DataFrame(y_pred_lgb)
sub_df = pd.read_csv('sample_submission.csv')
sub_df = sub_df.drop(['price'], axis = 1)
datasets = pd.concat([sub_df['id'], pred], axis = 1)
datasets.columns = ['id', 'price']
datasets.to_csv('submission.csv', index = False)

## 3. Catboost

In [60]:
from catboost import CatBoostRegressor

In [None]:
params = {
    'random_seed': 42,
    'early_stopping_rounds': 200,
    'learning_rate': 0.032089785965271685, 
    'model_size_reg': 1.1498478100664318, 
    'colsample_bylevel': 0.7398749059852404, 
    'reg_lambda': 13.481452874196997, 
    'n_estimators': 986, 
    'max_depth': 10, 
    'subsample': 0.5977235262240771,
    
#     'iterations': 1500, 
#     'learning_rate': 0.010011982648647914, 
#     'depth': 10, 
#     'l2_leaf_reg': 9.557443197213443, 
#     'bootstrap_type': 'MVS'
}

In [89]:
params = {
    'iterations': 2576,
    'depth': 10,
    'learning_rate': 0.010061752514516954,
    'l2_leaf_reg': 87.92301809859886,
    'bagging_temperature': 0.39914513957371534,
    'random_strength': 6.994017602268948,
    'min_data_in_leaf': 155,
    'border_count': 15
}

In [90]:
cat = CatBoostRegressor(**params, loss_function='RMSE')
cat.fit(X_train, y_train)

0:	learn: 77715.6343011	total: 21.7ms	remaining: 55.9s
1:	learn: 77629.3219961	total: 42.6ms	remaining: 54.8s
2:	learn: 77555.6330188	total: 63.5ms	remaining: 54.4s
3:	learn: 77472.0470770	total: 85.9ms	remaining: 55.3s
4:	learn: 77403.2753583	total: 107ms	remaining: 54.9s
5:	learn: 77321.1652303	total: 128ms	remaining: 54.8s
6:	learn: 77242.7640685	total: 149ms	remaining: 54.7s
7:	learn: 77166.9732227	total: 170ms	remaining: 54.5s
8:	learn: 77097.1279184	total: 190ms	remaining: 54.2s
9:	learn: 77025.3637346	total: 211ms	remaining: 54.1s
10:	learn: 76954.4575210	total: 234ms	remaining: 54.4s
11:	learn: 76881.3232472	total: 255ms	remaining: 54.6s
12:	learn: 76806.2633010	total: 276ms	remaining: 54.5s
13:	learn: 76737.1199084	total: 297ms	remaining: 54.4s
14:	learn: 76667.4167859	total: 319ms	remaining: 54.5s
15:	learn: 76608.8593793	total: 332ms	remaining: 53.1s
16:	learn: 76542.4362343	total: 352ms	remaining: 53s
17:	learn: 76478.6494088	total: 373ms	remaining: 53s
18:	learn: 76412.363

159:	learn: 72617.0804493	total: 3.31s	remaining: 50.1s
160:	learn: 72609.8477592	total: 3.34s	remaining: 50.1s
161:	learn: 72599.8007760	total: 3.36s	remaining: 50s
162:	learn: 72589.0964911	total: 3.38s	remaining: 50s
163:	learn: 72581.2384440	total: 3.4s	remaining: 50s
164:	learn: 72571.8278202	total: 3.42s	remaining: 50s
165:	learn: 72566.7015696	total: 3.44s	remaining: 49.9s
166:	learn: 72557.6822994	total: 3.46s	remaining: 49.9s
167:	learn: 72548.4233802	total: 3.48s	remaining: 49.9s
168:	learn: 72539.5223863	total: 3.5s	remaining: 49.8s
169:	learn: 72532.3886909	total: 3.52s	remaining: 49.8s
170:	learn: 72524.9408217	total: 3.54s	remaining: 49.8s
171:	learn: 72514.5898982	total: 3.56s	remaining: 49.8s
172:	learn: 72505.1856650	total: 3.58s	remaining: 49.8s
173:	learn: 72499.2714033	total: 3.6s	remaining: 49.7s
174:	learn: 72489.8993734	total: 3.62s	remaining: 49.6s
175:	learn: 72482.0445433	total: 3.64s	remaining: 49.6s
176:	learn: 72476.0371008	total: 3.66s	remaining: 49.6s
177

309:	learn: 71866.3960986	total: 6.36s	remaining: 46.5s
310:	learn: 71863.5935895	total: 6.38s	remaining: 46.5s
311:	learn: 71859.1823571	total: 6.4s	remaining: 46.5s
312:	learn: 71855.4834862	total: 6.42s	remaining: 46.4s
313:	learn: 71853.9652746	total: 6.44s	remaining: 46.4s
314:	learn: 71850.9988045	total: 6.46s	remaining: 46.4s
315:	learn: 71848.9082550	total: 6.48s	remaining: 46.3s
316:	learn: 71845.1571019	total: 6.5s	remaining: 46.3s
317:	learn: 71842.1808789	total: 6.52s	remaining: 46.3s
318:	learn: 71840.2533142	total: 6.54s	remaining: 46.3s
319:	learn: 71837.6443892	total: 6.56s	remaining: 46.3s
320:	learn: 71836.5733829	total: 6.58s	remaining: 46.2s
321:	learn: 71833.8693086	total: 6.6s	remaining: 46.2s
322:	learn: 71831.3744709	total: 6.62s	remaining: 46.2s
323:	learn: 71828.5502562	total: 6.64s	remaining: 46.2s
324:	learn: 71825.6886602	total: 6.66s	remaining: 46.1s
325:	learn: 71823.4168542	total: 6.68s	remaining: 46.1s
326:	learn: 71821.7351505	total: 6.7s	remaining: 46

459:	learn: 71544.7628614	total: 9.43s	remaining: 43.4s
460:	learn: 71542.9744181	total: 9.46s	remaining: 43.4s
461:	learn: 71540.9540453	total: 9.48s	remaining: 43.4s
462:	learn: 71538.2007951	total: 9.5s	remaining: 43.3s
463:	learn: 71536.9526156	total: 9.52s	remaining: 43.3s
464:	learn: 71534.1433446	total: 9.54s	remaining: 43.3s
465:	learn: 71532.1308986	total: 9.56s	remaining: 43.3s
466:	learn: 71531.0310663	total: 9.58s	remaining: 43.3s
467:	learn: 71529.8496511	total: 9.6s	remaining: 43.2s
468:	learn: 71528.5199926	total: 9.62s	remaining: 43.2s
469:	learn: 71525.8060467	total: 9.64s	remaining: 43.2s
470:	learn: 71524.6007340	total: 9.66s	remaining: 43.2s
471:	learn: 71523.5570752	total: 9.69s	remaining: 43.2s
472:	learn: 71521.0289151	total: 9.71s	remaining: 43.2s
473:	learn: 71519.8336377	total: 9.73s	remaining: 43.1s
474:	learn: 71518.6230635	total: 9.75s	remaining: 43.1s
475:	learn: 71517.2648281	total: 9.77s	remaining: 43.1s
476:	learn: 71515.4596367	total: 9.79s	remaining: 

607:	learn: 71340.2493753	total: 12.5s	remaining: 40.5s
608:	learn: 71339.0314374	total: 12.5s	remaining: 40.4s
609:	learn: 71338.0634382	total: 12.5s	remaining: 40.4s
610:	learn: 71336.9914260	total: 12.6s	remaining: 40.4s
611:	learn: 71334.9847517	total: 12.6s	remaining: 40.4s
612:	learn: 71332.8582554	total: 12.6s	remaining: 40.4s
613:	learn: 71331.1359224	total: 12.6s	remaining: 40.3s
614:	learn: 71330.1330276	total: 12.6s	remaining: 40.3s
615:	learn: 71328.6469554	total: 12.7s	remaining: 40.3s
616:	learn: 71327.6175414	total: 12.7s	remaining: 40.3s
617:	learn: 71326.3290089	total: 12.7s	remaining: 40.3s
618:	learn: 71324.8191454	total: 12.7s	remaining: 40.3s
619:	learn: 71322.9540531	total: 12.8s	remaining: 40.2s
620:	learn: 71321.8165814	total: 12.8s	remaining: 40.2s
621:	learn: 71321.6484450	total: 12.8s	remaining: 40.2s
622:	learn: 71321.0808695	total: 12.8s	remaining: 40.2s
623:	learn: 71320.2808477	total: 12.8s	remaining: 40.2s
624:	learn: 71318.6947692	total: 12.9s	remaining

761:	learn: 71168.2701350	total: 15.8s	remaining: 37.5s
762:	learn: 71167.0729431	total: 15.8s	remaining: 37.5s
763:	learn: 71166.3024739	total: 15.8s	remaining: 37.5s
764:	learn: 71166.2251574	total: 15.8s	remaining: 37.4s
765:	learn: 71165.9066831	total: 15.8s	remaining: 37.4s
766:	learn: 71164.5965867	total: 15.9s	remaining: 37.4s
767:	learn: 71164.1796881	total: 15.9s	remaining: 37.4s
768:	learn: 71163.2873752	total: 15.9s	remaining: 37.4s
769:	learn: 71162.8064010	total: 15.9s	remaining: 37.3s
770:	learn: 71161.5603972	total: 15.9s	remaining: 37.3s
771:	learn: 71161.5221652	total: 15.9s	remaining: 37.3s
772:	learn: 71160.6196520	total: 16s	remaining: 37.2s
773:	learn: 71159.8119696	total: 16s	remaining: 37.2s
774:	learn: 71159.8052254	total: 16s	remaining: 37.2s
775:	learn: 71159.1453892	total: 16s	remaining: 37.2s
776:	learn: 71157.6779170	total: 16s	remaining: 37.2s
777:	learn: 71157.2131950	total: 16.1s	remaining: 37.1s
778:	learn: 71156.3929057	total: 16.1s	remaining: 37.1s
77

911:	learn: 71031.1279398	total: 18.9s	remaining: 34.4s
912:	learn: 71030.3277323	total: 18.9s	remaining: 34.4s
913:	learn: 71029.7070002	total: 18.9s	remaining: 34.4s
914:	learn: 71029.1903493	total: 18.9s	remaining: 34.3s
915:	learn: 71028.7864151	total: 18.9s	remaining: 34.3s
916:	learn: 71028.7433679	total: 19s	remaining: 34.3s
917:	learn: 71027.5769226	total: 19s	remaining: 34.3s
918:	learn: 71026.9086565	total: 19s	remaining: 34.3s
919:	learn: 71024.6324639	total: 19s	remaining: 34.2s
920:	learn: 71023.5348654	total: 19s	remaining: 34.2s
921:	learn: 71022.6865006	total: 19.1s	remaining: 34.2s
922:	learn: 71021.9745463	total: 19.1s	remaining: 34.2s
923:	learn: 71021.2531869	total: 19.1s	remaining: 34.2s
924:	learn: 71019.7555182	total: 19.1s	remaining: 34.1s
925:	learn: 71019.0763397	total: 19.2s	remaining: 34.1s
926:	learn: 71018.8260052	total: 19.2s	remaining: 34.1s
927:	learn: 71018.0701526	total: 19.2s	remaining: 34.1s
928:	learn: 71017.3125708	total: 19.2s	remaining: 34.1s
92

1061:	learn: 70899.5105970	total: 22.1s	remaining: 31.5s
1062:	learn: 70899.4763185	total: 22.1s	remaining: 31.5s
1063:	learn: 70898.4044442	total: 22.1s	remaining: 31.5s
1064:	learn: 70897.1074350	total: 22.2s	remaining: 31.4s
1065:	learn: 70895.4326523	total: 22.2s	remaining: 31.4s
1066:	learn: 70894.4327951	total: 22.2s	remaining: 31.4s
1067:	learn: 70893.6234577	total: 22.2s	remaining: 31.4s
1068:	learn: 70892.3417875	total: 22.2s	remaining: 31.4s
1069:	learn: 70891.2673609	total: 22.3s	remaining: 31.3s
1070:	learn: 70890.2438869	total: 22.3s	remaining: 31.3s
1071:	learn: 70888.6475492	total: 22.3s	remaining: 31.3s
1072:	learn: 70887.1461774	total: 22.3s	remaining: 31.3s
1073:	learn: 70885.7350714	total: 22.4s	remaining: 31.3s
1074:	learn: 70884.3381126	total: 22.4s	remaining: 31.3s
1075:	learn: 70883.3713772	total: 22.4s	remaining: 31.2s
1076:	learn: 70882.7960866	total: 22.4s	remaining: 31.2s
1077:	learn: 70880.5489973	total: 22.4s	remaining: 31.2s
1078:	learn: 70879.6973871	tota

1208:	learn: 70751.8009157	total: 25.6s	remaining: 29s
1209:	learn: 70751.5149694	total: 25.6s	remaining: 28.9s
1210:	learn: 70749.5357688	total: 25.7s	remaining: 28.9s
1211:	learn: 70749.0687260	total: 25.7s	remaining: 28.9s
1212:	learn: 70748.3576783	total: 25.7s	remaining: 28.9s
1213:	learn: 70747.6049812	total: 25.8s	remaining: 28.9s
1214:	learn: 70745.9987017	total: 25.8s	remaining: 28.9s
1215:	learn: 70744.7155207	total: 25.8s	remaining: 28.9s
1216:	learn: 70744.1961983	total: 25.9s	remaining: 28.9s
1217:	learn: 70742.0730115	total: 25.9s	remaining: 28.9s
1218:	learn: 70739.9091660	total: 25.9s	remaining: 28.9s
1219:	learn: 70738.8716347	total: 26s	remaining: 28.9s
1220:	learn: 70738.4536170	total: 26s	remaining: 28.9s
1221:	learn: 70737.2567119	total: 26s	remaining: 28.8s
1222:	learn: 70735.7927903	total: 26.1s	remaining: 28.9s
1223:	learn: 70734.6877441	total: 26.1s	remaining: 28.8s
1224:	learn: 70732.8986006	total: 26.2s	remaining: 28.8s
1225:	learn: 70731.8179486	total: 26.2s

1360:	learn: 70601.4285856	total: 29.8s	remaining: 26.6s
1361:	learn: 70600.1408454	total: 29.9s	remaining: 26.6s
1362:	learn: 70599.8140569	total: 29.9s	remaining: 26.6s
1363:	learn: 70598.4530594	total: 29.9s	remaining: 26.6s
1364:	learn: 70597.4944160	total: 30s	remaining: 26.6s
1365:	learn: 70596.1237774	total: 30s	remaining: 26.6s
1366:	learn: 70595.2133614	total: 30s	remaining: 26.6s
1367:	learn: 70594.1102501	total: 30s	remaining: 26.5s
1368:	learn: 70593.2321110	total: 30.1s	remaining: 26.5s
1369:	learn: 70592.0296575	total: 30.1s	remaining: 26.5s
1370:	learn: 70591.9269359	total: 30.1s	remaining: 26.5s
1371:	learn: 70591.7907620	total: 30.1s	remaining: 26.4s
1372:	learn: 70589.9286149	total: 30.2s	remaining: 26.4s
1373:	learn: 70589.8124018	total: 30.2s	remaining: 26.4s
1374:	learn: 70588.6389977	total: 30.2s	remaining: 26.4s
1375:	learn: 70587.8266882	total: 30.2s	remaining: 26.4s
1376:	learn: 70587.6616887	total: 30.2s	remaining: 26.3s
1377:	learn: 70586.8954936	total: 30.3s

1505:	learn: 70508.1523919	total: 33.1s	remaining: 23.5s
1506:	learn: 70507.9716228	total: 33.2s	remaining: 23.5s
1507:	learn: 70507.3473391	total: 33.2s	remaining: 23.5s
1508:	learn: 70506.5278597	total: 33.2s	remaining: 23.5s
1509:	learn: 70505.3706370	total: 33.2s	remaining: 23.5s
1510:	learn: 70504.6691175	total: 33.3s	remaining: 23.4s
1511:	learn: 70503.9351740	total: 33.3s	remaining: 23.4s
1512:	learn: 70503.3228417	total: 33.3s	remaining: 23.4s
1513:	learn: 70502.2074307	total: 33.3s	remaining: 23.4s
1514:	learn: 70501.3740559	total: 33.4s	remaining: 23.4s
1515:	learn: 70500.8869346	total: 33.4s	remaining: 23.3s
1516:	learn: 70500.7309841	total: 33.4s	remaining: 23.3s
1517:	learn: 70500.5754325	total: 33.4s	remaining: 23.3s
1518:	learn: 70499.7564812	total: 33.5s	remaining: 23.3s
1519:	learn: 70499.6378524	total: 33.5s	remaining: 23.3s
1520:	learn: 70498.8046337	total: 33.5s	remaining: 23.2s
1521:	learn: 70498.6863041	total: 33.5s	remaining: 23.2s
1522:	learn: 70497.9057804	tota

1658:	learn: 70411.3271767	total: 36.7s	remaining: 20.3s
1659:	learn: 70411.2185981	total: 36.7s	remaining: 20.2s
1660:	learn: 70409.8996605	total: 36.7s	remaining: 20.2s
1661:	learn: 70409.7912552	total: 36.7s	remaining: 20.2s
1662:	learn: 70409.6828786	total: 36.8s	remaining: 20.2s
1663:	learn: 70409.5748398	total: 36.8s	remaining: 20.2s
1664:	learn: 70409.4669823	total: 36.8s	remaining: 20.1s
1665:	learn: 70408.6455149	total: 36.8s	remaining: 20.1s
1666:	learn: 70408.5378565	total: 36.8s	remaining: 20.1s
1667:	learn: 70407.7207747	total: 36.9s	remaining: 20.1s
1668:	learn: 70406.8965079	total: 36.9s	remaining: 20s
1669:	learn: 70406.0879653	total: 36.9s	remaining: 20s
1670:	learn: 70405.2836631	total: 36.9s	remaining: 20s
1671:	learn: 70404.5270455	total: 37s	remaining: 20s
1672:	learn: 70404.4195056	total: 37s	remaining: 20s
1673:	learn: 70403.6232269	total: 37s	remaining: 19.9s
1674:	learn: 70402.8310689	total: 37s	remaining: 19.9s
1675:	learn: 70402.6888487	total: 37s	remaining: 

1809:	learn: 70350.9612661	total: 39.9s	remaining: 16.9s
1810:	learn: 70349.9962297	total: 40s	remaining: 16.9s
1811:	learn: 70349.4818703	total: 40s	remaining: 16.9s
1812:	learn: 70348.8714772	total: 40s	remaining: 16.8s
1813:	learn: 70348.2264314	total: 40.1s	remaining: 16.8s
1814:	learn: 70347.5841734	total: 40.1s	remaining: 16.8s
1815:	learn: 70347.0731611	total: 40.1s	remaining: 16.8s
1816:	learn: 70346.5648904	total: 40.1s	remaining: 16.8s
1817:	learn: 70346.0593285	total: 40.2s	remaining: 16.7s
1818:	learn: 70345.4570926	total: 40.2s	remaining: 16.7s
1819:	learn: 70345.3583947	total: 40.2s	remaining: 16.7s
1820:	learn: 70345.2598403	total: 40.2s	remaining: 16.7s
1821:	learn: 70345.1614286	total: 40.2s	remaining: 16.7s
1822:	learn: 70345.0635167	total: 40.3s	remaining: 16.6s
1823:	learn: 70344.4639535	total: 40.3s	remaining: 16.6s
1824:	learn: 70344.3661983	total: 40.3s	remaining: 16.6s
1825:	learn: 70343.7692336	total: 40.3s	remaining: 16.6s
1826:	learn: 70343.6421238	total: 40.

1955:	learn: 70311.2826132	total: 43.2s	remaining: 13.7s
1956:	learn: 70311.1927197	total: 43.3s	remaining: 13.7s
1957:	learn: 70311.1029413	total: 43.3s	remaining: 13.7s
1958:	learn: 70311.0132777	total: 43.3s	remaining: 13.6s
1959:	learn: 70310.9008660	total: 43.3s	remaining: 13.6s
1960:	learn: 70310.7888968	total: 43.4s	remaining: 13.6s
1961:	learn: 70310.6769373	total: 43.4s	remaining: 13.6s
1962:	learn: 70310.5875101	total: 43.4s	remaining: 13.6s
1963:	learn: 70310.4981965	total: 43.4s	remaining: 13.5s
1964:	learn: 70309.9533240	total: 43.5s	remaining: 13.5s
1965:	learn: 70309.8641388	total: 43.5s	remaining: 13.5s
1966:	learn: 70309.7750663	total: 43.5s	remaining: 13.5s
1967:	learn: 70309.2323809	total: 43.5s	remaining: 13.5s
1968:	learn: 70308.6918337	total: 43.6s	remaining: 13.4s
1969:	learn: 70308.2399748	total: 43.6s	remaining: 13.4s
1970:	learn: 70307.7016865	total: 43.6s	remaining: 13.4s
1971:	learn: 70307.1655003	total: 43.7s	remaining: 13.4s
1972:	learn: 70307.0766041	tota

2102:	learn: 70276.1987626	total: 46.8s	remaining: 10.5s
2103:	learn: 70276.0939912	total: 46.8s	remaining: 10.5s
2104:	learn: 70275.9894277	total: 46.8s	remaining: 10.5s
2105:	learn: 70275.8850715	total: 46.8s	remaining: 10.5s
2106:	learn: 70275.8050737	total: 46.9s	remaining: 10.4s
2107:	learn: 70275.3301157	total: 46.9s	remaining: 10.4s
2108:	learn: 70275.2502237	total: 46.9s	remaining: 10.4s
2109:	learn: 70274.6256690	total: 46.9s	remaining: 10.4s
2110:	learn: 70274.0039376	total: 47s	remaining: 10.3s
2111:	learn: 70273.9241723	total: 47s	remaining: 10.3s
2112:	learn: 70273.8444993	total: 47s	remaining: 10.3s
2113:	learn: 70273.7649182	total: 47s	remaining: 10.3s
2114:	learn: 70273.6609953	total: 47s	remaining: 10.3s
2115:	learn: 70273.0421302	total: 47.1s	remaining: 10.2s
2116:	learn: 70272.9384320	total: 47.1s	remaining: 10.2s
2117:	learn: 70272.2632652	total: 47.1s	remaining: 10.2s
2118:	learn: 70271.7913854	total: 47.1s	remaining: 10.2s
2119:	learn: 70271.7120117	total: 47.2s	r

2252:	learn: 70226.2624793	total: 50.2s	remaining: 7.2s
2253:	learn: 70226.1668512	total: 50.2s	remaining: 7.18s
2254:	learn: 70224.5279354	total: 50.3s	remaining: 7.16s
2255:	learn: 70224.0164833	total: 50.3s	remaining: 7.13s
2256:	learn: 70223.9426291	total: 50.3s	remaining: 7.11s
2257:	learn: 70223.4330109	total: 50.3s	remaining: 7.09s
2258:	learn: 70221.8235638	total: 50.4s	remaining: 7.07s
2259:	learn: 70221.7282095	total: 50.4s	remaining: 7.04s
2260:	learn: 70220.0211089	total: 50.4s	remaining: 7.02s
2261:	learn: 70219.5133644	total: 50.4s	remaining: 7s
2262:	learn: 70219.0074104	total: 50.5s	remaining: 6.98s
2263:	learn: 70218.9337136	total: 50.5s	remaining: 6.96s
2264:	learn: 70217.4898851	total: 50.5s	remaining: 6.93s
2265:	learn: 70216.9995233	total: 50.5s	remaining: 6.91s
2266:	learn: 70216.4956000	total: 50.6s	remaining: 6.89s
2267:	learn: 70214.9725877	total: 50.6s	remaining: 6.87s
2268:	learn: 70214.4704776	total: 50.6s	remaining: 6.85s
2269:	learn: 70214.3754492	total: 5

2398:	learn: 70152.0722921	total: 54s	remaining: 3.98s
2399:	learn: 70150.6176602	total: 54s	remaining: 3.96s
2400:	learn: 70150.5469130	total: 54s	remaining: 3.94s
2401:	learn: 70150.1501003	total: 54.1s	remaining: 3.92s
2402:	learn: 70149.2088213	total: 54.1s	remaining: 3.89s
2403:	learn: 70149.1181468	total: 54.1s	remaining: 3.87s
2404:	learn: 70148.7224272	total: 54.1s	remaining: 3.85s
2405:	learn: 70148.6319427	total: 54.2s	remaining: 3.83s
2406:	learn: 70148.2372249	total: 54.2s	remaining: 3.8s
2407:	learn: 70147.8434912	total: 54.2s	remaining: 3.78s
2408:	learn: 70146.3437910	total: 54.2s	remaining: 3.76s
2409:	learn: 70146.2535107	total: 54.3s	remaining: 3.74s
2410:	learn: 70145.8350472	total: 54.3s	remaining: 3.71s
2411:	learn: 70145.7645513	total: 54.3s	remaining: 3.69s
2412:	learn: 70145.6744958	total: 54.3s	remaining: 3.67s
2413:	learn: 70145.6041157	total: 54.4s	remaining: 3.65s
2414:	learn: 70145.5338205	total: 54.4s	remaining: 3.62s
2415:	learn: 70145.1167454	total: 54.4

2552:	learn: 70088.5666810	total: 57.8s	remaining: 520ms
2553:	learn: 70088.2233917	total: 57.8s	remaining: 498ms
2554:	learn: 70087.8808204	total: 57.8s	remaining: 475ms
2555:	learn: 70087.5389635	total: 57.9s	remaining: 453ms
2556:	learn: 70086.9879684	total: 57.9s	remaining: 430ms
2557:	learn: 70086.6469658	total: 57.9s	remaining: 408ms
2558:	learn: 70086.5607649	total: 57.9s	remaining: 385ms
2559:	learn: 70086.4747348	total: 58s	remaining: 362ms
2560:	learn: 70086.3888750	total: 58s	remaining: 340ms
2561:	learn: 70086.0038581	total: 58s	remaining: 317ms
2562:	learn: 70085.9378123	total: 58s	remaining: 294ms
2563:	learn: 70085.8718464	total: 58.1s	remaining: 272ms
2564:	learn: 70085.5316559	total: 58.1s	remaining: 249ms
2565:	learn: 70085.4657813	total: 58.1s	remaining: 226ms
2566:	learn: 70085.3999862	total: 58.1s	remaining: 204ms
2567:	learn: 70085.0605222	total: 58.2s	remaining: 181ms
2568:	learn: 70084.5134593	total: 58.2s	remaining: 159ms
2569:	learn: 70084.4477645	total: 58.2s

<catboost.core.CatBoostRegressor at 0x1908a70b010>

In [91]:
y_pred_ct = cat.predict(X_test)
print('Testing accuracy {:.4f}'.format(rms(y_test,y_pred_ct)))

Testing accuracy 77335.9517


In [None]:
print('Testing accuracy {:.4f}'.format(rms(y_test,(y_pred_lg * 0.70) + (y_pred_ct * 0.30))))

In [86]:
y_pred_cat = cat.predict(final_test)

In [None]:
y_pred = (y_pred_lgb * 0.80) + (y_pred_cat * 0.20)

In [None]:
y_pred.shape

In [88]:
pred = pd.DataFrame(y_pred)
sub_df = pd.read_csv('sample_submission.csv')
sub_df = sub_df.drop(['price'], axis = 1)
datasets = pd.concat([sub_df['id'], pred], axis = 1)
datasets.columns = ['id', 'price']
datasets.to_csv('submission.csv', index = False)