In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style = 'whitegrid')
import os

from sklearn.preprocessing        import LabelEncoder, StandardScaler
from sklearn.model_selection      import train_test_split, StratifiedKFold
from sklearn.linear_model         import LinearRegression
from sklearn.tree                 import DecisionTreeRegressor
from sklearn.ensemble             import RandomForestRegressor, AdaBoostRegressor, BaggingRegressor, ExtraTreesRegressor
from lightgbm                     import LGBMRegressor
from xgboost                      import XGBRegressor
from catboost                     import CatBoostRegressor
from sklearn.metrics              import mean_squared_error

le = LabelEncoder()

path = os.getcwd()
data_path = 'C:\\Users\\sunil\\Projects\\HackerEarth\\7342422066d511eb\\dataset\\'

In [3]:
train = pd.read_csv(data_path + 'cleaned_train.csv')
test = pd.read_csv(data_path + 'cleaned_test.csv')
sample_sub = pd.read_csv(data_path + 'sample_submission.csv')

In [4]:
sample_sub = test[['Customer Id', 'Artist Name']].copy()
sample_sub['Cost'] = None
sample_sub = sample_sub.drop('Artist Name', axis = 1)

In [5]:
train['Cost'] = train['Cost'].abs()

In [6]:
train['Cost'] = np.log(train['Cost'])

In [7]:
# metrics = 100*max(0, 1-metrics.mean_squared_log_error(actual, predicted))

---
# Data Preprocessing

In [8]:
df = pd.concat([train, test], axis = 0).reset_index(drop=True)

In [9]:
ID = 'Customer Id'
name = 'Artist Name'
date_1 = 'Scheduled Date'
date_2 = 'Delivery Date'
location = 'Customer Location'
target = 'Cost'


cat_cols = ['Material','International', 'Express Shipment', 
            'Installation Included','Transport',
            'Fragile', 'Customer Information', 
            'Remote Location']

num_cols = ['Artist Reputation', 'Height', 'Width','Weight',
            'Price Of Sculpture', 'Base Shipping Price']

In [10]:
df[cat_cols] = df[cat_cols].apply(le.fit_transform)

#df = pd.get_dummies(data = df, columns=cat_cols, drop_first=True)

In [11]:
features = [col for col in df.columns if col not in ([ID, name, date_1, date_2, location, target])]

## Splitting

In [12]:
train_proc, test_proc = df[:train.shape[0]], df[train.shape[0]:].reset_index(drop = True)

In [13]:
trn, val = train_test_split(train_proc, test_size = 0.2, random_state = 1999)

##### Input for model
X_trn, X_val = trn[features], val[features]

##### Target column
y_trn, y_val = trn[target], val[target]

##### Features for test data that we will be predicting
X_test = test_proc[features]

In [14]:
model_dict = {}

model_dict['Linear Regression'] = LinearRegression()
model_dict['Decision Tree'] = DecisionTreeRegressor(random_state = 1)
model_dict['Extra Trees'] = ExtraTreesRegressor(random_state = 1)
model_dict['Random Forest'] = RandomForestRegressor(random_state = 1)
model_dict['AdaBoost'] = AdaBoostRegressor(random_state = 1)
model_dict['Bagging'] = BaggingRegressor(random_state = 1)
model_dict['XGBoost'] = XGBRegressor(random_state = 1)
model_dict['LGBM'] = LGBMRegressor(random_state = 1)
model_dict['Cat'] = CatBoostRegressor(verbose=False, random_state =1)

In [15]:
def model_test(X_train, X_test, y_train, y_test,model,model_name):
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    y_pred = np.abs(y_pred)
    accuracy = 100*max(0, 1-mean_squared_error(y_test, y_pred))
    print('======================================{}======================================='.format(model_name))
    print('Score is : {}'.format(accuracy))
    print()

In [16]:
for model_name,model in model_dict.items():
    model_test(X_trn, X_val, y_trn, y_val, model, model_name)

Score is : 11.26568677279045

Score is : 75.90201628557229

Score is : 86.26725578388523

Score is : 86.79595133509662

Score is : 61.10259301986842

Score is : 85.51298974979582

Score is : 88.75772463429364

Score is : 88.49146427453147

Score is : 90.99679816552691



In [17]:
lgb_params = {'max_depth': 4, 'n_estimators': 976, 'learning_rate': 0.08133551909202762, 'num_leaves': 318}

In [18]:
cat = CatBoostRegressor(random_state = 1)

_ = cat.fit(train_proc[features], train_proc[target])

preds = cat.predict(test_proc[features])
preds = np.exp(preds)

Learning rate set to 0.054463
0:	learn: 1.5780695	total: 2.95ms	remaining: 2.95s
1:	learn: 1.5071660	total: 6.2ms	remaining: 3.09s
2:	learn: 1.4397906	total: 15.5ms	remaining: 5.17s
3:	learn: 1.3777426	total: 18.3ms	remaining: 4.57s
4:	learn: 1.3208123	total: 21.1ms	remaining: 4.21s
5:	learn: 1.2650310	total: 23.7ms	remaining: 3.93s
6:	learn: 1.2145778	total: 27.5ms	remaining: 3.9s
7:	learn: 1.1639484	total: 30.9ms	remaining: 3.83s
8:	learn: 1.1178391	total: 33.4ms	remaining: 3.68s
9:	learn: 1.0747251	total: 39.5ms	remaining: 3.91s
10:	learn: 1.0318696	total: 42.1ms	remaining: 3.79s
11:	learn: 0.9938101	total: 45.2ms	remaining: 3.72s
12:	learn: 0.9571971	total: 47.8ms	remaining: 3.63s
13:	learn: 0.9220534	total: 52.3ms	remaining: 3.68s
14:	learn: 0.8891652	total: 55.2ms	remaining: 3.62s
15:	learn: 0.8574355	total: 57.8ms	remaining: 3.55s
16:	learn: 0.8282047	total: 60.8ms	remaining: 3.52s
17:	learn: 0.8008061	total: 63.3ms	remaining: 3.46s
18:	learn: 0.7749391	total: 65.9ms	remaining: 

168:	learn: 0.3089047	total: 488ms	remaining: 2.4s
169:	learn: 0.3081768	total: 491ms	remaining: 2.4s
170:	learn: 0.3075049	total: 493ms	remaining: 2.39s
171:	learn: 0.3066833	total: 496ms	remaining: 2.39s
172:	learn: 0.3060934	total: 498ms	remaining: 2.38s
173:	learn: 0.3054830	total: 501ms	remaining: 2.38s
174:	learn: 0.3048513	total: 505ms	remaining: 2.38s
175:	learn: 0.3044876	total: 507ms	remaining: 2.37s
176:	learn: 0.3039137	total: 510ms	remaining: 2.37s
177:	learn: 0.3033649	total: 512ms	remaining: 2.37s
178:	learn: 0.3022083	total: 515ms	remaining: 2.36s
179:	learn: 0.3018377	total: 517ms	remaining: 2.35s
180:	learn: 0.3012520	total: 520ms	remaining: 2.35s
181:	learn: 0.3007993	total: 522ms	remaining: 2.35s
182:	learn: 0.2998506	total: 525ms	remaining: 2.35s
183:	learn: 0.2992428	total: 528ms	remaining: 2.34s
184:	learn: 0.2983971	total: 530ms	remaining: 2.34s
185:	learn: 0.2981449	total: 533ms	remaining: 2.33s
186:	learn: 0.2974273	total: 535ms	remaining: 2.33s
187:	learn: 0.

359:	learn: 0.2401001	total: 1.01s	remaining: 1.79s
360:	learn: 0.2398590	total: 1.01s	remaining: 1.79s
361:	learn: 0.2396947	total: 1.01s	remaining: 1.79s
362:	learn: 0.2396179	total: 1.02s	remaining: 1.79s
363:	learn: 0.2394435	total: 1.02s	remaining: 1.78s
364:	learn: 0.2393742	total: 1.02s	remaining: 1.78s
365:	learn: 0.2391674	total: 1.03s	remaining: 1.78s
366:	learn: 0.2389104	total: 1.03s	remaining: 1.78s
367:	learn: 0.2387404	total: 1.03s	remaining: 1.77s
368:	learn: 0.2385003	total: 1.03s	remaining: 1.77s
369:	learn: 0.2384259	total: 1.04s	remaining: 1.77s
370:	learn: 0.2382708	total: 1.04s	remaining: 1.76s
371:	learn: 0.2381517	total: 1.04s	remaining: 1.76s
372:	learn: 0.2380504	total: 1.05s	remaining: 1.76s
373:	learn: 0.2379306	total: 1.05s	remaining: 1.75s
374:	learn: 0.2378658	total: 1.05s	remaining: 1.75s
375:	learn: 0.2377114	total: 1.05s	remaining: 1.75s
376:	learn: 0.2374810	total: 1.05s	remaining: 1.74s
377:	learn: 0.2371842	total: 1.06s	remaining: 1.74s
378:	learn: 

560:	learn: 0.2109764	total: 1.53s	remaining: 1.2s
561:	learn: 0.2108955	total: 1.54s	remaining: 1.2s
562:	learn: 0.2108233	total: 1.54s	remaining: 1.19s
563:	learn: 0.2107208	total: 1.54s	remaining: 1.19s
564:	learn: 0.2106195	total: 1.55s	remaining: 1.19s
565:	learn: 0.2105432	total: 1.55s	remaining: 1.19s
566:	learn: 0.2104776	total: 1.55s	remaining: 1.19s
567:	learn: 0.2103512	total: 1.55s	remaining: 1.18s
568:	learn: 0.2102705	total: 1.56s	remaining: 1.18s
569:	learn: 0.2100813	total: 1.56s	remaining: 1.18s
570:	learn: 0.2100003	total: 1.56s	remaining: 1.17s
571:	learn: 0.2099343	total: 1.56s	remaining: 1.17s
572:	learn: 0.2098480	total: 1.57s	remaining: 1.17s
573:	learn: 0.2096870	total: 1.57s	remaining: 1.17s
574:	learn: 0.2095472	total: 1.57s	remaining: 1.16s
575:	learn: 0.2094368	total: 1.57s	remaining: 1.16s
576:	learn: 0.2093542	total: 1.58s	remaining: 1.16s
577:	learn: 0.2092618	total: 1.58s	remaining: 1.15s
578:	learn: 0.2091427	total: 1.58s	remaining: 1.15s
579:	learn: 0.

762:	learn: 0.1918688	total: 2.06s	remaining: 639ms
763:	learn: 0.1917452	total: 2.06s	remaining: 637ms
764:	learn: 0.1917089	total: 2.06s	remaining: 634ms
765:	learn: 0.1916367	total: 2.07s	remaining: 631ms
766:	learn: 0.1915743	total: 2.07s	remaining: 629ms
767:	learn: 0.1915020	total: 2.07s	remaining: 626ms
768:	learn: 0.1913918	total: 2.07s	remaining: 623ms
769:	learn: 0.1912815	total: 2.08s	remaining: 620ms
770:	learn: 0.1911924	total: 2.08s	remaining: 618ms
771:	learn: 0.1911632	total: 2.08s	remaining: 615ms
772:	learn: 0.1911102	total: 2.08s	remaining: 612ms
773:	learn: 0.1909962	total: 2.09s	remaining: 609ms
774:	learn: 0.1909308	total: 2.09s	remaining: 607ms
775:	learn: 0.1908413	total: 2.09s	remaining: 604ms
776:	learn: 0.1907619	total: 2.09s	remaining: 601ms
777:	learn: 0.1906915	total: 2.1s	remaining: 598ms
778:	learn: 0.1906032	total: 2.1s	remaining: 596ms
779:	learn: 0.1905211	total: 2.1s	remaining: 593ms
780:	learn: 0.1903785	total: 2.1s	remaining: 590ms
781:	learn: 0.19

965:	learn: 0.1770326	total: 2.58s	remaining: 90.9ms
966:	learn: 0.1769269	total: 2.58s	remaining: 88.2ms
967:	learn: 0.1768437	total: 2.59s	remaining: 85.5ms
968:	learn: 0.1768106	total: 2.59s	remaining: 82.8ms
969:	learn: 0.1767335	total: 2.59s	remaining: 80.1ms
970:	learn: 0.1766578	total: 2.59s	remaining: 77.5ms
971:	learn: 0.1766188	total: 2.6s	remaining: 74.8ms
972:	learn: 0.1765733	total: 2.6s	remaining: 72.1ms
973:	learn: 0.1765276	total: 2.6s	remaining: 69.5ms
974:	learn: 0.1764565	total: 2.6s	remaining: 66.8ms
975:	learn: 0.1764029	total: 2.61s	remaining: 64.1ms
976:	learn: 0.1763417	total: 2.61s	remaining: 61.4ms
977:	learn: 0.1762789	total: 2.61s	remaining: 58.8ms
978:	learn: 0.1761961	total: 2.61s	remaining: 56.1ms
979:	learn: 0.1761394	total: 2.62s	remaining: 53.4ms
980:	learn: 0.1760865	total: 2.62s	remaining: 50.7ms
981:	learn: 0.1760236	total: 2.62s	remaining: 48.1ms
982:	learn: 0.1759820	total: 2.63s	remaining: 45.4ms
983:	learn: 0.1759411	total: 2.63s	remaining: 42.7

In [19]:
sample_sub[target] = preds
sample_sub.to_csv(data_path + 'sub1.csv', index = False)

---
# Cross Validation

In [707]:
# Cross Validation for Boosting
def cross_val(regressor, train, test, features, name):
    N_splits = 7
    
    oofs = np.zeros(len(train))
    preds = np.zeros(len(test))
    
    target_col = train[target]
    
    folds = StratifiedKFold(n_splits = N_splits, shuffle = True,random_state = 1999)
    stratified_target = pd.qcut( train[target], 10, labels=False, duplicates='drop')
    for index, (trn_idx, val_idx) in enumerate(folds.split(train, stratified_target)):
        print(f'\n================================Fold{index + 1}===================================')
        
        #### Train Set
        X_trn, y_trn = train[features].iloc[trn_idx], train[target].iloc[trn_idx]
        
        #### Validation Set
        X_val, y_val = train[features].iloc[val_idx], train[target].iloc[val_idx]
        
        #### Test Set
        X_test = test[features]
        
        if name != 'cat':
            #### Scaling Data ####
            scaler = StandardScaler()
            _ = scaler.fit(X_trn)
            X_trn = scaler.transform(X_trn)
            X_val = scaler.transform(X_val)
            X_test = scaler.transform(X_test)
        
        ############ Fitting #############
        _ = regressor.fit(X_trn, y_trn, eval_set = [(X_val, y_val)], early_stopping_rounds = 100, verbose = False)
        
        ############ Predicting #############
        val_preds = np.abs(regressor.predict(X_val))
        test_preds = np.abs(regressor.predict(X_test))
        
        error = 100*max(0, 1-mean_squared_error(y_val, val_preds))
        print(f'\n Score for Validation set is : {error}')
        
        oofs[val_idx] = val_preds
        preds += test_preds / N_splits
        
    total_error = 100*max(0, 1-mean_squared_error(y_val, val_preds))
    print(f'\n\Score for oofs is {total_error}')
    
    return oofs, preds

In [719]:
# Cross Validation for Boosting
def normal_cross_val(regressor, train, test, features, name):
    N_splits = 7
    
    oofs = np.zeros(len(train))
    preds = np.zeros(len(test))
    
    target_col = train[target]
    
    folds = StratifiedKFold(n_splits = N_splits, shuffle = True,random_state = 1999)
    stratified_target = pd.qcut( train[target], 10, labels=False, duplicates='drop')
    for index, (trn_idx, val_idx) in enumerate(folds.split(train, stratified_target)):
        print(f'\n================================Fold{index + 1}===================================')
        
        #### Train Set
        X_trn, y_trn = train[features].iloc[trn_idx], train[target].iloc[trn_idx]
        
        #### Validation Set
        X_val, y_val = train[features].iloc[val_idx], train[target].iloc[val_idx]
        
        #### Test Set
        X_test = test[features]
        
        if name != 'cat':
            #### Scaling Data ####
            scaler = StandardScaler()
            _ = scaler.fit(X_trn)
            X_trn = scaler.transform(X_trn)
            X_val = scaler.transform(X_val)
            X_test = scaler.transform(X_test)
        
        ############ Fitting #############
        _ = regressor.fit(X_trn, y_trn)
        
        ############ Predicting #############
        val_preds = np.abs(regressor.predict(X_val))
        test_preds = np.abs(regressor.predict(X_test))
        
        error = 100*max(0, 1-mean_squared_error(y_val, val_preds))
        print(f'\n Score for Validation set is : {error}')
        
        oofs[val_idx] = val_preds
        preds += test_preds / N_splits
        
    total_error = 100*max(0, 1-mean_squared_error(y_val, val_preds))
    print(f'\n\Score for oofs is {total_error}')
    
    return oofs, preds

In [722]:
params = {'depth': 5, 'n_estimators': 1694, 'learning_rate': 0.08193293802306104}
cat = CatBoostRegressor(random_state = 1, **params)

In [22]:
%%time
cat_oofs, cat_preds = cross_val(cat, train_proc, test_proc, features, 'cat')



 Score for Validation set is : 91.58707868816231


 Score for Validation set is : 91.54318960757261


 Score for Validation set is : 92.40515925833174


 Score for Validation set is : 91.31435822954025


 Score for Validation set is : 89.98334430516542


 Score for Validation set is : 90.20700369898572


 Score for Validation set is : 91.28358294389555


 Score for Validation set is : 90.26720573096728


 Score for Validation set is : 93.19030935850535


 Score for Validation set is : 94.36956864717492

\Score for oofs is 94.36956864717492
Wall time: 29.8 s


In [23]:
sample_sub[target] = np.exp(cat_preds)
sample_sub.to_csv(data_path + 'sub1.csv', index = False)

# Hyperparameter Tuning

import optuna 
from sklearn.metrics import * 
from sklearn.model_selection import KFold
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
import numpy as np


def objective(trial):
  param = {
      "max_depth": trial.suggest_int("max_depth", 3, 20),
      "n_estimators": trial.suggest_int("n_estimators", 500, 1500),
      "learning_rate": trial.suggest_float("learning_rate", 0.01, 1),
      "num_leaves" : trial.suggest_int("num_leaves", 40, 500)
      #"l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1, 3),
      #"subsample": trial.suggest_float("subsample", 0.1, 1),
      #'colsample_bylevel': trial.suggest_float('colsample_bylevel,', 0.1, 1),
      #'random_strength':trial.suggest_int('random_strength', 0, 1) 
  }

  skf = KFold(n_splits = 5, shuffle=True, random_state=42) 

  for fold, (tr_ind, val_ind) in enumerate(skf.split(train_proc[features], train_proc[target])): 

    X_train, X_val     = train_proc[features].loc[tr_ind], train_proc[features].loc[val_ind]
    y_train, y_val     = train_proc[target][tr_ind], train_proc[target][val_ind] 
    
    model              = LGBMRegressor(**param, random_state=42)      
    
    model.fit(X_train, y_train) 

    val_pred           = np.abs(model.predict(X_val))
    accuracy           = 100 * max(0, 1-mean_squared_error(y_val, val_pred))

  return accuracy
  
study = optuna.create_study(direction="maximize") 
study.optimize(objective, n_trials=300)

# Feature Engineering

In [660]:
def join(train_, test_):
    df = pd.concat([train_, test_], axis = 0).reset_index(drop=True)
    
    return df

def split(df_):
    train_, test_ = df_[:train.shape[0]], df[train.shape[0]:].reset_index(drop=True)
    features_ = [col for col in df_.columns if col not in ([ID, name, date_1, date_2, location, target])]
    
    return train_, test_, features_

In [661]:
df = join(train_proc, test_proc)

df['location'] = df['Customer Location'].apply(lambda x: x.split(',')[0])
df['Country'] = df['Customer Location'].apply(lambda x: len(x.split(',')))

space_country_index = df[ df['Country'] == 1].index
comma_country_index = df[ df['Country'] == 2].index
df.loc[space_country_index, 'location'] = df.iloc[space_country_index]['Customer Location'].apply(lambda x: x.split()[0])

########### Extracting Country

df.loc[space_country_index, 'Country'] = df.iloc[space_country_index]['Customer Location'].apply(lambda x: x.split()[1])
df.loc[comma_country_index, 'Country'] = df.iloc[comma_country_index]['Customer Location'].apply(lambda x: x.split(',')[1].split()[0])

df[['location', 'Country']] = df[['location', 'Country']].apply(le.fit_transform)

## Datetime Features

In [662]:
df['Delivery Date'] = pd.to_datetime(df['Delivery Date'])
df['Scheduled Date'] = pd.to_datetime(df['Scheduled Date'])

In [663]:
df['Date_Diff'] = (df['Delivery Date'] - df['Scheduled Date']).dt.days

In [664]:
train_feat, test_feat, feats = split(df)

**Artist reputation**

In [665]:
bin_names = [i for i in range(0, 10)]


#df['Height_bins'] = np.array(np.floor(np.array(df['Height']) / 10.))
df['Width_bins'] = np.array(np.floor(np.array(df['Width']) / 10.))
df['price_bins'] = np.array(np.floor(np.array(df['Price Of Sculpture']) / 10))
#df['shipping'] = np.array(np.floor(np.array(df['Base Shipping Price']) / 10.))

#df['weight_bins'] = np.array(np.floor(np.array(df['Weight']) / 1000.)) 
#df['artist_bins'] = np.array(np.floor(np.array(df['Artist Reputation']) / 0.1)) 

In [666]:
#df['cost_per_height_material'] = df.groupby(['Material', 'Height_bins', 'Width_bins', 'price_bins'])['Cost'].transform('median')
#df['cost_per_height_material_mean'] = df.groupby(['Material', 'Height_bins', 'Width_bins', 'price_bins'])['Cost'].transform('mean')
#df['cost_per_height_material_max'] = df.groupby(['Material', 'Height_bins', 'Width_bins', 'price_bins'])['Cost'].transform('max')
#df['cost_per_height_material_min'] = df.groupby(['Material', 'Height_bins', 'Width_bins', 'price_bins'])['Cost'].transform('min')


In [667]:
# train_price_bins = list(df.loc[[i for i in range(0, 6500)]]['price_bins'].unique())

# def imputer(x, col):
#     return df[ df['price_bins'] == x][col].median()

# def imputer_2(x, col):
#     nearest = train_price_bins[min(range(len(train_price_bins)), key = lambda i: abs(train_price_bins[i]-x))]
#     return imputer(nearest, col)

In [668]:
#df.loc[ df['cost_per_height_material'].isna(), 'cost_per_height_material'] = df[ df['cost_per_height_material'].isna()]['price_bins'].apply(lambda x: imputer_2(x, 'cost_per_height_material'))
#df.loc[ df['cost_per_height_material_mean'].isna(), 'cost_per_height_material_mean'] = df[ df['cost_per_height_material_mean'].isna()]['price_bins'].apply(lambda x: imputer_2(x, 'cost_per_height_material_mean'))
#df.loc[ df['cost_per_height_material_max'].isna(), 'cost_per_height_material_max'] = df[ df['cost_per_height_material_max'].isna()]['price_bins'].apply(lambda x: imputer_2(x, 'cost_per_height_material_max'))
#df.loc[ df['cost_per_height_material_min'].isna(), 'cost_per_height_material_min'] = df[ df['cost_per_height_material_min'].isna()]['price_bins'].apply(lambda x: imputer_2(x, 'cost_per_height_material_min'))

---

# Arithmatic Features

In [669]:
#df['median_shipping_price_per_transport'] = df.groupby(['Material'])['Base Shipping Price'].transform('median')
#df['median_shipping_price_per_transport'] = df['median_shipping_price_per_transport']*df['Weight']

In [670]:
df['Weight_/_Shipping'] = df['Weight'] / df['Base Shipping Price']
#df['Height_Width_/_Shipping'] = (df['Height'] * df['Width']) / df['Base Shipping Price']

### Shipping price --->>> 

In [671]:
df['median_shipping_per_material_transport'] = df.groupby(['Material'])['Base Shipping Price'].transform('median')
#df['median_shipping_per_material_price'] = df.groupby(['Material'])['Price Of Sculpture'].transform('median')

# Volume

In [713]:
df['volume'] = (df['Height']**2) * (df['Width'])

In [766]:
train_feat, test_feat, feats = split(df)

In [715]:
lgb_oofs, lgb_preds = cross_val(cat, train_feat, test_feat, feats, 'cat')



 Score for Validation set is : 94.08212978782433


 Score for Validation set is : 93.80547969463147


 Score for Validation set is : 94.12854489053215


 Score for Validation set is : 92.00280674062637


 Score for Validation set is : 93.00385178082983


 Score for Validation set is : 92.84839199249822


 Score for Validation set is : 94.15596592832615

\Score for oofs is 94.15596592832615


In [717]:
sample_sub[target] = np.exp(lgb_preds)
sample_sub.to_csv(data_path + 'sub1.csv', index = False)