In [51]:
# import the needed libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder 
from sklearn.decomposition import PCA 
import xgboost as xgb
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [52]:
# load the datasets
MercedesTrain_df = pd.read_csv('train.csv')
MercedesTest_df = pd.read_csv('test.csv')

In [53]:
# explore the data
MercedesTrain_df.head()

Unnamed: 0,ID,y,X0,X1,X2,X3,X4,X5,X6,X8,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
0,0,130.81,k,v,at,a,d,u,j,o,...,0,0,1,0,0,0,0,0,0,0
1,6,88.53,k,t,av,e,d,y,l,o,...,1,0,0,0,0,0,0,0,0,0
2,7,76.26,az,w,n,c,d,x,j,x,...,0,0,0,0,0,0,1,0,0,0
3,9,80.62,az,t,n,f,d,x,l,e,...,0,0,0,0,0,0,0,0,0,0
4,13,78.02,az,v,n,f,d,h,d,n,...,0,0,0,0,0,0,0,0,0,0


In [54]:
MercedesTest_df.head()

Unnamed: 0,ID,X0,X1,X2,X3,X4,X5,X6,X8,X10,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
0,1,az,v,n,f,d,t,a,w,0,...,0,0,0,1,0,0,0,0,0,0
1,2,t,b,ai,a,d,b,g,y,0,...,0,0,1,0,0,0,0,0,0,0
2,3,az,v,as,f,d,a,j,j,0,...,0,0,0,1,0,0,0,0,0,0
3,4,az,l,n,f,d,z,l,n,0,...,0,0,0,1,0,0,0,0,0,0
4,5,w,s,as,c,d,y,i,m,0,...,1,0,0,0,0,0,0,0,0,0


In [55]:
print('MercedesTrain dataset:', MercedesTrain_df.columns)
print('MercedesTest dataset:', MercedesTest_df.columns)

MercedesTrain dataset: Index(['ID', 'y', 'X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X8',
       ...
       'X375', 'X376', 'X377', 'X378', 'X379', 'X380', 'X382', 'X383', 'X384',
       'X385'],
      dtype='object', length=378)
MercedesTest dataset: Index(['ID', 'X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X8', 'X10',
       ...
       'X375', 'X376', 'X377', 'X378', 'X379', 'X380', 'X382', 'X383', 'X384',
       'X385'],
      dtype='object', length=377)


In [56]:
# drop the irrelevant feature
MercedesTrain_df = MercedesTrain_df.drop(columns='ID')
MercedesTest_df = MercedesTest_df.drop(columns='ID')

In [57]:
print('MercedesTrain dataset:', MercedesTrain_df.shape)
print('MercedesTest dataset:', MercedesTest_df.shape)

MercedesTrain dataset: (4209, 377)
MercedesTest dataset: (4209, 376)


In [58]:
MercedesTrain_df.info()
# dtypes: float64(1), int64(369), object(8)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4209 entries, 0 to 4208
Columns: 377 entries, y to X385
dtypes: float64(1), int64(368), object(8)
memory usage: 12.1+ MB


In [59]:
MercedesTest_df.info()
# dtypes: int64(369), object(8)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4209 entries, 0 to 4208
Columns: 376 entries, X0 to X385
dtypes: int64(368), object(8)
memory usage: 12.1+ MB


In [60]:
MercedesTrain_df.describe() 

Unnamed: 0,y,X10,X11,X12,X13,X14,X15,X16,X17,X18,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
count,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,...,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0
mean,100.669318,0.013305,0.0,0.075077,0.057971,0.42813,0.000475,0.002613,0.007603,0.00784,...,0.318841,0.057258,0.314802,0.02067,0.009503,0.008078,0.007603,0.001663,0.000475,0.001426
std,12.679381,0.11459,0.0,0.263547,0.233716,0.494867,0.021796,0.051061,0.086872,0.088208,...,0.466082,0.232363,0.464492,0.142294,0.097033,0.089524,0.086872,0.040752,0.021796,0.037734
min,72.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,90.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,99.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,109.01,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,265.32,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [61]:
MercedesTest_df.describe()

Unnamed: 0,X10,X11,X12,X13,X14,X15,X16,X17,X18,X19,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
count,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,...,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0
mean,0.019007,0.000238,0.074364,0.06106,0.427893,0.000713,0.002613,0.008791,0.010216,0.111665,...,0.325968,0.049656,0.311951,0.019244,0.011879,0.008078,0.008791,0.000475,0.000713,0.001663
std,0.136565,0.015414,0.262394,0.239468,0.494832,0.026691,0.051061,0.093357,0.10057,0.314992,...,0.468791,0.217258,0.463345,0.137399,0.108356,0.089524,0.093357,0.021796,0.026691,0.040752
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


1. If for any column(s), the variance is equal to zero, then you need to remove those variable(s).

In [62]:
# identify feature with zero variance
def remove_zero_variance(df): 
    lists = []
    for c in df.columns:
        if df[c].dtype == 'int64': 
            if df[c].var()==0.0: 
                lists.append(c)
    return lists

In [63]:
# drop feature with zero variance in the Train set
print(MercedesTrain_df.shape)
zero_variance = remove_zero_variance(MercedesTrain_df)
MercedesTrain_df = MercedesTrain_df.drop(columns=zero_variance)
print(MercedesTrain_df.shape)

(4209, 377)
(4209, 365)


In [64]:
# drop feature with zero variance in the Test set
print(MercedesTest_df.shape)
MercedesTest_df = MercedesTest_df.drop(columns=zero_variance)
print(MercedesTest_df.shape)

(4209, 376)
(4209, 364)


2. Check for null and unique values for test and train sets.

In [65]:
# train sets
# zero null value 
MercedesTrain_df.isna().sum().sum()

0

In [66]:
print(MercedesTrain_df.nunique())

y       2545
X0        47
X1        27
X2        44
X3         7
        ... 
X380       2
X382       2
X383       2
X384       2
X385       2
Length: 365, dtype: int64


In [67]:
def split_columns(df): 
    int_list_columns = [] 
    object_list_columns = []
    for c in df.columns:
        if df[c].dtype == 'int64': 
            int_list_columns.append(c) 
        if df[c].dtype == 'object': 
            object_list_columns.append(c)  
    return object_list_columns,int_list_columns

In [68]:
object_list,int_list_columns = split_columns(MercedesTrain_df)
print('number of object columns' , len(object_list))
print('object columns' , object_list)

number of object columns 8
object columns ['X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X8']


In [69]:
print('number of int columns' , len(int_list_columns))
print('int columns' , int_list_columns) 

number of int columns 356
int columns ['X10', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20', 'X21', 'X22', 'X23', 'X24', 'X26', 'X27', 'X28', 'X29', 'X30', 'X31', 'X32', 'X33', 'X34', 'X35', 'X36', 'X37', 'X38', 'X39', 'X40', 'X41', 'X42', 'X43', 'X44', 'X45', 'X46', 'X47', 'X48', 'X49', 'X50', 'X51', 'X52', 'X53', 'X54', 'X55', 'X56', 'X57', 'X58', 'X59', 'X60', 'X61', 'X62', 'X63', 'X64', 'X65', 'X66', 'X67', 'X68', 'X69', 'X70', 'X71', 'X73', 'X74', 'X75', 'X76', 'X77', 'X78', 'X79', 'X80', 'X81', 'X82', 'X83', 'X84', 'X85', 'X86', 'X87', 'X88', 'X89', 'X90', 'X91', 'X92', 'X94', 'X95', 'X96', 'X97', 'X98', 'X99', 'X100', 'X101', 'X102', 'X103', 'X104', 'X105', 'X106', 'X108', 'X109', 'X110', 'X111', 'X112', 'X113', 'X114', 'X115', 'X116', 'X117', 'X118', 'X119', 'X120', 'X122', 'X123', 'X124', 'X125', 'X126', 'X127', 'X128', 'X129', 'X130', 'X131', 'X132', 'X133', 'X134', 'X135', 'X136', 'X137', 'X138', 'X139', 'X140', 'X141', 'X142', 'X143', 'X144', 'X145', 'X146',

In [70]:
# unique values for integer value in the train set is 0 and 1 (min and max value) 
MercedesTrain_df.describe() 

Unnamed: 0,y,X10,X12,X13,X14,X15,X16,X17,X18,X19,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
count,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,...,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0
mean,100.669318,0.013305,0.075077,0.057971,0.42813,0.000475,0.002613,0.007603,0.00784,0.099549,...,0.318841,0.057258,0.314802,0.02067,0.009503,0.008078,0.007603,0.001663,0.000475,0.001426
std,12.679381,0.11459,0.263547,0.233716,0.494867,0.021796,0.051061,0.086872,0.088208,0.299433,...,0.466082,0.232363,0.464492,0.142294,0.097033,0.089524,0.086872,0.040752,0.021796,0.037734
min,72.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,90.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,99.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,109.01,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,265.32,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [71]:
# test set 
# zero null value 
MercedesTest_df.isna().sum().sum()

0

In [72]:
print(MercedesTest_df.nunique())

X0      49
X1      27
X2      45
X3       7
X4       4
        ..
X380     2
X382     2
X383     2
X384     2
X385     2
Length: 364, dtype: int64


In [73]:
object_list,int_list_columns = split_columns(MercedesTest_df)
print('number of object columns' , len(object_list))
print('object columns' , object_list)

number of object columns 8
object columns ['X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X8']


In [74]:
print('number of int columns' , len(int_list_columns))
print('int columns' , int_list_columns) 

number of int columns 356
int columns ['X10', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20', 'X21', 'X22', 'X23', 'X24', 'X26', 'X27', 'X28', 'X29', 'X30', 'X31', 'X32', 'X33', 'X34', 'X35', 'X36', 'X37', 'X38', 'X39', 'X40', 'X41', 'X42', 'X43', 'X44', 'X45', 'X46', 'X47', 'X48', 'X49', 'X50', 'X51', 'X52', 'X53', 'X54', 'X55', 'X56', 'X57', 'X58', 'X59', 'X60', 'X61', 'X62', 'X63', 'X64', 'X65', 'X66', 'X67', 'X68', 'X69', 'X70', 'X71', 'X73', 'X74', 'X75', 'X76', 'X77', 'X78', 'X79', 'X80', 'X81', 'X82', 'X83', 'X84', 'X85', 'X86', 'X87', 'X88', 'X89', 'X90', 'X91', 'X92', 'X94', 'X95', 'X96', 'X97', 'X98', 'X99', 'X100', 'X101', 'X102', 'X103', 'X104', 'X105', 'X106', 'X108', 'X109', 'X110', 'X111', 'X112', 'X113', 'X114', 'X115', 'X116', 'X117', 'X118', 'X119', 'X120', 'X122', 'X123', 'X124', 'X125', 'X126', 'X127', 'X128', 'X129', 'X130', 'X131', 'X132', 'X133', 'X134', 'X135', 'X136', 'X137', 'X138', 'X139', 'X140', 'X141', 'X142', 'X143', 'X144', 'X145', 'X146',

In [75]:
# unique values for integer value in the train set is 0 and 1 (min and max value) 
MercedesTest_df.describe() 

Unnamed: 0,X10,X12,X13,X14,X15,X16,X17,X18,X19,X20,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
count,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,...,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0,4209.0
mean,0.019007,0.074364,0.06106,0.427893,0.000713,0.002613,0.008791,0.010216,0.111665,0.139463,...,0.325968,0.049656,0.311951,0.019244,0.011879,0.008078,0.008791,0.000475,0.000713,0.001663
std,0.136565,0.262394,0.239468,0.494832,0.026691,0.051061,0.093357,0.10057,0.314992,0.34647,...,0.468791,0.217258,0.463345,0.137399,0.108356,0.089524,0.093357,0.021796,0.026691,0.040752
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [76]:
for c in object_list:
    print(c , MercedesTest_df[c].unique())

X0 ['az' 't' 'w' 'y' 'x' 'f' 'ap' 'o' 'ay' 'al' 'h' 'z' 'aj' 'd' 'v' 'ak'
 'ba' 'n' 'j' 's' 'af' 'ax' 'at' 'aq' 'av' 'm' 'k' 'a' 'e' 'ai' 'i' 'ag'
 'b' 'am' 'aw' 'as' 'r' 'ao' 'u' 'l' 'c' 'ad' 'au' 'bc' 'g' 'an' 'ae' 'p'
 'bb']
X1 ['v' 'b' 'l' 's' 'aa' 'r' 'a' 'i' 'p' 'c' 'o' 'm' 'z' 'e' 'h' 'w' 'g' 'k'
 'y' 't' 'u' 'd' 'j' 'q' 'n' 'f' 'ab']
X2 ['n' 'ai' 'as' 'ae' 's' 'b' 'e' 'ak' 'm' 'a' 'aq' 'ag' 'r' 'k' 'aj' 'ay'
 'ao' 'an' 'ac' 'af' 'ax' 'h' 'i' 'f' 'ap' 'p' 'au' 't' 'z' 'y' 'aw' 'd'
 'at' 'g' 'am' 'j' 'x' 'ab' 'w' 'q' 'ah' 'ad' 'al' 'av' 'u']
X3 ['f' 'a' 'c' 'e' 'd' 'g' 'b']
X4 ['d' 'b' 'a' 'c']
X5 ['t' 'b' 'a' 'z' 'y' 'x' 'h' 'g' 'f' 'j' 'i' 'd' 'c' 'af' 'ag' 'ab' 'ac'
 'ad' 'ae' 'ah' 'l' 'k' 'n' 'm' 'p' 'q' 's' 'r' 'v' 'w' 'o' 'aa']
X6 ['a' 'g' 'j' 'l' 'i' 'd' 'f' 'h' 'c' 'k' 'e' 'b']
X8 ['w' 'y' 'j' 'n' 'm' 's' 'a' 'v' 'r' 'o' 't' 'h' 'c' 'k' 'p' 'u' 'd' 'g'
 'b' 'q' 'e' 'l' 'f' 'i' 'x']


3. Apply label encoder.

In [80]:
# encode catagrcal varibale using OrdinalEncoder
OE = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
MercedesTrain_df[object_list] = pd.DataFrame(OE.fit_transform(MercedesTrain_df[object_list]), columns=object_list)
MercedesTest_df[object_list] = pd.DataFrame(OE.transform(MercedesTest_df[object_list]), columns=object_list)

In [81]:
Train_Y = MercedesTrain_df['y']
Train_X = MercedesTrain_df.drop(columns='y')
print('MercedesTrain_X ', Train_Y.shape) 
print('MercedesTrain_Y ', Train_X.shape)

MercedesTrain_X  (4209,)
MercedesTrain_Y  (4209, 364)


In [83]:
Train_X.head()

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X8,X10,X12,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
0,32.0,23.0,17.0,0.0,3.0,24.0,9.0,14.0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,32.0,21.0,19.0,4.0,3.0,28.0,11.0,14.0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,20.0,24.0,34.0,2.0,3.0,27.0,9.0,23.0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,20.0,21.0,34.0,5.0,3.0,27.0,11.0,4.0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,20.0,23.0,34.0,5.0,3.0,12.0,3.0,13.0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [84]:
Train_Y.head()

0    130.81
1     88.53
2     76.26
3     80.62
4     78.02
Name: y, dtype: float64

In [87]:
Test_x = MercedesTest_df
MercedesTest_df.head()

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X8,X10,X12,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
0,20.0,23.0,34.0,5.0,3.0,-1.0,0.0,22.0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,40.0,3.0,7.0,0.0,3.0,-1.0,6.0,24.0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,20.0,23.0,16.0,5.0,3.0,-1.0,9.0,9.0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,20.0,13.0,34.0,5.0,3.0,-1.0,11.0,13.0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,43.0,20.0,16.0,2.0,3.0,28.0,8.0,12.0,0,0,...,1,0,0,0,0,0,0,0,0,0


4. Perform dimensionality reduction.(PCA)

In [88]:
pca = PCA(n_components=18, random_state = 42)
pca_x_Train = pca.fit_transform(Train_X)
pca_x_Test = pca.transform(Test_x)

5. Predict your test_df values using XGBoost.
- https://xgboost.readthedocs.io/en/stable/python/python_intro.html

In [89]:
x_train, x_valid, y_train, y_valid = train_test_split(pca_x_Train, Train_Y, test_size = 0.25, random_state = 42)

In [93]:
d_train = xgb.DMatrix(x_train, label = y_train)
d_valid = xgb.DMatrix(x_valid, label = y_valid)
d_test = xgb.DMatrix(pca_x_Test) 

In [94]:
param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:linear'}
evallist = [(d_valid, 'eval'), (d_train, 'train')]
num_round = 100
XGBoost_ = xgb.train(param, d_train, num_round, evallist, early_stopping_rounds=50)

[0]	eval-rmse:10.58999	train-rmse:10.82918
[1]	eval-rmse:10.05508	train-rmse:9.95477
[2]	eval-rmse:9.71900	train-rmse:9.51677
[3]	eval-rmse:9.34894	train-rmse:9.22851
[4]	eval-rmse:9.35227	train-rmse:9.12727
[5]	eval-rmse:9.37568	train-rmse:9.04603
[6]	eval-rmse:9.37645	train-rmse:8.96451
[7]	eval-rmse:9.27684	train-rmse:8.61327
[8]	eval-rmse:9.24575	train-rmse:8.52157
[9]	eval-rmse:9.14158	train-rmse:8.45982
[10]	eval-rmse:9.15112	train-rmse:8.40827
[11]	eval-rmse:9.16343	train-rmse:8.36164
[12]	eval-rmse:9.22040	train-rmse:8.31906
[13]	eval-rmse:9.14978	train-rmse:8.26045
[14]	eval-rmse:9.17649	train-rmse:8.21232
[15]	eval-rmse:9.15358	train-rmse:8.18171
[16]	eval-rmse:9.19014	train-rmse:8.13815
[17]	eval-rmse:9.23107	train-rmse:8.09022
[18]	eval-rmse:9.23086	train-rmse:7.97912
[19]	eval-rmse:9.26097	train-rmse:7.94881
[20]	eval-rmse:9.21301	train-rmse:7.91087
[21]	eval-rmse:9.24285	train-rmse:7.87313
[22]	eval-rmse:9.23537	train-rmse:7.84013
[23]	eval-rmse:9.22831	train-rmse:7.82054

In [102]:
XGBoost_test = XGBoost_.predict(d_test)
Prediction = pd.DataFrame(XGBoost_test, columns = ['y'])
Prediction.head()

Unnamed: 0,y
0,86.83564
1,85.234184
2,74.288933
3,85.738914
4,109.63501
