In [1]:
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)

import pandas as pd
pd.options.display.max_columns = 100

from matplotlib import pyplot as plt
import numpy as np

import seaborn as sns
sns.set(rc={'figure.figsize':(12,9)})
import pylab as plot
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss,accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
from xgboost import XGBClassifier
import xgboost as xgb

In [3]:
#for scaling
from sklearn.preprocessing import StandardScaler

Importing training dataset

In [4]:
data = pd.read_csv('./train.csv')

In [5]:
print(data.shape)

(867873, 10)


In [6]:
data.columns

Index(['Dates', 'Category', 'Descript', 'DayOfWeek', 'PdDistrict',
       'Resolution', 'Address', 'X', 'Y', 'Id'],
      dtype='object')

Splitting the "Dates" into date and time

In [7]:
df=data.head(18000)
datetime=df.Dates.str.split(pat=" ",expand=True)
datetime.columns=['Date','Time']
#datetime

In [8]:
Date=datetime.Date.str.split(pat="-",expand=True)
Date.columns=['Year','Month','Day']

Time=datetime.Time.str.split(pat=":",expand=True)
Time.columns=['Hour','Minute','Second']

Dropping the column of "Dates" and appending the dataframe "datetime"

In [9]:
df=pd.concat([df,Date,Time],axis=1)
#df

In [10]:
df=df.drop(labels=['Dates'],axis=1)

# #Encoding

In [11]:
df.columns

Index(['Category', 'Descript', 'DayOfWeek', 'PdDistrict', 'Resolution',
       'Address', 'X', 'Y', 'Id', 'Year', 'Month', 'Day', 'Hour', 'Minute',
       'Second'],
      dtype='object')

LabelEncoding of "DayOfWeek" and "PdDistrict"

In [12]:
le = preprocessing.LabelEncoder()

In [13]:
#y=pd.get_dummies(df.Category,columns=['Category'],prefix=" ",prefix_sep=" ",drop_first=True,)

le_res=le.fit_transform(df['Category'])
y=pd.DataFrame(le_res)
y.columns=['Category']
#y

# Rotation 45 degree

In [14]:
df['Y']=df['Y'].apply(lambda x : x if 37.82 > x else 37.82)
df['X']=df['X'].apply(lambda x : x if -122.3 > x else 122.3)

In [15]:
df["rot45_X"]=0.707 * df["Y"] + 0.707 * df["X"]
df["rot45_Y"]=0.707 * df["Y"] - 0.707 * df["X"]

In [16]:
df["radial45"]=np.sqrt(np.power(df['rot45_X'],2) + np.power(df['rot45_Y'],2))

# Rotation 30 degree

In [17]:
df["rot30_X"]=(1.732/2) * df["Y"] + 0.5 * df["X"]
df["rot30_Y"]=(1.732/2) * df["Y"] - 0.5 * df["X"]


In [18]:
df["radial30"]=np.sqrt(np.power(df['rot30_X'],2) + np.power(df['rot30_Y'],2))

# Rotation 60 degress

In [19]:
df["rot60_X"]=(0.5) * df["Y"] + (1.732/2) * df["X"]
df["rot60_Y"]=0.5 * df["Y"] - (1.732/2) * df["X"]


In [20]:
df["radial60"]=np.sqrt(np.power(df['rot60_X'],2) + np.power(df['rot60_Y'],2))

# Hours clubbing

In [21]:
df['Hour']=df['Hour'].apply(lambda x:int(x))
#df['Hour']=df['Hour'].apply(lambda x : 'EARLY_Morning' if (x >= 1) and (x <=5) else ('MORNING' if x >= 6 and x <=11 else ('DAY' if x>=12 and x<=18 else 'Night')))
df['Hour']=df['Hour'].apply(lambda x : 'EARLY_MORNING' if x in (1,2,3,4,5) else ('MORNING' if x in (6,7,8,9,10,11) else ('DAY' if x in (12,13,14,15,16,17,18) else 'NIGHT')))

# Month Clubbing

In [22]:
df['Month']=df['Month'].apply(lambda x:int(x))
df['Month']=df['Month'].apply(lambda x : 'MonthLow' if x== 12 else ('MonthMed' if x in (2,6,7,8,9,11) else 'MonthHigh'))

# Minute Clubbing 

In [23]:
df['Minute']=df['Minute'].apply(lambda x:int(x))
df['Minute']=df['Minute'].apply(lambda x : 'low' if x <31 else 'high')

In [24]:
df.head()

Unnamed: 0,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y,Id,Year,Month,Day,Hour,Minute,Second,rot45_X,rot45_Y,radial45,rot30_X,rot30_Y,radial30,rot60_X,rot60_Y,radial60
0,SEX OFFENSES FORCIBLE,"FORCIBLE RAPE, BODILY FORCE",Friday,MISSION,NONE,2100 Block of MISSION ST,-122.419331,37.762264,141546,2013,MonthMed,28,DAY,high,0,-59.852546,113.248388,128.09186,-28.507545,93.911786,98.143282,-87.134009,124.896273,152.287276
1,LIQUOR LAWS,CONSUMING ALCOHOL IN PUBLIC VIEW,Thursday,SOUTHERN,"ARREST, BOOKED",1000 Block of MARKET ST,-122.41134,37.781271,794152,2004,MonthMed,19,EARLY_MORNING,high,0,-59.833459,113.256175,128.089828,-28.487089,93.92425,98.149269,-87.117585,124.898855,152.279998
2,FRAUD,"CREDIT CARD, THEFT BY USE OF",Wednesday,SOUTHERN,NONE,800 Block of BRYANT ST,-122.403405,37.775421,531205,2007,MonthMed,14,NIGHT,low,0,-59.831985,113.24643,128.080522,-28.488188,93.915217,98.140944,-87.113638,124.889059,152.269705
3,ROBBERY,ROBBERY OF A CHAIN STORE WITH A GUN,Thursday,BAYVIEW,DISTRICT ATTORNEY REFUSES TO PROSECUTE,2400 Block of SAN BRUNO AV,-122.404715,37.730161,523137,2007,MonthLow,27,DAY,low,0,-59.86491,113.215357,128.068437,-28.528038,93.876677,98.115643,-87.137403,124.867564,152.265674
4,OTHER OFFENSES,PROBATION VIOLATION,Sunday,SOUTHERN,"ARREST, BOOKED",4TH ST / STEVENSON ST,-122.405239,37.785265,200968,2012,MonthMed,9,DAY,low,0,-59.826321,113.254686,128.085177,-28.48058,93.924659,98.147771,-87.110304,124.895569,152.273137


# Address splitting

In [25]:
address=pd.DataFrame(df['Address'],columns=['Address'])
address=address.Address.str.split(pat=" /",expand=True )

address.columns=['Address','Intr2']

address=address.Address.str.split(pat=" /",expand=True )
address.columns=['Address']

In [26]:
address.head()

Unnamed: 0,Address
0,2100 Block of MISSION ST
1,1000 Block of MARKET ST
2,800 Block of BRYANT ST
3,2400 Block of SAN BRUNO AV
4,4TH ST


In [27]:
string=address.iloc[:,0]
string=string.str.strip()

In [28]:
address_fram=string.to_frame()
temp=address_fram['Address'].astype(str).str[-2:]

In [29]:
temp.unique()

array(['ST', 'AV', 'LN', 'DR', 'BL', 'HY', 'CT', 'RD', 'PL', 'PZ', '80',
       'TR', 'WY', 'AL', 'AY', 'ER', 'MS', 'CR', 'TI'], dtype=object)

In [30]:
address=temp.to_frame()

("ST","AV","LN","DR","BL","HY","CT","RD","PL","PZ","TR","AL","CR","WK","EX","RW")

In [31]:
address['Address']=address['Address'].apply(lambda x :( x if x in ("ST","AV","LN","DR","BL","HY","CT","RD","PL","PZ","TR","AL","CR","WK","EX","RW") else (("I-80" if x in ("80") else ("HWY" if x in ("WY") else ("WAY" if x in ("AY") else ("TER" if x in ("ER") else ("ALMS" if x in ("MS") else ("MAR" if x in ("AR") else ("PARK" if x in ("RK") else ("STWY" if x in ("WY") else ("VIA" if x in ("NO") else ("BLOCK")))))))))))))

In [32]:
#address.to_csv("Address.csv", float_format = '%.5F')

In [33]:
df=df.drop(labels=['Address'],axis=1)

Concatenating "df" and "address"

In [34]:
df=pd.concat([address,df],axis=1)

In [35]:
Id=df['Id']
df=df.drop(['Descript','Category','Resolution','Id','Second'],axis=1)

In [36]:
df.head()

Unnamed: 0,Address,DayOfWeek,PdDistrict,X,Y,Year,Month,Day,Hour,Minute,rot45_X,rot45_Y,radial45,rot30_X,rot30_Y,radial30,rot60_X,rot60_Y,radial60
0,ST,Friday,MISSION,-122.419331,37.762264,2013,MonthMed,28,DAY,high,-59.852546,113.248388,128.09186,-28.507545,93.911786,98.143282,-87.134009,124.896273,152.287276
1,ST,Thursday,SOUTHERN,-122.41134,37.781271,2004,MonthMed,19,EARLY_MORNING,high,-59.833459,113.256175,128.089828,-28.487089,93.92425,98.149269,-87.117585,124.898855,152.279998
2,ST,Wednesday,SOUTHERN,-122.403405,37.775421,2007,MonthMed,14,NIGHT,low,-59.831985,113.24643,128.080522,-28.488188,93.915217,98.140944,-87.113638,124.889059,152.269705
3,AV,Thursday,BAYVIEW,-122.404715,37.730161,2007,MonthLow,27,DAY,low,-59.86491,113.215357,128.068437,-28.528038,93.876677,98.115643,-87.137403,124.867564,152.265674
4,ST,Sunday,SOUTHERN,-122.405239,37.785265,2012,MonthMed,9,DAY,low,-59.826321,113.254686,128.085177,-28.48058,93.924659,98.147771,-87.110304,124.895569,152.273137


Label Encoded Columns :PdDistrict ,Address ,X ,Y

In [37]:
data['PdDistrict'].unique()

array(['MISSION', 'SOUTHERN', 'BAYVIEW', 'CENTRAL', 'INGLESIDE',
       'NORTHERN', 'RICHMOND', 'TARAVAL', 'TENDERLOIN', 'PARK'],
      dtype=object)

# Label Encoding

In [38]:
le = preprocessing.LabelEncoder()

"DayOf Week"

In [39]:
"""le_res=le.fit_transform(df['DayOfWeek'])
Day=pd.DataFrame(le_res)
Day.columns=['DayOfWeek']
df=df.drop(labels=['DayOfWeek'],axis=1)
df=pd.concat([Day,df],axis=1)

"""

"le_res=le.fit_transform(df['DayOfWeek'])\nDay=pd.DataFrame(le_res)\nDay.columns=['DayOfWeek']\ndf=df.drop(labels=['DayOfWeek'],axis=1)\ndf=pd.concat([Day,df],axis=1)\n\n"

"PdDistrict"

In [40]:
"""
#le = preprocessing.LabelEncoder()
le_res=le.fit_transform(df['PdDistrict'])
District=pd.DataFrame(le_res)

#District=pd.get_dummies(df['PdDistrict'],drop_first=True)
District.columns=['District']
df=df.drop(labels=['PdDistrict'],axis=1)
df=pd.concat([District,df],axis=1)

"""

"\n#le = preprocessing.LabelEncoder()\nle_res=le.fit_transform(df['PdDistrict'])\nDistrict=pd.DataFrame(le_res)\n\n#District=pd.get_dummies(df['PdDistrict'],drop_first=True)\nDistrict.columns=['District']\ndf=df.drop(labels=['PdDistrict'],axis=1)\ndf=pd.concat([District,df],axis=1)\n\n"

"Address"

In [41]:
"""
le_res=le.fit_transform(df['Address'])
Address=pd.DataFrame(le_res)
Address.columns=['Address']
df=df.drop(labels=['Address'],axis=1)
df=pd.concat([Address,df],axis=1)

"""

"\nle_res=le.fit_transform(df['Address'])\nAddress=pd.DataFrame(le_res)\nAddress.columns=['Address']\ndf=df.drop(labels=['Address'],axis=1)\ndf=pd.concat([Address,df],axis=1)\n\n"

"Year"

In [42]:
"""
#le = preprocessing.LabelEncoder()
le_res=le.fit_transform(df['Year'])
Year=pd.DataFrame(le_res)

#District=pd.get_dummies(df['PdDistrict'],drop_first=True)
Year.columns=['Year']
df=df.drop(labels=['Year'],axis=1)
df=pd.concat([Year,df],axis=1)

"""

"\n#le = preprocessing.LabelEncoder()\nle_res=le.fit_transform(df['Year'])\nYear=pd.DataFrame(le_res)\n\n#District=pd.get_dummies(df['PdDistrict'],drop_first=True)\nYear.columns=['Year']\ndf=df.drop(labels=['Year'],axis=1)\ndf=pd.concat([Year,df],axis=1)\n\n"

"Month"

In [43]:
"""
#le = preprocessing.LabelEncoder()
le_res=le.fit_transform(df['Month'])
Month=pd.DataFrame(le_res)

#District=pd.get_dummies(df['PdDistrict'],drop_first=True)
Month.columns=['Month']
df=df.drop(labels=['Month'],axis=1)
df=pd.concat([Month,df],axis=1)

"""

"\n#le = preprocessing.LabelEncoder()\nle_res=le.fit_transform(df['Month'])\nMonth=pd.DataFrame(le_res)\n\n#District=pd.get_dummies(df['PdDistrict'],drop_first=True)\nMonth.columns=['Month']\ndf=df.drop(labels=['Month'],axis=1)\ndf=pd.concat([Month,df],axis=1)\n\n"

"Day"

In [44]:
"""
#le = preprocessing.LabelEncoder()
le_res=le.fit_transform(df['Day'])
Day=pd.DataFrame(le_res)

#District=pd.get_dummies(df['PdDistrict'],drop_first=True)
Day.columns=['Day']
df=df.drop(labels=['Day'],axis=1)
df=pd.concat([Day,df],axis=1)

"""

"\n#le = preprocessing.LabelEncoder()\nle_res=le.fit_transform(df['Day'])\nDay=pd.DataFrame(le_res)\n\n#District=pd.get_dummies(df['PdDistrict'],drop_first=True)\nDay.columns=['Day']\ndf=df.drop(labels=['Day'],axis=1)\ndf=pd.concat([Day,df],axis=1)\n\n"

"Hour"

In [45]:
"""
#le = preprocessing.LabelEncoder()
le_res=le.fit_transform(df['Hour'])
Hour=pd.DataFrame(le_res)

#District=pd.get_dummies(df['PdDistrict'],drop_first=True)
Hour.columns=['Hour']
df=df.drop(labels=['Hour'],axis=1)
df=pd.concat([Hour,df],axis=1)

"""

"\n#le = preprocessing.LabelEncoder()\nle_res=le.fit_transform(df['Hour'])\nHour=pd.DataFrame(le_res)\n\n#District=pd.get_dummies(df['PdDistrict'],drop_first=True)\nHour.columns=['Hour']\ndf=df.drop(labels=['Hour'],axis=1)\ndf=pd.concat([Hour,df],axis=1)\n\n"

"Minute"

In [46]:
"""
#le = preprocessing.LabelEncoder()
le_res=le.fit_transform(df['Minute'])
Minute=pd.DataFrame(le_res)

#District=pd.get_dummies(df['PdDistrict'],drop_first=True)
Minute.columns=['Minute']
df=df.drop(labels=['Minute'],axis=1)
df=pd.concat([Minute,df],axis=1)
"""

"\n#le = preprocessing.LabelEncoder()\nle_res=le.fit_transform(df['Minute'])\nMinute=pd.DataFrame(le_res)\n\n#District=pd.get_dummies(df['PdDistrict'],drop_first=True)\nMinute.columns=['Minute']\ndf=df.drop(labels=['Minute'],axis=1)\ndf=pd.concat([Minute,df],axis=1)\n"

# One Hot Encoding

In [47]:
df=pd.get_dummies(df,columns=['Address', 'DayOfWeek', 'PdDistrict','Year','Month','Day','Hour','Minute'],drop_first=True)

In [48]:
df.shape

(20000, 92)

# Feature Scaling(Not used YET)

In [49]:

scaler=StandardScaler(copy=True, with_mean=True, with_std=True)
lt_res=scaler.fit_transform([df['X']])
Longitude=pd.DataFrame(lt_res,)



36 Crime Category are there 

# Applying different Models

In [50]:
#Independent Column
X=df
X.shape

(20000, 92)

In [51]:
#Dependent column
y.shape

(20000, 1)

In [52]:
#splitting the dataset into the training set and test set
#from sklearn import model_selection as ms

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.33,random_state=0)

In [53]:
"""from sklearn.preprocessing import StandardScaler
sc_X=StandardScaler()
X_train=sc_X.fit_tranform(X_train)
X_test=sc_X.transform(X_test)"""

'from sklearn.preprocessing import StandardScaler\nsc_X=StandardScaler()\nX_train=sc_X.fit_tranform(X_train)\nX_test=sc_X.transform(X_test)'

# XGBoost Model

In [54]:
X.head()

Unnamed: 0,X,Y,rot45_X,rot45_Y,radial45,rot30_X,rot30_Y,radial30,rot60_X,rot60_Y,radial60,Address_ALMS,Address_AV,Address_BL,Address_BLOCK,Address_CR,Address_CT,Address_DR,Address_HWY,Address_HY,Address_I-80,Address_LN,Address_PL,Address_PZ,Address_RD,Address_ST,Address_TER,Address_TR,Address_WAY,DayOfWeek_Monday,DayOfWeek_Saturday,DayOfWeek_Sunday,DayOfWeek_Thursday,DayOfWeek_Tuesday,DayOfWeek_Wednesday,PdDistrict_CENTRAL,PdDistrict_INGLESIDE,PdDistrict_MISSION,PdDistrict_NORTHERN,PdDistrict_PARK,PdDistrict_RICHMOND,PdDistrict_SOUTHERN,PdDistrict_TARAVAL,PdDistrict_TENDERLOIN,Year_2004,Year_2005,Year_2006,Year_2007,Year_2008,Year_2009,Year_2010,Year_2011,Year_2012,Year_2013,Year_2014,Year_2015,Month_MonthLow,Month_MonthMed,Day_02,Day_03,Day_04,Day_05,Day_06,Day_07,Day_08,Day_09,Day_10,Day_11,Day_12,Day_13,Day_14,Day_15,Day_16,Day_17,Day_18,Day_19,Day_20,Day_21,Day_22,Day_23,Day_24,Day_25,Day_26,Day_27,Day_28,Day_29,Day_30,Day_31,Hour_EARLY_MORNING,Hour_MORNING,Hour_NIGHT,Minute_low
0,-122.419331,37.762264,-59.852546,113.248388,128.09186,-28.507545,93.911786,98.143282,-87.134009,124.896273,152.287276,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,-122.41134,37.781271,-59.833459,113.256175,128.089828,-28.487089,93.92425,98.149269,-87.117585,124.898855,152.279998,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,-122.403405,37.775421,-59.831985,113.24643,128.080522,-28.488188,93.915217,98.140944,-87.113638,124.889059,152.269705,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
3,-122.404715,37.730161,-59.86491,113.215357,128.068437,-28.528038,93.876677,98.115643,-87.137403,124.867564,152.265674,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1
4,-122.405239,37.785265,-59.826321,113.254686,128.085177,-28.48058,93.924659,98.147771,-87.110304,124.895569,152.273137,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [55]:
model = xgb.XGBClassifier(objective='multi:softprob')
model.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [56]:
y_pred=model.predict_proba(X_test)

In [57]:
print (log_loss(y_test,y_pred));

2.5165919008444657


Early Stopping in XGBoost

In [37]:
model = XGBClassifier(objective='multi:softprob')
eval_set = [(X_test, y_test)]
model.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="mlogloss", eval_set=eval_set, verbose=True)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0]	validation_0-mlogloss:3.42722
Will train until validation_0-mlogloss hasn't improved in 10 rounds.
[1]	validation_0-mlogloss:3.31607
[2]	validation_0-mlogloss:3.22979
[3]	validation_0-mlogloss:3.15756
[4]	validation_0-mlogloss:3.0969
[5]	validation_0-mlogloss:3.04489
[6]	validation_0-mlogloss:2.99891
[7]	validation_0-mlogloss:2.9586
[8]	validation_0-mlogloss:2.92274
[9]	validation_0-mlogloss:2.89069
[10]	validation_0-mlogloss:2.86159
[11]	validation_0-mlogloss:2.83499
[12]	validation_0-mlogloss:2.811
[13]	validation_0-mlogloss:2.78906
[14]	validation_0-mlogloss:2.76912
[15]	validation_0-mlogloss:2.75094
[16]	validation_0-mlogloss:2.73411
[17]	validation_0-mlogloss:2.71827
[18]	validation_0-mlogloss:2.70373
[19]	validation_0-mlogloss:2.69066
[20]	validation_0-mlogloss:2.67818
[21]	validation_0-mlogloss:2.66666
[22]	validation_0-mlogloss:2.65626
[23]	validation_0-mlogloss:2.64634
[24]	validation_0-mlogloss:2.63736
[25]	validation_0-mlogloss:2.62887
[26]	validation_0-mlogloss:2.621
[2

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

PLot of Important Feature

In [None]:
# plot feature importance
xgb.plot_importance(model)
