## Predicting flight delays - LGBM Classifier


In [43]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [44]:
training = pd.read_csv('flights_lgbm.csv')
training = training.loc[np.random.choice(training.index, 1000, replace = False)]

In [45]:
training

Unnamed: 0,Month,DayofMonth,DayOfWeek,DepTime,UniqueCarrier,Origin,Dest,Distance,delayed_15min
21429,10,28,5,741,EV,ATL,GRR,640,N
18265,2,24,5,1532,B6,JFK,FLL,1069,N
47681,7,4,1,2118,CO,LAS,EWR,2227,Y
15578,4,25,1,1852,AA,ORD,AUS,978,N
31122,9,29,5,1039,US,PHL,RSW,992,Y
54792,11,30,3,2143,EV,ATL,LYH,389,N
86656,8,22,2,1706,B6,JFK,BUF,301,N
54040,6,11,7,1136,FL,SRQ,MDW,1035,N
22231,7,6,4,1420,OO,EUG,PDX,106,N
88753,4,26,2,1534,UA,SEA,LAX,954,N


In [46]:
print (training.dtypes)
print ("Number of columns in the flight data:"),len(training.dtypes)

Month             int64
DayofMonth        int64
DayOfWeek         int64
DepTime           int64
UniqueCarrier    object
Origin           object
Dest             object
Distance          int64
delayed_15min    object
dtype: object
Number of columns in the flight data:


(None, 9)

In [47]:
training['delayed_15min'] = training.delayed_15min.apply(lambda x: 1 if x == 'Y' else 0)
training = pd.get_dummies(training)

In [48]:
X_train, X_test, y_train, y_test = train_test_split(training.drop(['delayed_15min'],axis=1), training.delayed_15min, 
                                                    test_size=0.33, random_state=42, stratify=training.delayed_15min)


In [49]:
rf_clf = RandomForestClassifier(n_estimators=100, max_depth=10)
rf_clf.fit(X_train, y_train)
y_pred = rf_clf.predict(X_test)
print(accuracy_score(y_pred, y_test))
print(confusion_matrix(y_pred, y_test))
print(roc_auc_score(y_test, rf_clf.predict_proba(X_test)[:, 1]))

0.793939393939394
[[262  68]
 [  0   0]]
0.6793331836551414


In [50]:
gbm = lgb.LGBMClassifier(n_estimators=200,)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1',
        early_stopping_rounds=5)

[1]	valid_0's l1: 0.323239
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l1: 0.321373
[3]	valid_0's l1: 0.319694
[4]	valid_0's l1: 0.318059
[5]	valid_0's l1: 0.316283
[6]	valid_0's l1: 0.314382
[7]	valid_0's l1: 0.313875
[8]	valid_0's l1: 0.31229
[9]	valid_0's l1: 0.310903
[10]	valid_0's l1: 0.310246
[11]	valid_0's l1: 0.309057
[12]	valid_0's l1: 0.308552
[13]	valid_0's l1: 0.306863
[14]	valid_0's l1: 0.307382
[15]	valid_0's l1: 0.307044
[16]	valid_0's l1: 0.306467
[17]	valid_0's l1: 0.306608
[18]	valid_0's l1: 0.306105
[19]	valid_0's l1: 0.305524
[20]	valid_0's l1: 0.304988
[21]	valid_0's l1: 0.304874
[22]	valid_0's l1: 0.303708
[23]	valid_0's l1: 0.303048
[24]	valid_0's l1: 0.302823
[25]	valid_0's l1: 0.301645
[26]	valid_0's l1: 0.302461
[27]	valid_0's l1: 0.302565
[28]	valid_0's l1: 0.302754
[29]	valid_0's l1: 0.303152
[30]	valid_0's l1: 0.301897
Early stopping, best iteration is:
[25]	valid_0's l1: 0.301645


LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        learning_rate=0.1, max_depth=-1, min_child_samples=20,
        min_child_weight=0.001, min_split_gain=0.0, n_estimators=200,
        n_jobs=-1, num_leaves=31, objective=None, random_state=None,
        reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,
        subsample_for_bin=200000, subsample_freq=0)

In [51]:
params = {'boosting_type': 'gbdt',
          'max_depth' : -1,
          'objective': 'binary',
          'nthread': 3,
          'num_leaves': 64,
          'learning_rate': 0.05,
          'max_bin': 512,
          'subsample_for_bin': 200,
          'subsample': 1,
          'subsample_freq': 1,
          'colsample_bytree': 0.8,
          'reg_alpha': 5,
          'reg_lambda': 10,
          'min_split_gain': 0.5,
          'min_child_weight': 1,
          'min_child_samples': 5,
          'scale_pos_weight': 1,
          'num_class' : 1,
          'metric' : 'binary_error'}

# Create parameters to search
gridParams = {
    'learning_rate': [0.5, 0.005],
    'n_estimators': [40, 200, 1000],
    'num_leaves': [5, 10, 15],
    'boosting_type' : ['gbdt'],
    'objective' : ['binary'],
    'random_state' : [501], 
    'colsample_bytree' : [0.66],
    'subsample' : [0.75],
    'reg_alpha' : [1,1.2],
    'reg_lambda' : [1,1.4],
    }

# Create classifier to use
mdl = lgb.LGBMClassifier(boosting_type= 'gbdt',
          objective = 'auc',
          n_jobs = 3, 
          silent = True,
          max_depth = params['max_depth'],
          max_bin = params['max_bin'],
          subsample_for_bin = params['subsample_for_bin'],
          subsample = params['subsample'],
          subsample_freq = params['subsample_freq'],
          min_split_gain = params['min_split_gain'],
          min_child_weight = params['min_child_weight'],
          min_child_samples = params['min_child_samples'],
          scale_pos_weight = params['scale_pos_weight'])




In [52]:


grid = GridSearchCV(mdl, gridParams,
                    verbose=3,
                    cv=4,
                    n_jobs=2)
# Run the grid
grid.fit(training.drop('delayed_15min', axis=1), training.delayed_15min)

Fitting 4 folds for each of 72 candidates, totalling 288 fits
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.796812749003984, total=   0.0s
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.768, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, s

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.7689243027888446, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.7729083665338645, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.772, tota

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.776, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.7630522088353414, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=40, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.7831325301204819, to

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.7569721115537849, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75, score=0.78, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.756, total=   0.1s
[CV

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.7729083665338645, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.78, total=   0.1s
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.7871485943775101, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subs

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.7848605577689243, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 


[Parallel(n_jobs=2)]: Done 100 tasks      | elapsed:    3.4s


[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75, score=0.7888446215139442, total=   0.2s
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.768, total=   0.2s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75, score=0.764, total=

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=10, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.7951807228915663, total=   0.2s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=10, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75, score=0.7831325301204819, total=   0.2s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.5, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.7

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.7951807228915663, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.7928286852589641, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.792828

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.796, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.796, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.7951807228915663, total= 

[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75, score=0.796, total=   0.0s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=40, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.7928286852589641, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=5, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbd

[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.7928286852589641, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.796, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=10, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gb

[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.7928286852589641, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.7951807228915663, total=   0.1s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=200, num_leaves=15, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75 
[

[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75, score=0.772, total=   0.2s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75, score=0.792, total=   0.2s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=5, objective=binary, random_state=501, reg_alpha=1.2, reg_lambda=1.4, subsample=0.75, score=0.796, total=   

[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75, score=0.8047808764940239, total=   0.4s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1, subsample=0.75 
[CV]  boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75, score=0.8047808764940239, total=   0.3s
[CV] boosting_type=gbdt, colsample_bytree=0.66, learning_rate=0.005, n_estimators=1000, num_leaves=15, objective=binary, random_state=501, reg_alpha=1, reg_lambda=1.4, subsample=0.75 
[C

[Parallel(n_jobs=2)]: Done 288 out of 288 | elapsed:   19.3s finished


GridSearchCV(cv=4, error_score='raise',
       estimator=LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        learning_rate=0.1, max_bin=512, max_depth=-1, min_child_samples=5,
        min_child_weight=1, min_split_gain=0.5, n_estimators=100, n_jobs=3,
        num_leaves=31, objective='auc', random_state=None, reg_alpha=0.0,
        reg_lambda=0.0, scale_pos_weight=1, silent=True, subsample=1,
        subsample_for_bin=200, subsample_freq=1),
       fit_params=None, iid=True, n_jobs=2,
       param_grid={'learning_rate': [0.5, 0.005], 'n_estimators': [40, 200, 1000], 'num_leaves': [5, 10, 15], 'boosting_type': ['gbdt'], 'objective': ['binary'], 'random_state': [501], 'colsample_bytree': [0.66], 'subsample': [0.75], 'reg_alpha': [1, 1.2], 'reg_lambda': [1, 1.4]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

In [53]:
gbm = lgb.LGBMClassifier(**grid.best_params_)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1',
        early_stopping_rounds=5)

[1]	valid_0's l1: 0.326176
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l1: 0.326037
[3]	valid_0's l1: 0.326057
[4]	valid_0's l1: 0.326045
[5]	valid_0's l1: 0.325931
[6]	valid_0's l1: 0.325798
[7]	valid_0's l1: 0.325666
[8]	valid_0's l1: 0.325558
[9]	valid_0's l1: 0.325557
[10]	valid_0's l1: 0.32543
[11]	valid_0's l1: 0.325285
[12]	valid_0's l1: 0.325165
[13]	valid_0's l1: 0.325051
[14]	valid_0's l1: 0.324927
[15]	valid_0's l1: 0.324781
[16]	valid_0's l1: 0.324683
[17]	valid_0's l1: 0.32466
[18]	valid_0's l1: 0.324541
[19]	valid_0's l1: 0.324409
[20]	valid_0's l1: 0.324271
[21]	valid_0's l1: 0.324134
[22]	valid_0's l1: 0.32399
[23]	valid_0's l1: 0.323857
[24]	valid_0's l1: 0.323863
[25]	valid_0's l1: 0.323744
[26]	valid_0's l1: 0.323725
[27]	valid_0's l1: 0.323706
[28]	valid_0's l1: 0.323556
[29]	valid_0's l1: 0.323548
[30]	valid_0's l1: 0.323432
[31]	valid_0's l1: 0.323296
[32]	valid_0's l1: 0.323168
[33]	valid_0's l1: 0.323054
[34]	valid_0's l1: 0.323004

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=0.66,
        learning_rate=0.005, max_depth=-1, min_child_samples=20,
        min_child_weight=0.001, min_split_gain=0.0, n_estimators=1000,
        n_jobs=-1, num_leaves=10, objective='binary', random_state=501,
        reg_alpha=1, reg_lambda=1, silent=True, subsample=0.75,
        subsample_for_bin=200000, subsample_freq=0)

In [54]:
gbm = lgb.LGBMClassifier(**grid.best_params_)
gbm.fit(training.drop('delayed_15min', axis=1), training.delayed_15min)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=0.66,
        learning_rate=0.005, max_depth=-1, min_child_samples=20,
        min_child_weight=0.001, min_split_gain=0.0, n_estimators=1000,
        n_jobs=-1, num_leaves=10, objective='binary', random_state=501,
        reg_alpha=1, reg_lambda=1, silent=True, subsample=0.75,
        subsample_for_bin=200000, subsample_freq=0)