In [1]:
import json
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error


In [2]:
# load or create your dataset
print('Load data...')
df_train = pd.read_csv('/Users/avkashchauhan/src/github.com/microsoft/LightGBM/examples/regression/regression.train', header=None, sep='\t')
df_test = pd.read_csv('/Users/avkashchauhan/src/github.com/microsoft/LightGBM/examples/regression/regression.test', header=None, sep='\t')


Load data...


In [4]:
df_train.shape

(7000, 29)

In [5]:
df_test.shape

(500, 29)

In [6]:
y_train = df_train[0]
y_test = df_test[0]
X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1)


In [8]:
y_train.shape

(7000,)

In [10]:
X_train.shape

(7000, 28)

In [11]:
X_test.shape

(500, 28)

In [12]:
# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)



In [18]:
# specify your configurations as a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l2', 'auc'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

In [19]:
params

{'bagging_fraction': 0.8,
 'bagging_freq': 5,
 'boosting_type': 'gbdt',
 'feature_fraction': 0.9,
 'learning_rate': 0.05,
 'metric': {'auc', 'l2'},
 'num_leaves': 31,
 'objective': 'regression',
 'task': 'train',
 'verbose': 0}

In [21]:
print('Start training...')
# train
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=lgb_eval,
                early_stopping_rounds=5)


Start training...
[1]	valid_0's l2: 0.71477	valid_0's auc: 0.755458
Train until valid scores didn't improve in 5 rounds.
[2]	valid_0's l2: 0.69441	valid_0's auc: 0.745243
[3]	valid_0's l2: 0.675223	valid_0's auc: 0.769842
[4]	valid_0's l2: 0.656313	valid_0's auc: 0.780557
[5]	valid_0's l2: 0.639	valid_0's auc: 0.783927
[6]	valid_0's l2: 0.623967	valid_0's auc: 0.779936
[7]	valid_0's l2: 0.609924	valid_0's auc: 0.788127
[8]	valid_0's l2: 0.596133	valid_0's auc: 0.793279
[9]	valid_0's l2: 0.583548	valid_0's auc: 0.795351
[10]	valid_0's l2: 0.571882	valid_0's auc: 0.79544
[11]	valid_0's l2: 0.561026	valid_0's auc: 0.795271
[12]	valid_0's l2: 0.551058	valid_0's auc: 0.795279
[13]	valid_0's l2: 0.541681	valid_0's auc: 0.79436
[14]	valid_0's l2: 0.533475	valid_0's auc: 0.793319
[15]	valid_0's l2: 0.52547	valid_0's auc: 0.793586
Early stopping, best iteration is:
[10]	valid_0's l2: 0.571882	valid_0's auc: 0.79544


  booster = Booster(params=params, train_set=train_set)


In [22]:
print('Start predicting...')
# predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
# eval
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)


Start predicting...
('The rmse of prediction is:', 0.57188207350333087)


  exec(code_obj, self.user_global_ns, self.user_ns)


In [29]:
print('Dump model to JSON as : lightgbm_model.json')
# dump model to json (and save to file)
model_json = gbm.dump_model()

with open('lightgbm_model.json', 'w+') as f:
    json.dump(model_json, f, indent=4)

print('Above lightgbm_model.json file is saved at your local file system, mostly where jupyter notebook started')    

Dump model to JSON as : lightgbm_model.json
Above lightgbm_model.json file is saved at your local file system, mostly where jupyter notebook started


In [26]:
print('Feature Importance Results:')
print('Feature names:', gbm.feature_name())
print('Calculate feature importances...')
# feature importances
print('Feature importances:', list(gbm.feature_importance()))

Feature Importance Results:
('Feature names:', [u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'20', u'21', u'22', u'23', u'24', u'25', u'26', u'27', u'28'])
Calculate feature importances...
('Feature importances:', [10, 2, 2, 15, 3, 34, 1, 0, 2, 18, 0, 2, 1, 7, 3, 2, 0, 1, 5, 3, 1, 13, 36, 1, 25, 47, 34, 32])


In [28]:
print('Save model...')
# save model to file
gbm.save_model('lightgbm_model.txt')
print('Above lightgbm_model.txt file is saved at your local file system, mostly where jupyter notebook started')    

Save model...
Above lightgbm_model.txt file is saved at your local file system, mostly where jupyter notebook started
