In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb

from sklearn.model_selection import train_test_split

In [35]:
def _stablesoftmax(x):
    """Compute the softmax of vector x in a numerically stable way."""
    shiftx = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shiftx)
    return exps / exps.sum(axis=1)[:, None]

In [36]:
_stablesoftmax(x=a)

array([[0.5       , 0.5       ],
       [0.26894142, 0.73105858],
       [0.26894142, 0.73105858],
       [0.26894142, 0.73105858]])

In [28]:
a - np.max(a, axis=1, keepdims=True)

array([[ 0,  0],
       [-1,  0],
       [-1,  0],
       [-1,  0]])

In [38]:
a = []
a.append([0, 1, 2, 3])
a.append([0, 2, 3, 4])
a = np.array(a).T
a.T[1]

array([0, 2, 3, 4])

In [2]:
swissmetro = pd.read_table('Data/swissmetro.dat')
swissmetro = swissmetro[swissmetro['CHOICE']!=0]

In [3]:
new_train_co = swissmetro['TRAIN_CO'] * (swissmetro['GA']==0)
new_sm_co = swissmetro['SM_CO'] * (swissmetro['GA']==0)

In [4]:
feature = swissmetro[['TRAIN_TT', 'TRAIN_CO', 'TRAIN_HE', 'SM_TT', 'SM_CO', 'SM_HE', 'CAR_TT', 'CAR_CO']]

In [5]:
feature['TRAIN_CO'] = new_train_co
feature['SM_CO'] = new_sm_co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feature['TRAIN_CO'] = new_train_co
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feature['SM_CO'] = new_sm_co


In [6]:
choice = swissmetro[['CHOICE']] -1

In [7]:
X_train, X_test, y_train, y_test = train_test_split(feature, choice, test_size=0.2, random_state = 42)

In [8]:
param = {'max_depth': 1, 
         'num_iterations': 10, 
         'objective':'multiclass',
         'monotone_constraints': [-1, -1, -1, -1, -1, -1, -1, -1], 
         'interaction_constraints': [[0], [1], [2], [3], [4], [5], [6], [7]],
         'learning_rate': 0.3,
         'verbosity': 2,
         'num_classes': 3
        }

In [10]:
train_data = lgb.Dataset(X_train, label=y_train, free_raw_data=False)
validation_data = lgb.Dataset(X_test, label=y_test, reference= train_data, free_raw_data=False)

In [11]:
lightgbm_1 = lgb.train(param, train_data, valid_sets=[validation_data])

[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.074344
[LightGBM] [Debug] init for col-wise cost 0.000320 seconds, init for row-wise cost 0.000887 seconds
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Debug] Using Dense Multi-Val Bin
[LightGBM] [Info] Total Bins 1120
[LightGBM] [Info] Number of data points in the train set: 8575, number of used features: 8
[LightGBM] [Info] Start training from score -2.003021
[LightGBM] [Info] Start training from score -0.541616
[LightGBM] [Info] Start training from score -1.261371
[LightGBM] [Debug] Trained a tree with leaves = 2 and depth = 1
[LightGBM] [Debug] Trained a tree with leaves = 2 and depth = 1
[LightGBM] [Debug] Trained a tree with leaves = 2 and depth = 1
[1]	valid_0's multi_logloss: 0.919163
[LightGBM] [Debug] Trained a tree with leaves = 2 and depth = 1
[LightGBM] [Debug] Trained a tree with leaves = 2 and depth = 1
[LightGBM] 



In [12]:
lightgbm_1.predict(X_test)

array([[0.06272939, 0.60791523, 0.32935537],
       [0.08434972, 0.54909638, 0.3665539 ],
       [0.38008255, 0.463165  , 0.15675245],
       ...,
       [0.12663631, 0.54760363, 0.32576006],
       [0.12199224, 0.61827167, 0.25973609],
       [0.21503035, 0.5527565 , 0.23221315]])

In [16]:
dico = [{'a':2, 'b':3, 'c':4},
        {'a':2, 'b':3, 'c':5}]

for i, j in enumerate(dico):
    print(dico[i])

{'a': 2, 'b': 3, 'c': 4}
{'a': 2, 'b': 3, 'c': 5}


In [16]:
import json

first_model = lightgbm_1.dump_model()

In [18]:
first_model

{'name': 'tree',
 'version': 'v3',
 'num_class': 3,
 'num_tree_per_iteration': 3,
 'label_index': 0,
 'max_feature_idx': 7,
 'objective': 'multiclass num_class:3',
 'average_output': False,
 'feature_names': ['TRAIN_TT',
  'TRAIN_CO',
  'TRAIN_HE',
  'SM_TT',
  'SM_CO',
  'SM_HE',
  'CAR_TT',
  'CAR_CO'],
 'monotone_constraints': [-1, -1, -1, -1, -1, -1, -1, -1],
 'feature_infos': {'TRAIN_TT': {'min_value': 31,
   'max_value': 1049,
   'values': []},
  'TRAIN_CO': {'min_value': 0, 'max_value': 576, 'values': []},
  'TRAIN_HE': {'min_value': 30, 'max_value': 120, 'values': []},
  'SM_TT': {'min_value': 8, 'max_value': 796, 'values': []},
  'SM_CO': {'min_value': 0, 'max_value': 768, 'values': []},
  'SM_HE': {'min_value': 10, 'max_value': 30, 'values': []},
  'CAR_TT': {'min_value': 0, 'max_value': 1560, 'values': []},
  'CAR_CO': {'min_value': 0, 'max_value': 400, 'values': []}},
 'tree_info': [{'tree_index': 0,
   'num_leaves': 2,
   'num_cat': 0,
   'shrinkage': 1,
   'tree_structure

In [122]:
%run -i "Base_Model.py"

            Value   Std err     t-test       p-value  Rob. Std err  \
ASC_CAR  0.212685  0.086947   2.446160  1.443871e-02      0.090920   
ASC_SM   0.510729  0.078430   6.511943  7.418510e-11      0.109986   
B_COST  -0.011023  0.000583 -18.907133  0.000000e+00      0.000762   
B_HE    -0.005344  0.001079  -4.951090  7.379881e-07      0.001103   
B_TIME  -0.012116  0.000631 -19.207268  0.000000e+00      0.001239   

         Rob. t-test  Rob. p-value  
ASC_CAR     2.339243      0.019323  
ASC_SM      4.643591      0.000003  
B_COST    -14.461293      0.000000  
B_HE       -4.843324      0.000001  
B_TIME     -9.778750      0.000000  
Nbr of observations: 5390
LL(0) = -4230.341
LL(beta) = -4229.657
rho bar square = -0.00102
Output file: None


In [123]:
5390*0.696335

3753.2456500000003