# Colab setting

In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd ./drive/Othercomputers/MacBook/Earth/module/dd_earthquake/book

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/Othercomputers/MacBook/Earth/module/dd_earthquake/book


In [2]:
%ls

bench_mark.ipynb            edm_data.ipynb                 Light_GBM.ipynb
colab_lgbt_multigrid.ipynb  learn_categorical_plots.ipynb


Install lgbm with gpu option

ref: https://an-engineer-note.com/?p=624

In [3]:
!pip uninstall --yes lightgbm && pip install --install-option=--gpu lightgbm

Found existing installation: lightgbm 3.3.5
Uninstalling lightgbm-3.3.5:
  Successfully uninstalled lightgbm-3.3.5
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lightgbm
  Using cached lightgbm-3.3.5.tar.gz (1.5 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Skipping wheel build for lightgbm, due to binaries being disabled for it.
Installing collected packages: lightgbm
  Running setup.py install for lightgbm ... [?25l[?25hdone
Successfully installed lightgbm-3.3.5


# lgbm

In [4]:
import sys
%load_ext autoreload
%autoreload 2
sys.path.append('../')

In [13]:
import numpy as np
import pandas as pd
import warnings
import pprint

import lightgbm as lgb
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

from src import common

## grid search

In [6]:
train_values, train_labels = common.file.read_data('train')

In [7]:
train_values

Unnamed: 0_level_0,geo_level_1_id,geo_level_2_id,geo_level_3_id,count_floors_pre_eq,age,area_percentage,height_percentage,land_surface_condition,foundation_type,roof_type,...,has_secondary_use_agriculture,has_secondary_use_hotel,has_secondary_use_rental,has_secondary_use_institution,has_secondary_use_school,has_secondary_use_industry,has_secondary_use_health_post,has_secondary_use_gov_office,has_secondary_use_use_police,has_secondary_use_other
building_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
802906,6,487,12198,2,30,6,5,t,r,n,...,0,0,0,0,0,0,0,0,0,0
28830,8,900,2812,2,10,8,7,o,r,n,...,0,0,0,0,0,0,0,0,0,0
94947,21,363,8973,2,10,5,5,t,r,n,...,0,0,0,0,0,0,0,0,0,0
590882,22,418,10694,2,10,6,5,t,r,n,...,0,0,0,0,0,0,0,0,0,0
201944,11,131,1488,3,30,8,9,t,r,n,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
688636,25,1335,1621,1,55,6,3,n,r,n,...,0,0,0,0,0,0,0,0,0,0
669485,17,715,2060,2,0,6,5,t,r,n,...,0,0,0,0,0,0,0,0,0,0
602512,17,51,8163,3,55,6,7,t,r,q,...,0,0,0,0,0,0,0,0,0,0
151409,26,39,1851,2,10,14,6,t,r,x,...,0,0,0,0,0,0,0,0,0,0


In [8]:
features_list = ['geo_level_1_id', 
                 'geo_level_2_id', 
                 'geo_level_3_id',
                 'age', 
                 'area_percentage', 
                 'height_percentage', 
                 'foundation_type', 
                 'roof_type', 
                 'ground_floor_type', 
                 'other_floor_type', 
                 'position', 
                 'has_superstructure_mud_mortar_stone', 
                 'has_superstructure_cement_mortar_brick', 
                 'has_superstructure_timber', 
                 'count_families', 
                 'has_secondary_use'
                ]

In [29]:
train_values, train_labels = common.lgbm_preprocessing((train_values, train_labels), mode='training', features_list=features_list)

In [30]:
x_train, x_test, y_train, y_test = train_test_split(train_values, train_labels,
                                                        test_size=0.1, random_state=19, stratify=train_labels)
trains = lgb.Dataset(x_train, y_train)
valids = lgb.Dataset(x_test, y_test)

In [31]:
skf = StratifiedKFold(n_splits=5,
                      shuffle=True,
                      random_state=0)

In [33]:
# initialize model and parameter grid
model = lgb.LGBMClassifier(objective='multi_class', num_class=3, 
                           learning_rate=0.15, 
                           force_row_wise=True, 
                           num_boost_round=100, early_stopping_rounds=10, 
                           device='gpu', valid_sets=valids)

param_grid = {"max_depth": [10, 25, 50, 75],
              # "learning_rate" : [0.001,0.01,0.05,0.1],
              "num_leaves": [100,300,900,1200],
              "n_estimators": [100,200,500], 
              'reg_alpha': list(map(lambda x: x*0.1, range(1, 5))), 
              'reg_lambda': list(map(lambda x: x*0.1, range(1, 5))),
             }

# grid search
grid_result = GridSearchCV(estimator = model,
                           param_grid = param_grid,
                           scoring = 'f1_micro', 
                           cv = skf,
                           return_train_score = True,
                           n_jobs = -1)

grid_result.fit(trains)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-33-c2bbfb6b7896>", line 24, in <module>
    grid_result.fit(trains)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_search.py", line 799, in fit
    X, y, groups = indexable(X, y, groups)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/utils/validation.py", line 378, in indexable
    check_consistent_length(*result)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/utils/validation.py", line 329, in check_consistent_length
    lengths = [_num_samples(X) for X in arrays if X is not None]
  File "/usr/local/lib/python3.8/dist-packages/sklearn/utils/validation.py", line 329, in <listcomp>
    lengths = [_num_samples(X) for X in arrays if X is not None]
  File "/usr/local/lib/python3.8/dist-packages/sklearn/utils/validation.py", line 269, in

TypeError: ignored

In [14]:
pprint.pprint(grid_result.best_score_)

0.6757188260483019
