## Training

In [1]:
import time
import numpy as np

# Load the feature matrix (X) and the label vector (y) from their .npz archives.
# Both arrays are read from the key 'arr_0'.
#
# np.load on an .npz file returns a lazy NpzFile that keeps the archive open
# until it is closed, so each array is read inside a `with` block to release
# the file handle as soon as the data is in memory.
with np.load('X.npz') as X_archive:
    X = X_archive['arr_0']
with np.load('y.npz') as y_archive:
    y = y_archive['arr_0']

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=8,
)

# Report the size of the full data set and of each partition.
print(
    f'Total data\t\t: {len(X)}',
    f'Total training data\t: {len(X_train)}',
    f'Total testing data\t: {len(X_test)}',
    sep='\n',
)

Total data		: 15000
Total training data	: 12000
Total testing data	: 3000


### Random Forest

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Baseline tree ensemble, trained on all available cores (n_jobs=-1).
#
# NOTE(review): this section and the comparison table label the model
# "Random Forest", but the estimator is ExtraTreesClassifier -- confirm which
# one is intended before reporting results.
#
# The estimator is stochastic, so it is seeded to make the fitted model (and
# every metric derived from it) reproducible; 8 matches the seed already used
# for the train/test split.
rf = ExtraTreesClassifier(n_jobs=-1, random_state=8)

# Time only the fit call; rf_stop - rf_start is the training time in seconds.
rf_start = time.time()
rf.fit(X_train, y_train)
rf_stop = time.time()

### LightGBM

In [8]:
from lightgbm import LGBMClassifier

# Baseline LightGBM classifier, trained on all available cores (n_jobs=-1).
#
# The original call `LGBMClassifier(, n_jobs=-1)` had a stray leading comma,
# which is a SyntaxError: the cell could not run and `lgbm` was never defined.
# The estimator is also seeded (same seed as the train/test split) so repeated
# runs give the same model.
lgbm = LGBMClassifier(n_jobs=-1, random_state=8)

# Time only the fit call; lgbm_stop - lgbm_start is the training time in seconds.
lgbm_start = time.time()
lgbm.fit(X_train, y_train)
lgbm_stop = time.time()

## Evaluation

In [9]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the LightGBM model on the held-out test set.
lgbm_report = classification_report(y_true=y_test, y_pred=lgbm.predict(X_test))
print(lgbm_report)

              precision    recall  f1-score   support

           0       0.78      0.68      0.72      1017
           1       0.70      0.79      0.74      1007
           2       0.79      0.80      0.79       976

    accuracy                           0.75      3000
   macro avg       0.76      0.75      0.75      3000
weighted avg       0.76      0.75      0.75      3000



In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Predict the held-out test set once per model.
y_pred_rf = rf.predict(X_test)
y_pred_lgbm = lgbm.predict(X_test)
test_preds = (y_pred_rf, y_pred_lgbm)

# Accuracy plus macro-averaged precision and recall, computed for both models
# in the same (rf, lgbm) order.
rf_accuracy, lgbm_accuracy = (
    accuracy_score(y_test, pred) for pred in test_preds
)
rf_precision, lgbm_precision = (
    precision_score(y_test, pred, average='macro') for pred in test_preds
)
rf_recall, lgbm_recall = (
    recall_score(y_test, pred, average='macro') for pred in test_preds
)

# Renders a value already scaled to 0-100 as a two-decimal percentage string.
as_percent = '{:.2f}%'.format

# Side-by-side summary: one row per model; execution_time is the fit duration
# in seconds measured around each model's fit call.
model_table = pd.DataFrame({
    'model': ['Random Forest', 'LightGBM'],
    'accuracy': [as_percent(rf_accuracy*100), as_percent(lgbm_accuracy*100)],
    'precision': [as_percent(rf_precision*100), as_percent(lgbm_precision*100)],
    'recall': [as_percent(rf_recall*100), as_percent(lgbm_recall*100)],
    'execution_time': [rf_stop - rf_start, lgbm_stop - lgbm_start],
})
model_table

## Hyperparameter Tuning

### Random Forest Tuning

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate values for the randomized search over the tree-ensemble settings.
rf_hyperparameter = dict(
    n_estimators=range(250, 10001),
    max_features=['sqrt', 'log2'],
    max_depth=range(10, 51),
    max_leaf_nodes=range(1000, 5001),
)

# Draw 10 random configurations, score each by accuracy (cross-validation
# settings left at the library default) and run the candidates in parallel.
# The seed fixes which configurations are drawn.
rf_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=rf_hyperparameter,
    n_iter=10,
    scoring='accuracy',
    n_jobs=-1,
    random_state=8,
)
rf_search.fit(X_train, y_train)

# Summarize the best configuration found.
print(
    'Random Forest Hyperparameter Tuning',
    f'Best score\t\t: {rf_search.best_score_}',
    f'Best hyperparameter\t: {rf_search.best_params_}',
    sep='\n',
)

### LightGBM Tuning

In [1]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate values for the randomized search over LightGBM settings.
#
# This cell needs `lgbm`, `X_train` and `y_train` from the earlier cells;
# running it on a fresh kernel raises NameError.
lgbm_hyperparameter = {
    'n_estimators': range(50, 501),
    'num_leaves': range(20, 101),
    'learning_rate': [0.001, 0.01, 0.1],
    'max_depth': range(5, 21),
    'subsample': [0.5, 0.6, 0.7, 0.8, 0.9, 1],
    # LightGBM only applies row subsampling when subsample_freq > 0 (its
    # default is 0, i.e. disabled). Without this entry the 'subsample' values
    # above are silently ignored and that search dimension is a no-op.
    'subsample_freq': [1],
    'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'reg_alpha': [0.2, 0.4, 0.6, 0.8, 1],
    'reg_lambda': [0.2, 0.4, 0.6, 0.8, 1],
    'objective': ['multiclass']
}

# Draw 10 random configurations, score each by accuracy (cross-validation
# settings left at the library default) and run the candidates in parallel.
# The seed fixes which configurations are drawn.
lgbm_search = RandomizedSearchCV(lgbm, lgbm_hyperparameter, scoring='accuracy', n_iter=10, n_jobs=-1, random_state=8)
lgbm_search.fit(X_train, y_train)

# Summarize the best configuration found.
print('LightGBM Hyperparameter Tuning')
print(f'Best score\t\t: {lgbm_search.best_score_}')
print(f'Best hyperparameter\t: {lgbm_search.best_params_}')

NameError: name 'lgbm' is not defined