<a href="https://colab.research.google.com/github/PrettyCharity/Machine_Learning_Practice/blob/main/LightGBM_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Installing and upgrading packages
!pip install lightgbm --upgrade
!pip install optuna

In [2]:
#@title Preparing the data
# Importing libraries
import pandas as pd
import numpy as np
pd.set_option("display.precision", 4)
# Metrics and tools
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.feature_selection import SelectKBest, chi2, f_classif
# Optuna packages
import optuna.integration.lightgbm as lgb
from lightgbm import early_stopping
from lightgbm import log_evaluation

# Loading the data
df = pd.read_csv('Data.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Encoding the Dependent Variable
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

In [5]:
#@title Optuna Lightgbm tuner
def tuner(data, target):
  # Splitting the data
  X_train, X_test, y_train, y_test = train_test_split(data, target,
                                                    test_size = 0.25,
                                                    random_state = 42)
  
  # Setting up the data for the model
  dtrain = lgb.Dataset(X_train, label = y_train)
  dtest = lgb.Dataset(X_test, label = y_test)

  # Parameters for classification
  params = {
      'objective' : 'binary',
      'metric' : 'binary_logloss',
      'verbosity' : -1,
      'boosting_type' : 'gbdt'
  }

  # Training the model
  model = lgb.train(
      params,
      dtrain,
      valid_sets = [dtrain, dtest],
      callbacks = [early_stopping(100), log_evaluation(100)]
  )

  # Results
  y_pred = np.rint(model.predict(X_test, 
                                     num_iteration = model.best_iteration))
  score = f1_score(y_test, y_pred)
  return score

In [None]:
#@title Feature Selection
# Chi-squared stats works for non-negative feature and class
score_func = [chi2, f_classif][(df.values < 0).any()]
# Store scores and number of features
results = {}
for num in range(5, X.shape[1] + 1):
  X_new = SelectKBest(score_func, k = num).fit_transform(X, y)
  results[num] = tuner(X_new, y)


In [22]:
#@title Results of HP tuning and Feature Selection
scores = list(results.values())
selection_results = pd.DataFrame(np.array([scores]).reshape(-1, 1),
                                 columns = ['f1 score'],
                                 index = list(results.keys()))
print('LightGBM scores at different number of features:\n')
selection_results.style

LightGBM scores at different number of features:



Unnamed: 0,f1 score
5,0.9624
6,0.9624
7,0.9624
8,0.9624
9,0.9701
10,0.9701
