In [None]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt
import math 

from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
from sklearn import model_selection, metrics
from sklearn.model_selection import GridSearchCV, learning_curve, train_test_split
from sklearn import neighbors, linear_model, svm, tree, ensemble
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, StackingClassifier
from xgboost import XGBClassifier
from collections import Counter
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [None]:
class Model_Fit(object):
    """Small wrapper that pairs an estimator with a GridSearchCV workflow.

    Typical use: construct, call ``grid_search`` to configure the search,
    ``grid_fit`` to run it, then ``grid_predict`` to evaluate the best
    estimator on held-out data.

    Attributes set over the object's lifetime:
        clf          -- the wrapped estimator instance.
        grid         -- the GridSearchCV object (set by ``grid_search``).
        best_model   -- ``grid.best_estimator_`` (set by ``grid_predict``).
        predictions  -- test-set predictions of ``best_model``.
        y_test_pred  -- same predictions, kept under its historical name.
    """

    def __init__(self, clf, params = None):
        """Store (and if necessary build) the estimator to wrap.

        Args:
            clf: an estimator class / factory, or an already-built estimator.
            params: optional dict of keyword arguments used to build ``clf``.
        """
        # A class must always be instantiated, even without params; otherwise
        # the bare class object would be stored and fail as soon as fit() is
        # called on it. An already-built instance without params is kept as-is.
        if params or isinstance(clf, type):
            self.clf = clf(**(params or {}))
        else:
            self.clf = clf

    def train(self, x_train, y_train):
        """Fit the wrapped estimator on the given training data."""
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.clf.predict(x)

    def predit(self, x):
        """Misspelled historical name of ``predict``; kept for existing callers."""
        return self.predict(x)

    def grid_search(self, parameters, Kfold, scoring = "f1"):
        """Configure (but do not run) a grid search over ``parameters``.

        Args:
            parameters: parameter grid passed to GridSearchCV as ``param_grid``.
            Kfold: value passed to GridSearchCV as ``cv``.
            scoring: GridSearchCV scoring; defaults to ``"f1"`` as before.
                Pass another scorer (e.g. ``"f1_macro"``) for targets that the
                plain binary ``"f1"`` scorer does not support.
        """
        self.grid = GridSearchCV(
            estimator = self.clf,
            param_grid = parameters,
            scoring = scoring,
            n_jobs = -1,
            verbose = 1,
            cv = Kfold,
        )

    def grid_fit(self, x_train, y_train):
        """Run the configured grid search; ``grid_search`` must be called first."""
        self.grid.fit(x_train, y_train)

    def grid_predict(self, x_train, y_train, x_test, y_test):
        """Evaluate the best estimator found by the grid search.

        Prints the train/test scores (as percentages) and a classification
        report for the test set.

        Returns:
            (train_score, test_score) as returned by the best estimator's
            ``score`` method.
        """
        self.best_model = self.grid.best_estimator_
        # Predict the test set once and expose the result under both names;
        # the second predict() call on identical input was redundant.
        self.predictions = self.best_model.predict(x_test)
        self.y_test_pred = self.predictions

        train_score = self.best_model.score(x_train, y_train)
        test_score = self.best_model.score(x_test, y_test)

        print("Accuracy score of training dataset2: {:.2f} % ".format(100*train_score))
        print("Accuracy score of testing dataset2: {:.2f} % ".format(100*test_score))
        print(metrics.classification_report(y_test, self.predictions))

        return train_score, test_score

In [None]:
# model algorithms 
def lgb_model(train_X, train_Y, test_X, test_Y, LGB, k):
    """Grid-search the LightGBM wrapper and report its train/test scores.

    Args:
        train_X, train_Y: data the grid search is fitted on.
        test_X, test_Y: held-out data used for the final evaluation.
        LGB: wrapper object exposing ``grid_search``, ``grid_fit`` and
            ``grid_predict``.
        k: cross-validation setting forwarded as ``Kfold``.

    Returns:
        (train_score, test_score) as returned by ``LGB.grid_predict``.
    """
    print('Lightgbm:')
    param_grid = [{
        'learning_rate': [0.05, 0.5, 1],
        # LightGBM treats every max_depth <= 0 as "no limit", so the former
        # values -1, -2 and -3 were one and the same setting fitted three
        # times. Add positive depths here if depth limiting should be searched.
        'max_depth': [-1],
        'reg_alpha': [0, 0.1, 1],
        'reg_lambda': [0, 0.1, 1],
        'n_estimators': [30, 50, 80, 100, 200],
    }]
    LGB.grid_search(parameters = param_grid, Kfold = k)
    LGB.grid_fit(train_X, train_Y)
    train_score, test_score = LGB.grid_predict(train_X, train_Y, test_X, test_Y)

    return train_score, test_score
    
def xgb_model(train_X, train_Y, test_X, test_Y, XGB, k):
    """Grid-search the XGBoost wrapper and report its train/test scores.

    ``XGB`` must expose ``grid_search``, ``grid_fit`` and ``grid_predict``;
    ``k`` is forwarded to the search as ``Kfold``. Returns the
    (train_score, test_score) pair produced by ``XGB.grid_predict``.
    """
    print('XGB:')

    search_space = {
        'learning_rate': [0.05, 0.5, 1],
        'max_depth': [3, 4, 5],
        'reg_alpha': [0, 0.1, 1],
        'reg_lambda': [0, 0.1, 1],
    }
    XGB.grid_search(parameters = search_space, Kfold = k)
    XGB.grid_fit(train_X, train_Y)

    acc_train, acc_test = XGB.grid_predict(train_X, train_Y, test_X, test_Y)
    return acc_train, acc_test

def rf_model(train_X, train_Y, test_X, test_Y, RF, k):
    """Grid-search the random-forest wrapper and report its train/test scores.

    ``RF`` must expose ``grid_search``, ``grid_fit`` and ``grid_predict``;
    ``k`` is forwarded to the search as ``Kfold``. Returns the
    (train_score, test_score) pair produced by ``RF.grid_predict``.
    """
    print('RF:')

    search_space = dict(
        criterion = ['entropy', 'gini'],
        n_estimators = [30, 50, 80, 100, 200],
        max_depth = [3, 4, 5],
        max_features = ['sqrt', 'log2'],
    )
    RF.grid_search(parameters = search_space, Kfold = k)
    RF.grid_fit(train_X, train_Y)

    acc_train, acc_test = RF.grid_predict(train_X, train_Y, test_X, test_Y)
    return acc_train, acc_test

def knn_model(train_X, train_Y, test_X, test_Y, KNN, k):
    """Grid-search the k-nearest-neighbours wrapper and report its scores.

    ``KNN`` must expose ``grid_search``, ``grid_fit`` and ``grid_predict``;
    ``k`` is forwarded to the search as ``Kfold``. Returns the
    (train_score, test_score) pair produced by ``KNN.grid_predict``.
    """
    print('KNN:')

    # NOTE(review): leaf_size is documented as a tree build/query speed knob;
    # confirm whether it needs to be part of the searched grid at all.
    search_space = [{
        'n_neighbors': np.arange(1, 50),
        'leaf_size': [3, 6, 9, 12, 15],
    }]
    KNN.grid_search(parameters = search_space, Kfold = k)
    KNN.grid_fit(train_X, train_Y)

    acc_train, acc_test = KNN.grid_predict(train_X, train_Y, test_X, test_Y)
    return acc_train, acc_test

def vote_model(train_X, train_Y, test_X, test_Y, LGB_best, XGB_best, RF_best, KNN_best, k):
    """Fit a soft-voting ensemble of the four given estimators and score it.

    Args:
        train_X, train_Y: data the ensemble is fitted on.
        test_X, test_Y: held-out data used for evaluation.
        LGB_best, XGB_best, RF_best, KNN_best: estimators combined by the
            VotingClassifier (registered as 'lgb', 'xgb', 'rf' and 'knn').
        k: unused here; kept so the signature matches the other model helpers.

    Prints the train/test accuracy (as percentages) and a classification
    report for the test set.

    Returns:
        (train_score, test_score) accuracies of the fitted ensemble.
    """
    print('Vote:')
    votingC = ensemble.VotingClassifier(
        estimators=[('rf', RF_best), ('lgb', LGB_best), ('xgb', XGB_best), ('knn', KNN_best)],
        voting='soft',
    )
    votingC = votingC.fit(train_X, train_Y)
    predictions = votingC.predict(test_X)

    train_score = votingC.score(train_X, train_Y)
    # A classifier's score() is the accuracy of its own predict() output, so
    # reuse the predictions already computed instead of running the whole
    # ensemble over the test set a second time.
    test_score = metrics.accuracy_score(test_Y, predictions)

    print("Accuracy score of training dataset: {:.2f} % ".format(100*train_score))
    print("Accuracy score of testing dataset: {:.2f} % ".format(100*test_score))
    print(metrics.classification_report(test_Y, predictions))

    return train_score, test_score