In [1]:
%%write_and_run ClassifierUtils.py

import time
import pandas as pd
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import recall_score, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from classifierdto import ClassifierResultDto

def compareClassifierScore(Classifier1, Classifier2, weight_accuracy = 1, weight_recall = 1):
    """Three-way comparison of two classifier results by weighted score.

    Each result's score is ``Accuracy * weight_accuracy + RecallScore *
    weight_recall``.

    Args:
        Classifier1: Object exposing ``Accuracy`` and ``RecallScore``.
        Classifier2: Object exposing ``Accuracy`` and ``RecallScore``.
        weight_accuracy: Multiplier applied to ``Accuracy`` (default 1).
        weight_recall: Multiplier applied to ``RecallScore`` (default 1).

    Returns:
        -1 if Classifier1 scores lower than Classifier2, 0 if both scores
        are equal, and 1 in every other case.
    """
    def weighted(result):
        return (result.Accuracy * weight_accuracy) + (result.RecallScore * weight_recall)

    left, right = weighted(Classifier1), weighted(Classifier2)

    if left < right:
        return -1
    # Anything that is neither "lower" nor "equal" is reported as 1.
    return 0 if left == right else 1

def _evaluateClassifier(header, dto_name, tuned_template, estimator, param_grid,
                        TrainMatrix, ValidateMatrix, y_train, y_validate):
    """Grid-search one estimator, print its validation report and return its DTO.

    Args:
        header: Title printed (after a blank line) before the search starts.
        dto_name: Name stored in the returned ClassifierResultDto.
        tuned_template: Format string with one ``{}`` slot that receives the
            best parameters found by the grid search.
        estimator: Unfitted scikit-learn estimator to tune.
        param_grid: Parameter grid handed to GridSearchCV.
        TrainMatrix: Training features used to fit the grid search.
        ValidateMatrix: Validation features that are predicted after fitting.
        y_train: Training labels.
        y_validate: Validation labels the predictions are scored against.

    Returns:
        ClassifierResultDto built from (name, accuracy, recall, confusion
        matrix, predictions) measured on the validation data.
    """
    tic = time.perf_counter()
    print("\n" + header)

    grid = GridSearchCV(estimator, param_grid, cv=5)
    grid.fit(TrainMatrix, y_train)
    y_pred = grid.predict(ValidateMatrix)

    # Compute every metric once and reuse it for both the DTO and the report.
    accuracy = accuracy_score(y_validate, y_pred)
    recall = recall_score(y_validate, y_pred)
    matrix = confusion_matrix(y_true=y_validate, y_pred=y_pred)

    result = ClassifierResultDto(dto_name, accuracy, recall, matrix, y_pred)

    print(tuned_template.format(grid.best_params_))
    print("Best score is {}".format(grid.best_score_))
    print("Confusion Matrix: ", matrix)
    print("Accuracy Score: ", accuracy)
    print("Recall Score: ", recall)
    toc = time.perf_counter()
    print(f"Finished classifier in {toc - tic:0.4f} seconds")

    return result


def testClassifiers(TrainMatrix, ValidateMatrix, y_train, y_validate, Classifiers):
    """Tune the requested classifiers and return the best validation result.

    Every requested classifier is tuned with a 5-fold GridSearchCV on the
    training data, used to predict the validation data, and reported on
    stdout. Results are ranked with compareClassifierScore using its default
    weights.

    Args:
        TrainMatrix: Training features.
        ValidateMatrix: Validation features.
        y_train: Training labels; its length also sets ``max_iter`` for the
            MLP and logistic-regression models (``len(y_train) * 10``).
        y_validate: Validation labels.
        Classifiers: Container tested with ``in`` for the keys "MNB", "MLP",
            "DT", "LR", "KNN" and "RFC". Classifiers always run in that
            order, regardless of the order inside the container.

    Returns:
        The ClassifierResultDto with the highest score. A later result only
        replaces the current best when it scores strictly higher, so ties
        keep the earlier one. If no requested classifier beats the initial
        placeholder, the 'Not set yet' placeholder itself is returned
        (presumably its two zeros are Accuracy and RecallScore -- confirm
        against ClassifierResultDto).
    """
    BestClassifier = ClassifierResultDto(
        'Not set yet',
        0,
        0,
        [[0,0], [0,0]],
        [])

    # Iteration budget for the iterative solvers scales with the training size.
    max_iter = len(y_train) * 10

    # One row per supported classifier:
    # (selection key, printed header, DTO name, "tuned" message template,
    #  estimator, parameter grid)
    candidates = [
        ("MNB",
         "Multinomial Naive Bayes",
         "Multinomial Naive Bayes",
         "Tuned Naive Bayes Parameters: {}",
         MultinomialNB(),
         {'fit_prior': [True, False],
          'alpha': np.arange(0.1, 1, 0.1)}),
        ("MLP",
         "Neural Network MLP",
         "Neural Network MLP",
         "Tuned Neural Network MLP: {}",
         MLPClassifier(random_state=1, max_iter=max_iter),
         {'solver': ['lbfgs', 'sgd', 'adam']}),
        ("DT",
         "Decision Tree",
         "Decision Tree",
         "Tuned Decision Tree: {}",
         DecisionTreeClassifier(random_state=0),
         {'criterion': ['gini', 'entropy', 'log_loss'],
          'splitter': ['best', 'random'],
          'min_samples_split': np.arange(2, 5, 1),
          'min_samples_leaf': np.arange(1, 5, 1)}),
        ("LR",
         "Logistic Regression",
         "Logistic Regression",
         "Tuned Logistic Regression Parameters: {}",
         LogisticRegression(max_iter=max_iter),
         {'C': np.logspace(-5, 8, 15)}),
        ("KNN",
         "K_NearestNeighbors",
         "K_NearestNeighbors",
         "Tuned K_NearestNeighbors Parameters: {}",
         KNeighborsClassifier(),
         {'n_neighbors': list(range(3, 10)),
          'leaf_size': list(range(1, 5))}),
        ("RFC",
         "RFC",
         "Random Forest",
         "Tuned RFC Parameters: {}",
         RandomForestClassifier(),
         {'min_samples_leaf': [2,4,6,8,10],
          'min_samples_split': list(range(2, 5)),
          'min_weight_fraction_leaf': np.arange(0.05, 0.20, 0.05)}),
    ]

    for key, header, dto_name, tuned_template, estimator, param_grid in candidates:
        if key not in Classifiers:
            continue

        candidate = _evaluateClassifier(
            header, dto_name, tuned_template, estimator, param_grid,
            TrainMatrix, ValidateMatrix, y_train, y_validate)

        # -1 means the current best scores strictly lower than the candidate.
        if compareClassifierScore(BestClassifier, candidate) == -1:
            BestClassifier = candidate

    print("====================================================================")
    return BestClassifier

writing file ClassifierUtils.py
