In [7]:
import numpy as np
from sklearn.model_selection import train_test_split


In [1]:
class modelManager:
    """Fit several scikit-learn classifiers on one train/test split.

    The constructor splits ``X`` / ``y`` once with ``train_test_split``.
    Each model method then fits a fresh estimator on the training part,
    scores it on the held-out part and predicts ``toPredict``.

    Every model method returns a dict with the keys:

    * ``"model"``      -- the fitted estimator
    * ``"accuracy"``   -- ``accuracy_score`` on the held-out split
    * ``"prediction"`` -- ``model.predict(toPredict)``
    * ``"recall"``     -- macro-averaged ``recall_score`` on the held-out split
    """

    def __init__(self, X, y, toPredict, test_size=0.2, random_state=42) -> None:
        """Store the samples to predict and split the data.

        Args:
            X: Feature matrix handed to ``train_test_split``.
            y: Target values handed to ``train_test_split``.
            toPredict: Samples passed to each fitted model's ``predict``.
            test_size: Share of the data held out for scoring
                (default ``0.2``, the value that used to be hard-coded).
            random_state: Seed for the split and for the estimators that
                accept one (decision tree, random forest). Pass ``None``
                for unseeded behaviour.
        """
        self.toPredict = toPredict
        self.random_state = random_state
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state)

    def _fit_and_score(self, model) -> dict:
        """Fit ``model`` on the training split and build the result dict."""
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import recall_score

        model.fit(self.X_train, self.y_train)
        # Predict the held-out split once and reuse it for both metrics.
        test_predictions = model.predict(self.X_test)

        return {
            "model": model,
            "accuracy": accuracy_score(self.y_test, test_predictions),
            "prediction": model.predict(self.toPredict),
            "recall": recall_score(
                self.y_test, test_predictions, average='macro'),
        }

    def decisionTree(self) -> dict:
        """Fit and score a ``DecisionTreeClassifier``."""
        from sklearn.tree import DecisionTreeClassifier

        return self._fit_and_score(
            DecisionTreeClassifier(random_state=self.random_state))

    def naiveBayes(self) -> dict:
        """Fit and score a ``GaussianNB`` model."""
        from sklearn.naive_bayes import GaussianNB

        return self._fit_and_score(GaussianNB())

    def randomForest(self) -> dict:
        """Fit and score a 100-tree ``RandomForestClassifier``."""
        from sklearn.ensemble import RandomForestClassifier

        return self._fit_and_score(RandomForestClassifier(
            n_estimators=100, random_state=self.random_state))

    def svm(self) -> dict:
        """Fit and score an ``SVC`` with ``gamma='auto'``."""
        from sklearn.svm import SVC

        return self._fit_and_score(SVC(gamma='auto'))

    @staticmethod
    def findOptimalNeighborsForKNN(X_train, y_train) -> int:
        """Pick ``n_neighbors`` by 5-fold cross-validation.

        Candidates are 1, 11, 21, ..., 91. The candidate with the highest
        mean cross-validation score wins; ties go to the smallest value.

        Declared as a static method so it can be called on the class
        (``modelManager.findOptimalNeighborsForKNN(...)``) as well as on
        an instance.

        Args:
            X_train: Training features.
            y_train: Training targets.

        Returns:
            The selected number of neighbours as a plain ``int``.
        """
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.model_selection import cross_val_score

        n_folds = 5
        n_samples = np.shape(X_train)[0]
        # A k larger than the training part of a fold cannot be evaluated,
        # so drop such candidates up front. With n_folds folds the largest
        # test fold has ceil(n_samples / n_folds) samples.
        largest_test_fold = -(-n_samples // n_folds)
        max_neighbors = max(n_samples - largest_test_fold, 1)
        candidates = [k for k in range(1, 100, 10) if k <= max_neighbors]

        mean_scores = [
            np.mean(cross_val_score(
                KNeighborsClassifier(n_neighbors=k),
                X_train, y_train, cv=n_folds))
            for k in candidates
        ]
        # nanargmax skips failed (NaN) candidates and returns the first
        # maximum, i.e. the smallest k among equally good ones.
        return candidates[int(np.nanargmax(mean_scores))]

    def KNN(self) -> dict:
        """Fit and score a ``KNeighborsClassifier`` with a tuned ``n_neighbors``.

        The chosen number of neighbours is printed before fitting.
        """
        from sklearn.neighbors import KNeighborsClassifier

        optimal_neighbors = self.findOptimalNeighborsForKNN(
            self.X_train, self.y_train)
        print(optimal_neighbors)

        return self._fit_and_score(
            KNeighborsClassifier(n_neighbors=optimal_neighbors))

    def getPrediction(self) -> dict:
        """Run every model and return their result dicts keyed by model name."""
        return {
            "decisionTree": self.decisionTree(),
            "naiveBayes": self.naiveBayes(),
            "randomForest": self.randomForest(),
            "svm": self.svm(),
            "KNN": self.KNN(),
        }

    def getTrainTest(self) -> dict:
        """Return the four parts of the train/test split in a dict."""
        return {
            "X_train": self.X_train,
            "X_test": self.X_test,
            "y_train": self.y_train,
            "y_test": self.y_test,
        }
