In [None]:
!pip install scikit-learn pandas numpy xgboost catboost lightgbm pyarrow

In [None]:
import os
from abc import ABC, abstractmethod
from copy import deepcopy
from random import uniform, randint, sample

from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from pandas import read_parquet
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBClassifier


In [None]:
class Individuo:
    """One candidate solution: a chromosome plus the metrics recorded for it.

    The three metrics start as ``None`` and ``performance`` starts at 0;
    they are expected to be assigned from outside before
    ``retornarPerformance`` is called.
    """

    def __init__(self, cromossomo):
        """Store the chromosome and reset every score."""
        self.cromossomo = cromossomo
        self.performance = 0
        # Not measured yet; __str__ renders these with a trailing "%".
        self.acuracia = self.precisao = self.recall = None

    def retornarPerformance(self):
        """Return the weighted mean of the metrics, rounded to one decimal.

        Accuracy has weight 1, precision weight 2 and recall weight 3,
        hence the division by 6.
        """
        soma_ponderada = self.acuracia + 2 * self.precisao + 3 * self.recall
        return round(soma_ponderada / 6, 1)

    def __str__(self):
        """Return a one-line summary of the chromosome and its scores."""
        modelo = 'Cromossomo: {} -> Acuracia: {}%, Precisao: {}%, Recall: {}%, Performance: {}%'
        campos = (self.cromossomo, self.acuracia, self.precisao, self.recall, self.performance)
        return modelo.format(*campos)


In [None]:
class Algoritmo(ABC):
    """Abstract hyperparameter search space for one classifier.

    Subclasses provide, gene by gene, a lower and an upper bound and know
    how to turn a chromosome (a list with one value per gene) into a model.
    A gene is sampled as an integer only when BOTH of its bounds are
    integers; otherwise it is sampled as a float rounded to 4 decimals.

    Attributes set by ``__init__``:
        min, max: the bound lists returned by the subclass.
        qtd_genes: number of genes in a chromosome.
        indices_float: positions of the genes sampled as floats.
    """

    def __init__(self):
        """Read the bounds from the subclass and classify each gene.

        Raises:
            ValueError: If the two bound lists have different lengths.
        """
        self.min = self.definirMinimo()
        self.max = self.definirMaximo()

        # A length mismatch would otherwise surface later as an IndexError
        # or silently ignore the trailing lower bounds.
        if len(self.min) != len(self.max):
            raise ValueError(
                "definirMinimo() and definirMaximo() must have the same length "
                "(got {} and {})".format(len(self.min), len(self.max)))

        self.qtd_genes = len(self.max)
        self.indices_float = self.definirIndices()

    @abstractmethod
    def __str__(self):
        """Return the display name of the algorithm."""
        pass

    @abstractmethod
    def definirMinimo(self):
        """Return the list with the lower bound of every gene."""
        pass

    @abstractmethod
    def definirMaximo(self):
        """Return the list with the upper bound of every gene."""
        pass

    @abstractmethod
    def gerarModelo(self, pos):
        """Build a model from the chromosome ``pos``."""
        pass

    def definirIndices(self):
        """Return the positions of the genes that must be sampled as floats.

        Both bounds are inspected: looking only at the lower bound would
        send a gene such as (0, 0.5) to ``randint``, which rejects a
        non-integer limit.
        """
        return [
            i for i in range(self.qtd_genes)
            if not isinstance(self.min[i], int) or not isinstance(self.max[i], int)
        ]

    def gerarCromossomo(self):
        """Return a new random chromosome, one gene per position."""
        return [self.gerarGene(i) for i in range(self.qtd_genes)]

    def gerarGene(self, i):
        """Return a random value for gene ``i`` within its inclusive bounds."""
        if i in self.indices_float:
            return round(uniform(self.min[i], self.max[i]), 4)

        return randint(self.min[i], self.max[i])


In [None]:
class KNN(Algoritmo):
    """Search space for scikit-learn's ``KNeighborsClassifier``.

    Genes, in order: n_neighbors, index into the weights options,
    index into the metric options, leaf_size.
    """

    # (lower, upper) bound of each gene, in chromosome order.
    _LIMITES = (
        (1, 30),     # n_neighbors
        (0, 1),      # index of weights
        (0, 2),      # index of metric
        (10, 100),   # leaf_size
    )

    def __str__(self):
        return "K NEIGHBORS CLASSIFIER"

    def definirMinimo(self):
        """Return the lower bound of every gene."""
        return [inferior for inferior, _ in self._LIMITES]

    def definirMaximo(self):
        """Return the upper bound of every gene."""
        return [superior for _, superior in self._LIMITES]

    def gerarModelo(self, genes):
        """Build an unfitted KNeighborsClassifier from a chromosome."""
        opcoes_weights = ('uniform', 'distance')
        opcoes_metric = ('euclidean', 'manhattan', 'minkowski')

        # Categorical genes are stored as indexes into the option tuples.
        parametros = {
            'n_neighbors': genes[0],
            'weights': opcoes_weights[genes[1]],
            'metric': opcoes_metric[genes[2]],
            'leaf_size': genes[3],
            'n_jobs': -1,
        }
        return KNeighborsClassifier(**parametros)


In [None]:
class RFC(Algoritmo):
    """Search space for scikit-learn's ``RandomForestClassifier``.

    Genes, in order: n_estimators, max_depth, min_samples_split,
    min_samples_leaf, index into the max_features options.
    """

    # (lower, upper) bound of each gene, in chromosome order.
    _LIMITES = (
        (100, 400),  # n_estimators
        (5, 25),     # max_depth
        (2, 10),     # min_samples_split
        (1, 5),      # min_samples_leaf
        (0, 2),      # index of max_features
    )

    def __str__(self):
        return 'RANDOM FOREST CLASSIFIER'

    def definirMinimo(self):
        """Return the lower bound of every gene."""
        return [inferior for inferior, _ in self._LIMITES]

    def definirMaximo(self):
        """Return the upper bound of every gene."""
        return [superior for _, superior in self._LIMITES]

    def gerarModelo(self, genes):
        """Build an unfitted RandomForestClassifier from a chromosome."""
        opcoes_max_features = ("sqrt", "log2", 0.8)

        # The last gene is an index into the max_features options above.
        parametros = dict(
            n_estimators=genes[0],
            max_depth=genes[1],
            min_samples_split=genes[2],
            min_samples_leaf=genes[3],
            max_features=opcoes_max_features[genes[4]],
            n_jobs=-1,
            class_weight="balanced",
            bootstrap=True,
        )
        return RandomForestClassifier(**parametros)


In [None]:
class LR(Algoritmo):
    """Search space for scikit-learn's ``LogisticRegression``.

    Genes, in order: C, max_iter, index into the solver options,
    index into the penalty options.
    """

    # (lower, upper) bound of each gene, in chromosome order.
    _LIMITES = (
        (0.01, 10),   # C
        (100, 500),   # max_iter
        (0, 1),       # index of solver
        (0, 1),       # index of penalty
    )

    def __str__(self):
        return 'LOGISTIC REGRESSION'

    def definirMinimo(self):
        """Return the lower bound of every gene."""
        return [inferior for inferior, _ in self._LIMITES]

    def definirMaximo(self):
        """Return the upper bound of every gene."""
        return [superior for _, superior in self._LIMITES]

    def gerarModelo(self, genes):
        """Build an unfitted LogisticRegression from a chromosome."""
        opcoes_solver = ('liblinear', 'saga')
        opcoes_penalty = ('l1', 'l2')

        # Categorical genes are stored as indexes into the option tuples.
        parametros = {
            'C': genes[0],
            'max_iter': genes[1],
            'solver': opcoes_solver[genes[2]],
            'penalty': opcoes_penalty[genes[3]],
            'n_jobs': -1,
        }
        return LogisticRegression(**parametros)


In [None]:
class XGB(Algoritmo):
    """Search space for ``XGBClassifier``.

    Genes, in order: n_estimators, learning_rate, max_depth, subsample,
    colsample_bytree, gamma, reg_lambda, reg_alpha, min_child_weight.
    """

    # (lower, upper) bound of each gene, in chromosome order.
    # NOTE(review): gamma, reg_lambda and reg_alpha have integer bounds here,
    # whereas the LGBM space writes its regularisation lower bounds as 0.0.
    # If integer-bounded genes are sampled as integers, these three only take
    # whole values - confirm that this is intended.
    _LIMITES = (
        (100, 1000),   # n_estimators
        (0.01, 0.3),   # learning_rate
        (3, 12),       # max_depth
        (0.5, 1),      # subsample
        (0.5, 1),      # colsample_bytree
        (0, 5),        # gamma
        (0, 3),        # reg_lambda
        (0, 3),        # reg_alpha
        (1, 10),       # min_child_weight
    )

    def __str__(self):
        return "XGBOOST CLASSIFIER"

    def definirMinimo(self):
        """Return the lower bound of every gene."""
        return [inferior for inferior, _ in self._LIMITES]

    def definirMaximo(self):
        """Return the upper bound of every gene."""
        return [superior for _, superior in self._LIMITES]

    def gerarModelo(self, genes):
        """Build an unfitted XGBClassifier from a chromosome."""
        parametros = dict(
            n_estimators=genes[0],
            learning_rate=genes[1],
            max_depth=genes[2],
            subsample=genes[3],
            colsample_bytree=genes[4],
            gamma=genes[5],
            reg_lambda=genes[6],
            reg_alpha=genes[7],
            min_child_weight=genes[8],
            # Fixed (non-tuned) settings.
            eval_metric="logloss",
            tree_method="hist",
            n_jobs=-1,
        )
        return XGBClassifier(**parametros)


In [None]:
class LGBM(Algoritmo):
    """Search space for ``LGBMClassifier``.

    Genes, in order: n_estimators, learning_rate, num_leaves, max_depth,
    subsample, colsample_bytree, min_child_samples, reg_lambda, reg_alpha.
    """

    def __str__(self):
        return "LIGHT GBM CLASSIFIER"

    def definirMinimo(self):
        """Return the lower bound of every gene."""
        return [100, 0.01, 16, 3, 0.5, 0.5, 5, 0.0, 0.0]

    def definirMaximo(self):
        """Return the upper bound of every gene."""
        return [1500, 0.2, 64, 10, 1, 1, 30, 3, 3]

    def gerarModelo(self, genes):
        """Build an unfitted LGBMClassifier from a chromosome."""
        return LGBMClassifier(
            n_estimators=genes[0],
            learning_rate=genes[1],
            num_leaves=genes[2],
            max_depth=genes[3],
            subsample=genes[4],
            # LightGBM only applies row subsampling (bagging) when the bagging
            # frequency is non-zero; with the default of 0 the subsample gene
            # would be ignored and the GA would tune a dead parameter.
            subsample_freq=1,
            colsample_bytree=genes[5],
            min_child_samples=genes[6],
            reg_lambda=genes[7],
            reg_alpha=genes[8],
            n_jobs=-1,
            device="cpu",
            boosting_type="gbdt",
            class_weight="balanced",
            importance_type="gain"
        )


In [None]:
class CBC(Algoritmo):
    """Search space for ``CatBoostClassifier``.

    Genes, in order: iterations, learning_rate, depth, l2_leaf_reg,
    random_strength, bagging_temperature.
    """

    # (lower, upper) bound of each gene, in chromosome order.
    _LIMITES = (
        (50, 1500),      # iterations
        (0.0005, 0.2),   # learning_rate
        (3, 10),         # depth
        (0.1, 10),       # l2_leaf_reg
        (0.0, 5),        # random_strength
        (0.0, 1),        # bagging_temperature
    )

    def __str__(self):
        return "CAT BOOST CLASSIFIER"

    def definirMinimo(self):
        """Return the lower bound of every gene."""
        return [inferior for inferior, _ in self._LIMITES]

    def definirMaximo(self):
        """Return the upper bound of every gene."""
        return [superior for _, superior in self._LIMITES]

    def gerarModelo(self, genes):
        """Build an unfitted CatBoostClassifier from a chromosome."""
        parametros = dict(
            iterations=genes[0],
            learning_rate=genes[1],
            depth=genes[2],
            l2_leaf_reg=genes[3],
            random_strength=genes[4],
            bagging_temperature=genes[5],
            # Fixed (non-tuned) settings.
            task_type="CPU",
            thread_count=-1,
            verbose=False,
        )
        return CatBoostClassifier(**parametros)


In [None]:
def definirAlgoritmo(tipo_algoritmo):
    """Return a new search-space object for the given classifier name.

    Args:
        tipo_algoritmo: Class name of the classifier, e.g. "XGBClassifier".

    Returns:
        A fresh instance of the matching search-space class, or ``None``
        when the name is not one of the supported ones.
    """
    # Each factory is wrapped in a lambda so that a class is only looked up
    # and instantiated when its name is the one requested.
    fabricas = (
        ("KNeighborsClassifier", lambda: KNN()),
        ("RandomForestClassifier", lambda: RFC()),
        ("LogisticRegression", lambda: LR()),
        ("XGBClassifier", lambda: XGB()),
        ("LGBMClassifier", lambda: LGBM()),
        ("CatBoostClassifier", lambda: CBC()),
    )

    for nome, fabrica in fabricas:
        if tipo_algoritmo == nome:
            return fabrica()

    return None


def selecionarPorTorneio(populacao):
    """Pick two parents through two independent two-way tournaments.

    Four distinct individuals are drawn at random; the first pair competes
    for the "pai" slot and the second pair for the "mae" slot, the higher
    ``performance`` winning each duel.

    Returns:
        A ``(pai, mae)`` tuple of deep copies, so callers may modify the
        parents without touching the population.
    """
    def vencedor(primeiro, segundo):
        # Strict ">" means that a tie goes to the second contender.
        return primeiro if primeiro.performance > segundo.performance else segundo

    candidato_a, candidato_b, candidato_c, candidato_d = sample(populacao, 4)
    pai = vencedor(candidato_a, candidato_b)
    mae = vencedor(candidato_c, candidato_d)

    return deepcopy(pai), deepcopy(mae)


In [None]:
class GA:
    """Genetic algorithm that searches one classifier's hyperparameter space.

    Each individual carries a chromosome produced by an ``Algoritmo`` search
    space. Individuals are scored on a validation split, the best one seen
    so far is kept in ``melhor_individuo`` and is finally re-evaluated on a
    held-out test split. Progress is written to a text report.
    """

    def __init__(self, num_individuos, num_populacoes, tipo_algoritmo):
        """Configure the search.

        Args:
            num_individuos: Size of each population.
            num_populacoes: Total number of populations, counting the
                initial random one.
            tipo_algoritmo: Name understood by ``definirAlgoritmo``.

        Raises:
            ValueError: If ``tipo_algoritmo`` is not a known algorithm.
        """
        self.num_individuos = num_individuos
        self.num_populacoes = num_populacoes
        self.algoritmo = definirAlgoritmo(tipo_algoritmo)

        # definirAlgoritmo returns None for unknown names; fail here with a
        # clear message instead of an AttributeError on the next statement.
        if self.algoritmo is None:
            raise ValueError("Algoritmo desconhecido: {!r}".format(tipo_algoritmo))

        self.qtd_genes = self.algoritmo.qtd_genes
        self.chance_de_mutar = 1 / self.qtd_genes
        # Placeholder (chromosome None) until a real individual is evaluated.
        self.melhor_individuo = Individuo(None)

        self.x_treinamento = None
        self.y_treinamento = None
        self.x_validacao = None
        self.y_validacao = None
        self.x_teste = None
        self.y_teste = None

    def definirXY(self, endereco_parquet):
        """Load a parquet dataset and build scaled train/validation/test splits.

        The last column is used as the target and all the others as
        features. The data is split 70% / 15% / 15% (stratified, fixed
        seed) and the scaler is fitted on the training part only.
        """
        dataframe = read_parquet(endereco_parquet)
        x = dataframe.iloc[:, :-1].values
        y = dataframe.iloc[:, -1].values
        scaler = MinMaxScaler()

        x_train, x_rest, self.y_treinamento, y_rest = train_test_split(
            x, y, test_size=0.3, random_state=42, stratify=y)

        # The remaining 30% is halved into validation and test.
        x_val, x_test, self.y_validacao, self.y_teste = train_test_split(
            x_rest, y_rest, test_size=0.5, random_state=42, stratify=y_rest)

        self.x_treinamento = scaler.fit_transform(x_train)
        self.x_validacao = scaler.transform(x_val)
        self.x_teste = scaler.transform(x_test)

    def gerarPopulacao(self):
        """Return ``num_individuos`` individuals with random chromosomes."""
        return [Individuo(self.algoritmo.gerarCromossomo())
                for _ in range(self.num_individuos)]

    @staticmethod
    def _calcularMetricas(y_real, previsoes):
        """Return (accuracy, precision, recall) as percentages with one decimal."""
        acuracia = round(accuracy_score(y_real, previsoes) * 100, 1)
        precisao = round(precision_score(y_real, previsoes, zero_division=0) * 100, 1)
        recall = round(recall_score(y_real, previsoes, zero_division=0) * 100, 1)

        return acuracia, precisao, recall

    def _atualizarMelhor(self, individuo):
        """Keep a copy of ``individuo`` if it beats the best one seen so far."""
        atual = self.melhor_individuo

        # The placeholder is always replaced: otherwise, if every individual
        # scored 0, the final test evaluation would receive a None chromosome.
        if atual.cromossomo is None or individuo.performance > atual.performance:
            self.melhor_individuo = deepcopy(individuo)

    def ajustarMetricas(self, individuo):
        """Train the individual's model and store its validation scores."""
        modelo = self.algoritmo.gerarModelo(individuo.cromossomo)
        modelo.fit(self.x_treinamento, self.y_treinamento)
        previsoes = modelo.predict(self.x_validacao)

        individuo.acuracia, individuo.precisao, individuo.recall = (
            self._calcularMetricas(self.y_validacao, previsoes))

        individuo.performance = individuo.retornarPerformance()

    def fazerCrossover(self, pai, mae):
        """Swap a random, non-empty, proper subset of genes between the parents.

        The parents are modified in place and returned as the two children.
        """
        # With a single gene there is no proper subset to exchange (and
        # randint(1, 0) would raise), so the parents pass through unchanged.
        if self.qtd_genes < 2:
            return pai, mae

        indices = sample(range(self.qtd_genes), randint(1, self.qtd_genes - 1))

        for i in indices:
            pai.cromossomo[i], mae.cromossomo[i] = mae.cromossomo[i], pai.cromossomo[i]

        return pai, mae

    def mutar(self, filho, filha):
        """Mutate both children gene by gene, then evaluate them.

        Each gene of each child is independently replaced by a new random
        value with probability ``chance_de_mutar``.

        Returns:
            The list ``[filho, filha]`` with their metrics filled in.
        """
        for i in range(self.qtd_genes):
            if uniform(0, 1) <= self.chance_de_mutar:
                filho.cromossomo[i] = self.algoritmo.gerarGene(i)
            if uniform(0, 1) <= self.chance_de_mutar:
                filha.cromossomo[i] = self.algoritmo.gerarGene(i)

        self.ajustarMetricas(filho)
        self.ajustarMetricas(filha)

        return [filho, filha]

    def avaliarMelhorNoTeste(self, file):
        """Retrain the best chromosome and write its test-set scores to ``file``."""
        modelo = self.algoritmo.gerarModelo(self.melhor_individuo.cromossomo)
        modelo.fit(self.x_treinamento, self.y_treinamento)
        previsoes = modelo.predict(self.x_teste)

        acuracia, precisao, recall = self._calcularMetricas(self.y_teste, previsoes)
        # Same 1-2-3 weighting as Individuo.retornarPerformance.
        media = round((acuracia + 2 * precisao + 3 * recall) / 6, 1)

        file.write("\nPerformance no Teste -> Acuracia: {}%, Precisao: {}%, Recall: {}%, Media: {}%".
                   format(acuracia, precisao, recall, media))

    def comecarPrimeiroLoop(self, populacao, file):
        """Evaluate and report the initial random population."""
        file.write("1* Populacao:\n")

        for individuo in populacao:
            self.ajustarMetricas(individuo)
            file.write(str(individuo) + '\n')
            self._atualizarMelhor(individuo)

        file.write('\nMelhor ' + str(self.melhor_individuo) + "\n\n")

    def comecarDemaisLoops(self, populacao, file):
        """Breed, evaluate and report every population after the first.

        Each new population is made of the children of tournament-selected
        parents and fully replaces the previous one.
        """
        for num_populacao in range(1, self.num_populacoes):
            file.write("{}* Populacao:\n".format(num_populacao + 1))
            nova_populacao = []

            # Every iteration yields two children.
            for _ in range(self.num_individuos // 2):
                pai, mae = selecionarPorTorneio(populacao)
                filho, filha = self.fazerCrossover(pai, mae)
                irmaos = self.mutar(filho, filha)

                for irmao in irmaos:
                    file.write(str(irmao) + '\n')
                    nova_populacao.append(irmao)
                    self._atualizarMelhor(irmao)

            file.write('\nMelhor ' + str(self.melhor_individuo) + "\n\n")
            populacao = nova_populacao

    def executar(self):
        """Run the whole search and write the report under ``resultados/``."""
        populacao = self.gerarPopulacao()

        # open() does not create missing folders; without this a fresh
        # checkout fails with FileNotFoundError.
        os.makedirs("resultados", exist_ok=True)

        with open("resultados/" + str(self.algoritmo) + ".txt", "w") as file:
            self.comecarPrimeiroLoop(populacao, file)
            self.comecarDemaisLoops(populacao, file)
            self.avaliarMelhorNoTeste(file)


In [None]:
# Classifiers to tune, identified by the names definirAlgoritmo recognises.
lista = [
    "KNeighborsClassifier",
    "RandomForestClassifier",
    "LogisticRegression",
    "XGBClassifier",
    "LGBMClassifier",
    "CatBoostClassifier",
]

# One independent run per classifier: 10 individuals, 10 populations.
for item in lista:
    print(item)
    ga = GA(num_individuos=10, num_populacoes=10, tipo_algoritmo=item)
    ga.definirXY('datasets/mimic iv.parquet')
    ga.executar()
