In [None]:
# Download the py310.sh installer from the korakot/kora v0.10 release, run it
# with -p /usr/local, then register a Jupyter kernel named "py310".
# NOTE(review): judging by the script name this presumably installs Python 3.10
# under /usr/local (replacing the runtime's default interpreter) — confirm
# against the kora release notes.
!wget https://github.com/korakot/kora/releases/download/v0.10/py310.sh
!bash ./py310.sh -b -f -p /usr/local
!python -m ipykernel install --name "py310" --user

In [None]:
# Sanity check: show which Python version the `python` command resolves to.
!python --version

In [None]:
# Install SciPy pinned to 1.8.1.
!pip install scipy==1.8.1


In [None]:
# Install Cython pinned to 0.29.35.
# NOTE(review): presumably required to compile scikit-learn 0.24.2 from source
# in the following cell — confirm.
!pip install Cython==0.29.35

In [None]:
# Install scikit-learn pinned to 0.24.2. --no-build-isolation makes pip build
# against the packages already installed in this environment instead of a
# freshly created isolated build environment.
!pip install scikit-learn==0.24.2 --no-build-isolation

In [None]:
# Install auto-sklearn with an explicit version pin so that a fresh run resolves
# the same release as the pinned scipy / Cython / scikit-learn stack.
# NOTE(review): 0.15.0 is assumed to be the release this notebook was run with
# (it depends on scikit-learn 0.24.x) — confirm with the `pip show auto-sklearn`
# cell and adjust the pin if it reports a different version.
!pip install auto-sklearn==0.15.0

In [None]:
# Display the metadata pip has recorded for the installed auto-sklearn package.
!pip show auto-sklearn

In [None]:
# Install tabulate (no version pin).
# NOTE(review): tabulate is not imported by any visible cell — presumably it is
# a dependency of the local `score` module; confirm, and pin a version for
# reproducibility.
!pip install tabulate

In [None]:
import autosklearn.classification as classification
import autosklearn.pipeline as pipeline
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import time
from score import pipeline_score
import os
import io
import sys
import csv

In [None]:
def capturar_e_salvar_prints_em_csv(func, *args, csv_filename="log.csv", **kwargs):
    """Run ``func`` while capturing what it prints, and save the text to a CSV.

    Everything ``func`` writes to ``sys.stdout`` during the call is collected
    instead of being displayed. Afterwards the captured text is split into
    lines and written to ``csv_filename`` as a single-column CSV whose header
    row is ``mensagem``, one captured line per row. The file is overwritten
    if it already exists.

    The CSV is written even when ``func`` raises, so the messages printed
    before the failure are not lost; the exception then propagates unchanged.

    Args:
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        csv_filename: Path of the CSV file that receives the captured lines.
            This keyword is consumed here and is not forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.

    Raises:
        Any exception raised by ``func`` (re-raised after the log is written).
    """
    # Imported locally because the notebook's import cell does not provide it.
    import contextlib

    buffer = io.StringIO()
    try:
        # redirect_stdout restores the previous sys.stdout on exit, including
        # when func raises.
        with contextlib.redirect_stdout(buffer):
            resultado = func(*args, **kwargs)
    finally:
        # Persist the captured output whether or not func succeeded.
        with open(csv_filename, mode='w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["mensagem"])
            writer.writerows([linha] for linha in buffer.getvalue().splitlines())

    return resultado

# CSV file where one summary row per dataset is accumulated.
result_csv_path = "AutoSklearn_results.csv"

# File-name prefix of the per-dataset log CSVs written inside the loop below.
LOG_PREFIX = "tabela "

# Treat every CSV in the working directory as an input dataset, except the files
# this notebook generates itself (the results file and the per-dataset logs).
# Without this filter a second run would load its own outputs as datasets.
# Sorting makes the processing order deterministic (os.listdir order is arbitrary).
datasets = sorted(
    f for f in os.listdir()
    if f.endswith(".csv")
    and f != result_csv_path
    and not f.startswith(LOG_PREFIX)
)

# Create the results file with just a header row if it does not exist yet;
# rows are appended to it as each dataset finishes.
if not os.path.exists(result_csv_path):
    pd.DataFrame(columns=["dataset", "best_params", "test_score", "execution_time"]).to_csv(result_csv_path, index=False)

# Restrict the search space to four classifier families.
# NOTE(review): the same estimator instance is refit for every dataset —
# confirm that auto-sklearn fully resets its state on each fit() call.
model = classification.AutoSklearnClassifier(include={'classifier': ['decision_tree','k_nearest_neighbors','libsvm_svc', 'random_forest']})

for dataset in datasets:
    print(f"\nProcessando: {dataset}")

    # Load the dataset: all columns but the last are used as features and the
    # last column as the target.
    df = pd.read_csv(dataset)
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    # Time only the fit() call; perf_counter is monotonic, so the measured
    # duration is not affected by system clock adjustments.
    start_time = time.perf_counter()
    model.fit(X_train, y_train)
    end_time = time.perf_counter()
    execution_time = end_time - start_time

    print(model.leaderboard())

    y_pred = model.predict(X_test)
    # pipeline_score comes from the local `score` module; whatever it prints is
    # captured and stored in "tabela <dataset>" instead of being displayed.
    final_score = capturar_e_salvar_prints_em_csv(pipeline_score, y_test, y_pred, verbosity=True, csv_filename=f"{LOG_PREFIX}{dataset}")

    # Summary row for this dataset.
    result = {
        "dataset": dataset,
        # Stored as a string to avoid formatting problems in the CSV.
        "best_params": str(model.show_models()),
        "test_score": final_score,
        "execution_time": execution_time,
    }

    # Append immediately so finished datasets are kept even if a later one fails.
    pd.DataFrame([result]).to_csv(result_csv_path, mode="a", header=False, index=False)


Processando: Sepsis Survival Minimal Clinical Records.csv
          rank  ensemble_weight                 type      cost    duration
model_id                                                                  
5           15             0.02        random_forest  0.073123   22.737187
6           14             0.02        random_forest  0.073123   34.021070
11          11             0.02        random_forest  0.073123   17.169122
12          10             0.04        random_forest  0.073123   35.531870
16           7             0.02        random_forest  0.073123   28.542372
17           6             0.02        random_forest  0.073123   20.591283
21           4             0.06        random_forest  0.073123   17.789463
28          18             0.02        random_forest  0.073123   17.191502
30           8             0.04           libsvm_svc  0.073123   35.693063
31           2             0.02        random_forest  0.073123   12.594692
41           3             0.02          