## 1. Configuração Banco de Dados ##

###  Conexão com o Banco de Dados ###

In [419]:
from sqlalchemy import create_engine, text

In [420]:
USER = "your_user"
PASSWORD = "your_password"
HOST = "localhost"
DATABASE = "your_database" 

In [422]:
# Criar conexão usando SQLAlchemy
engine = create_engine(f"mysql+pymysql://{USER}:{PASSWORD}@{HOST}/{DATABASE}")

### Criação das Tabelas ###

In [None]:
# Deletar tabelas do bancos de dados
with engine.connect() as connection:
    tables_to_drop = ["Metrics", "Experiments", "LearnStrategies", "Hyperparameters", "Models"]
    for table in tables_to_drop:
        connection.execute(text(f"DROP TABLE IF EXISTS {table};"))
    connection.commit() # Confirma a remoção das tabelas

In [424]:
tables = {
    "Models": """
    CREATE TABLE IF NOT EXISTS Models (
        id INT AUTO_INCREMENT PRIMARY KEY,
        type INT,  -- 0: Classificação, 1: Regressão
        algorithm VARCHAR(50)
    )
    """,
    
    "Hyperparameters": """
    CREATE TABLE IF NOT EXISTS Hyperparameters (
        id INT AUTO_INCREMENT PRIMARY KEY,
        model_id INT,
        name VARCHAR(50),
        value VARCHAR(50),
        FOREIGN KEY (model_id) REFERENCES Models(id) ON DELETE CASCADE
    )
    """,
    
    "LearnStrategies": """
    CREATE TABLE IF NOT EXISTS LearnStrategies (
        id INT AUTO_INCREMENT PRIMARY KEY,
        model_id INT NOT NULL,
        preprocessing_type VARCHAR(50) NOT NULL,  -- Scaling, Sampling, Feature Selection
        data_sampling VARCHAR(50) NOT NULL,  -- Undersampling, Oversampling, Stratified Sampling
        type VARCHAR(50) NOT NULL,  -- Cross-Validation, Hold-out
        len_data JSON NOT NULL,  -- Lista [Treino, Teste, Validação]
        FOREIGN KEY (model_id) REFERENCES Models(id) ON DELETE CASCADE
    )
    """,
    
    "Experiments": """
    CREATE TABLE IF NOT EXISTS Experiments (
        id INT AUTO_INCREMENT PRIMARY KEY,
        model_id INT,
        dataset VARCHAR(50),
        date_created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (model_id) REFERENCES Models(id) ON DELETE CASCADE
    )
    """,

    "Metrics": """
    CREATE TABLE IF NOT EXISTS Metrics (
        id INT AUTO_INCREMENT PRIMARY KEY,
        experiment_id INT,
        type VARCHAR(50),
        value FLOAT,
        FOREIGN KEY (experiment_id) REFERENCES Experiments(id) ON DELETE CASCADE
    )
    """
}

In [425]:
with engine.connect() as connection:
    for table_name, sql_query in tables.items():
        connection.execute(text(sql_query)) # Use text() here!
        print(f"Tabela {table_name} criada com sucesso! ✅")

print("Todas as tabelas foram criadas corretamente! 🎉")

Tabela Models criada com sucesso! ✅
Tabela Hyperparameters criada com sucesso! ✅
Tabela LearnStrategies criada com sucesso! ✅
Tabela Experiments criada com sucesso! ✅
Tabela Metrics criada com sucesso! ✅
Todas as tabelas foram criadas corretamente! 🎉


## 2.Funções de Treinamento e Avaliação ##

### Importação das Bibliotecas ###

In [426]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 
import json

In [427]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from xgboost import XGBClassifier, XGBRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, mean_squared_error, mean_absolute_error, r2_score
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import KFold, cross_val_score, cross_val_predict

### Carregar Datasets ###

In [428]:
def load_dataset(name):
    from sklearn.datasets import load_iris, load_breast_cancer, load_diabetes, fetch_california_housing
    
    if name == "iris":
        data = load_iris()
    elif name == "breast_cancer":
        data = load_breast_cancer()
    elif name == "diabetes":
        from sklearn.datasets import load_diabetes 
        data = load_diabetes()
    elif name == "california":
        data = fetch_california_housing()
    else:
        raise ValueError("Dataset não suportado")
    
    return pd.DataFrame(data.data, columns=data.feature_names), pd.Series(data.target)


### Função de Treinamento e Salvamento no BD ###

In [None]:
def train_and_save_model(dataset_name, model_type, algorithm, model, params={}, use_cross_val=False, test_size=0.2, val_size=0.0, cv=5, scaling = True, oversample=False, feature_selection=False, k=10):

    try:  # Try to load the dataset, handle potential errors
        X, y = load_dataset(dataset_name)
    except Exception as e:
        print(f"Error loading dataset {dataset_name}: {e}")
        return  # Exit the function if dataset loading fails

    scaling_applied = False
    sampling_applied = False
    feature_selection_applied = False

    X_transformed = X  # Initialize X_transformed (will hold scaled or original data)

    if scaling:  # Apply scaling if scaling is True
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        X_transformed = X_scaled
        scaling_applied = True

    # Apply oversampling *before* splitting
    try:  # Handle potential errors during oversampling
        if oversample:
            smote = SMOTE(random_state=42)
            X_transformed, y = smote.fit_resample(X_transformed, y)  # Oversample all data
            sampling_applied = True  # Set flag *after* successful oversampling
    except ImportError:
        print("imblearn not found. Install it using: pip install imbalanced-learn")
    except Exception as e:
        print(f"Error during oversampling: {e}")

    # Apply feature selection *before* splitting
    try:  # Handle potential errors during feature selection
        if feature_selection:
            n_features = X_transformed.shape[1]  # Get the number of features
            k = min(k, n_features)  # Ensure k is not greater than n_features
            selector = SelectKBest(f_classif, k=k)
            X_transformed = selector.fit_transform(X_transformed, y)  # Fit and transform all data
            feature_selection_applied = True  # Set flag *after* successful feature selection
    except ImportError:
        print("scikit-learn is required for feature selection.")
    except Exception as e:
        print(f"Error during feature selection: {e}")


    preprocessing_strategies = []

    if scaling_applied:  # Check scaling_applied
        preprocessing_strategies.append("Scaling")
    if sampling_applied:  # Check oversample directly
        preprocessing_strategies.append("Sampling")
    if feature_selection_applied:  # Check feature_selection directly
        preprocessing_strategies.append("Feature Selection")

    metrics = {}

    if use_cross_val:
        kfold = KFold(n_splits=cv, shuffle=True, random_state=42)
        len_data = [cv, 0, 0]

        if model_type == "classification":
            y_pred = cross_val_predict(model, X_transformed, y, cv=kfold, method="predict")

            try: # Try to get probabilities; handle multi-class cases
                y_prob = cross_val_predict(model, X_transformed, y, cv=kfold, method="predict_proba")
                if y_prob.shape[1] > 2: # Multi-class
                    auc_roc = roc_auc_score(y, y_prob, multi_class='ovr', average='weighted')
                else: # Binary class
                    auc_roc = roc_auc_score(y, y_prob[:, 1], multi_class='ovr', average='weighted')
            except AttributeError: # Model doesn't have predict_proba
                auc_roc = None

            metrics = {
                "accuracy": accuracy_score(y, y_pred),
                "precision": precision_score(y, y_pred, average='weighted'),
                "recall": recall_score(y, y_pred, average='weighted'),
                "f1_score": f1_score(y, y_pred, average='weighted'),
                "auc_roc": auc_roc
            }
        else:  # Regression
            y_pred = cross_val_predict(model, X_transformed, y, cv=kfold)
            metrics = {
                "mse": mean_squared_error(y, y_pred),
                "rmse": np.sqrt(mean_squared_error(y, y_pred)),
                "mae": mean_absolute_error(y, y_pred),
                "r2": r2_score(y, y_pred)
            }

    else:
        # Separar treino, teste e validação *after* oversampling and feature selection
        if val_size > 0:
            X_train, X_temp, y_train, y_temp = train_test_split(X_transformed, y, test_size=test_size, random_state=42)
            X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=test_size, random_state=42)
            len_data = [len(X_train) / len(X_transformed), len(X_test) / len(X_transformed), len(X_val) / len(X_transformed)]
        else:
            X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=test_size, random_state=42)
            len_data = [len(X_train) / len(X_transformed), len(X_test) / len(X_transformed), 0]

        model.set_params(**params)
        model.fit(X_train, y_train)  # Fit on the training data (no need for X_train_resampled)
        y_pred = model.predict(X_test)

        if model_type == "classification":
            metrics = {
                "accuracy": accuracy_score(y_test, y_pred),
                "precision": precision_score(y_test, y_pred, average='weighted'),
                "recall": recall_score(y_test, y_pred, average='weighted'),
                "f1_score": f1_score(y_test, y_pred, average='weighted'),
                "auc_roc": roc_auc_score(y_test, model.predict_proba(X_test), multi_class='ovr', average='weighted') if hasattr(model, "predict_proba") else None
            }
        else:  # Regression
            metrics = {
                "mse": mean_squared_error(y_test, y_pred),
                "rmse": np.sqrt(mean_squared_error(y_test, y_pred)),
                "mae": mean_absolute_error(y_test, y_pred),
                "r2": r2_score(y_test, y_pred)
            }


    with engine.connect() as connection:
        try:
            # Salvar model
            result = connection.execute(text("INSERT INTO Models (type, algorithm) VALUES (:type, :algorithm)"),
                                        {"type": 0 if model_type == "classification" else 1, "algorithm": algorithm})
            model_id = result.lastrowid

            # Salvar hyperparameters
            for name, value in params.items():
                connection.execute(text("INSERT INTO Hyperparameters (model_id, name, value) VALUES (:model_id, :name, :value)"),
                                 {"model_id": model_id, "name": name, "value": str(value)})

            # Salvar experiment
            result = connection.execute(text("INSERT INTO Experiments (model_id, dataset) VALUES (:model_id, :dataset)"),
                                        {"model_id": model_id, "dataset": dataset_name})
            experiment_id = result.lastrowid

            # Salvar metrics
            for metric, value in metrics.items():
                connection.execute(text("INSERT INTO Metrics (experiment_id, type, value) VALUES (:experiment_id, :type, :value)"),
                             {"experiment_id": experiment_id, "type": metric, "value": value})

            # Salvar LearnStrategies
            result = connection.execute(text("INSERT INTO LearnStrategies (model_id, preprocessing_type, data_sampling, type, len_data) VALUES (:model_id, :preprocessing_type, :data_sampling, :type, :len_data)"),
                            {"model_id": model_id, "preprocessing_type": ", ".join(preprocessing_strategies), "data_sampling": "None", "type": "Cross-Validation" if use_cross_val else "Hold-out", "len_data": json.dumps(len_data)})

            connection.commit()
            print(f"Modelo {algorithm} treinado e salvo com sucesso!")

        except Exception as e:
            connection.rollback()
            print(f"Erro ao salvar o modelo: {e}")
            raise

In [None]:
# train_and_save_model(dataset_name, model_type, algorithm, model, params={}, use_cross_val=False, test_size=0.2, val_size=0.0, cv=5, scaling = True, oversample=False, feature_selection=False, k=10)

# REGRESSÃO
# Hold-out
train_and_save_model("diabetes", "regression", "LinearRegression", LinearRegression(), {"fit_intercept": True}, test_size=0.2, val_size=0.0, scaling=True) 

train_and_save_model("diabetes", "regression", "DecisionTree", DecisionTreeRegressor(), {"max_depth": 3, "min_samples_split": 5}, use_cross_val=False, test_size=0.2, val_size=0.1, scaling=False, feature_selection=True)

train_and_save_model("california", "regression", "XGBoost", XGBRegressor(), {"n_estimators": 50, "max_depth": 3, "learning_rate": 0.05}, use_cross_val=False, test_size=0.3, val_size=0.05, scaling=True)

train_and_save_model("california", "regression", "RandomForest", RandomForestRegressor(), {"n_estimators": 75, "max_depth": 4}, use_cross_val=False, test_size=0.3, scaling=False, feature_selection=True)

# Cross-Validation
train_and_save_model("diabetes", "regression", "Ridge", Ridge(), {"alpha": 1.0}, use_cross_val=True, cv=10, scaling=True) 

train_and_save_model("california", "regression", "Lasso", Lasso(), {"alpha": 0.5}, use_cross_val=True, cv=5, scaling=False, feature_selection=True) 

train_and_save_model("california", "regression", "Lasso", Lasso(), {"alpha": 0.5}, use_cross_val=True, cv=5, scaling=True) 


# CLASSIFICAÇÃO
# Hold-out
train_and_save_model("iris", "classification", "Random Forest", RandomForestClassifier(), {"n_estimators": 50, "max_depth": 4}, test_size=0.2, scaling=True) 

train_and_save_model("iris", "classification", "SVM", SVC(probability=True), {"C": 1.0, "kernel": "rbf"}, test_size=0.3, scaling=True) 

train_and_save_model("iris", "classification", "KNeighbors", KNeighborsClassifier(), {"n_neighbors": 3}, use_cross_val=False, test_size=0.3, scaling=True, oversample=False)

# Cross-Validation
train_and_save_model("breast_cancer", "classification", "LogisticRegression", LogisticRegression(max_iter=1000), {"C": 0.1, "solver": 'liblinear'}, use_cross_val=True, cv=7, scaling=True) 

train_and_save_model("breast_cancer", "classification", "GaussianNB", GaussianNB(), {}, use_cross_val=True, cv=5, feature_selection=True) 

train_and_save_model("breast_cancer", "classification", "GaussianNB", GaussianNB(), {}, use_cross_val=True, cv=5, oversample=True) 

Modelo LinearRegression treinado e salvo com sucesso!
Modelo DecisionTree treinado e salvo com sucesso!
Modelo XGBoost treinado e salvo com sucesso!
Modelo RandomForest treinado e salvo com sucesso!
Modelo Ridge treinado e salvo com sucesso!
Modelo Lasso treinado e salvo com sucesso!
Modelo Lasso treinado e salvo com sucesso!
Modelo Random Forest treinado e salvo com sucesso!
Modelo SVM treinado e salvo com sucesso!
Modelo KNeighbors treinado e salvo com sucesso!
Modelo LogisticRegression treinado e salvo com sucesso!
Modelo GaussianNB treinado e salvo com sucesso!
Modelo GaussianNB treinado e salvo com sucesso!


## 3. Relatórios

### Apresentação Geral dos Dados

In [432]:
def fetch_models_with_strategies():
    with engine.connect() as connection:
        df = pd.read_sql(text("""
            SELECT m.algorithm, m.type,  e.dataset, ls.preprocessing_type,  ls.type AS strategy_type, ls.len_data
            FROM LearnStrategies ls
            JOIN Models m ON ls.model_id = m.id
            JOIN Experiments e ON ls.model_id = e.model_id
            ORDER BY e.dataset
        """), connection)
        return df

df_models_with_strategies = fetch_models_with_strategies()
df_models_with_strategies

# type = 0: Classificação, 1: Regressão

Unnamed: 0,algorithm,type,dataset,preprocessing_type,strategy_type,len_data
0,LogisticRegression,0,breast_cancer,Scaling,Cross-Validation,"[7, 0, 0]"
1,GaussianNB,0,breast_cancer,"Scaling, Feature Selection",Cross-Validation,"[5, 0, 0]"
2,GaussianNB,0,breast_cancer,"Scaling, Sampling",Cross-Validation,"[5, 0, 0]"
3,XGBoost,1,california,Scaling,Hold-out,"[0.7, 0.20998062015503877, 0.09001937984496124]"
4,RandomForest,1,california,Feature Selection,Hold-out,"[0.7, 0.3, 0]"
5,Lasso,1,california,Feature Selection,Cross-Validation,"[5, 0, 0]"
6,Lasso,1,california,Scaling,Cross-Validation,"[5, 0, 0]"
7,LinearRegression,1,diabetes,Scaling,Hold-out,"[0.7986425339366516, 0.2013574660633484, 0]"
8,DecisionTree,1,diabetes,Feature Selection,Hold-out,"[0.7986425339366516, 0.16063348416289594, 0.04..."
9,Ridge,1,diabetes,Scaling,Cross-Validation,"[10, 0, 0]"


In [433]:
def fetch_results():
    with engine.connect() as connection: 
        df = pd.read_sql(text("""
            SELECT e.dataset, m.id, m.algorithm, me.type, me.value 
            FROM Experiments e 
            JOIN Models m ON e.model_id = m.id
            JOIN Metrics me ON e.id = me.experiment_id
        """), connection)

        df_agg = df.groupby(['id','dataset', 'algorithm', 'type'])['value'].mean().reset_index()

        return df


df_results = fetch_results()
df_results


Unnamed: 0,dataset,id,algorithm,type,value
0,diabetes,1,LinearRegression,mse,2900.19
1,diabetes,1,LinearRegression,rmse,53.8534
2,diabetes,1,LinearRegression,mae,42.7941
3,diabetes,1,LinearRegression,r2,0.452603
4,diabetes,2,DecisionTree,mse,3622.81
5,diabetes,2,DecisionTree,rmse,60.1898
6,diabetes,2,DecisionTree,mae,48.6999
7,diabetes,2,DecisionTree,r2,0.362642
8,california,3,XGBoost,mse,0.434417
9,california,3,XGBoost,rmse,0.659103


In [434]:
def fetch_metrics_comparison():
    with engine.connect() as connection:
        df = pd.read_sql(text("""
            SELECT m.algorithm, e.dataset, m.type AS model_type, me.type AS metric_type, me.value
            FROM Metrics me
            JOIN Experiments e ON me.experiment_id = e.id
            JOIN Models m ON e.model_id = m.id
        """), connection)
        
        df['model_type'] = df['model_type'].map({0: 'Classificação', 1: 'Regressão'})
        return df

df_metrics_comparison = fetch_metrics_comparison()

# Dividir em tabelas de classificação e regressão
classification_metrics = df_metrics_comparison[df_metrics_comparison['model_type'] == 'Classificação']
regression_metrics = df_metrics_comparison[df_metrics_comparison['model_type'] == 'Regressão']

# Pivotar as tabelas para melhor visualização
classification_comparison = classification_metrics.pivot_table(index=['dataset', 'algorithm'], columns='metric_type', values='value').reset_index()
regression_comparison = regression_metrics.pivot_table(index=['dataset', 'algorithm'], columns='metric_type', values='value').reset_index()


In [435]:
regression_comparison

metric_type,dataset,algorithm,mae,mse,r2,rmse
0,california,Lasso,0.839875,1.141884,0.142442,1.064868
1,california,RandomForest,0.531847,0.516401,0.606563,0.718611
2,california,XGBoost,0.488687,0.434417,0.66284,0.659103
3,diabetes,DecisionTree,48.6999,3622.81,0.362642,60.1898
4,diabetes,LinearRegression,42.7941,2900.19,0.452603,53.8534
5,diabetes,Ridge,44.4578,3020.86,0.49057,54.9624


In [436]:
classification_comparison

metric_type,dataset,algorithm,accuracy,auc_roc,f1_score,precision,recall
0,breast_cancer,GaussianNB,0.935466,0.985035,0.935424,0.936435,0.935466
1,breast_cancer,LogisticRegression,0.975395,0.994345,0.975347,0.975402,0.975395
2,iris,KNeighbors,1.0,1.0,1.0,1.0,1.0
3,iris,Random Forest,1.0,1.0,1.0,1.0,1.0
4,iris,SVM,1.0,1.0,1.0,1.0,1.0


### Hiperparâmetros

In [442]:
# Função para buscar modelos e hiperparâmetros
def fetch_models_and_hyperparams():
    # Conectar ao banco de dados usando SQLAlchemy
    with engine.connect() as connection:
        # Executar consulta SQL para buscar datasets, algoritmos e hiperparâmetros
        df = pd.read_sql(text("""
            SELECT e.dataset, m.algorithm, h.name AS hyperparameter_name, h.value AS hyperparameter_value
            FROM Experiments e
            JOIN Models m ON e.model_id = m.id
            JOIN Hyperparameters h ON m.id = h.model_id
        """), connection)

        # Inicializar dicionário para armazenar resultados
        results = {}
        # Agrupar resultados por dataset e algoritmo
        for (dataset, algorithm), group in df.groupby(['dataset', 'algorithm']):
            # Concatenar hiperparâmetros em uma string
            hyperparams = "; ".join(f"{row['hyperparameter_name']} = {row['hyperparameter_value']}" for _, row in group.iterrows())
            # Armazenar hiperparâmetros no dicionário de resultados
            results[f"{dataset} ({algorithm})"] = f"{{ {hyperparams} }}"

        return results

# Buscar modelos e hiperparâmetros e armazenar em uma variável
models_and_hyperparams = fetch_models_and_hyperparams()

# Imprimir resultados
for model, hyperparams in models_and_hyperparams.items():
    print(f"{model} : {hyperparams}")


breast_cancer (LogisticRegression) : { C = 0.1; solver = liblinear }
california (Lasso) : { alpha = 0.5; alpha = 0.5 }
california (RandomForest) : { n_estimators = 75; max_depth = 4 }
california (XGBoost) : { n_estimators = 50; max_depth = 3; learning_rate = 0.05 }
diabetes (DecisionTree) : { max_depth = 3; min_samples_split = 5 }
diabetes (LinearRegression) : { fit_intercept = True }
diabetes (Ridge) : { alpha = 1.0 }
iris (KNeighbors) : { n_neighbors = 3 }
iris (Random Forest) : { n_estimators = 50; max_depth = 4 }
iris (SVM) : { C = 1.0; kernel = rbf }


## 4. Gerar Relatório

In [457]:
import os

# Gerar relatório HTML dos resultados
# Criar diretório para armazenar relatórios, se não existir
os.makedirs("reports", exist_ok=True)

# Gerar um nome de arquivo único para o relatório
def generate_unique_filename(base_name, extension=".html"):
    counter = 1
    filename = f"{base_name}{extension}"
    while os.path.exists(os.path.join("reports", filename)):
        filename = f"{base_name}_{counter}{extension}"
        counter += 1
    return filename

# Gerar nome de arquivo único para o relatório atual
report_filename = generate_unique_filename("report_models")

def fetch_models():
    with engine.connect() as connection:
        df = pd.read_sql(text("SELECT * FROM Models"), connection)
        return df

def fetch_hyperparameters():
    with engine.connect() as connection:
        df = pd.read_sql(text("SELECT * FROM Hyperparameters"), connection)
        return df

def fetch_learn_strategies():
    with engine.connect() as connection:
        df = pd.read_sql(text("SELECT * FROM LearnStrategies"), connection)
        return df

def fetch_experiments():
    with engine.connect() as connection:
        df = pd.read_sql(text("SELECT * FROM Experiments"), connection)
        return df

def fetch_metrics():
    with engine.connect() as connection:
        df = pd.read_sql(text("SELECT * FROM Metrics"), connection)
        return df

def generate_html_report():
    models = fetch_models()
    hyperparameters = fetch_hyperparameters()
    learn_strategies = fetch_learn_strategies()
    experiments = fetch_experiments()
    metrics = fetch_metrics()

    # Estilo CSS para o relatório
    css_styles = """
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; padding: 20px; background-color: #f4f4f4; }
        h1 { text-align: center; color: #333; }
        h2 { border-bottom: 2px solid #2F4156; padding-bottom: 5px; color: #2F4156; }
        .container { max-width: 900px; margin: auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0px 0px 10px rgba(0,0,0,0.1); }
        table { width: 100%; border-collapse: collapse; margin-top: 10px; }
        th, td { padding: 10px; border: 1px solid #ddd; text-align: left; }
        th { background-color: #2F4156; color: white; }
        .hyperparams { font-style: italic; color: #555; }
        hr { border: 0; height: 1px; background: #ddd; margin: 20px 0; }
    </style>
    """

    # Início do HTML
    html_content = f"""
    <html>
    <head>
        <title>Relatório de Modelos</title>
        {css_styles}
    </head>
    <body>
        <div class='container'>
            <h1>Relatório de Modelos</h1>
    """

    # Agrupar modelos por dataset
    grouped_experiments = experiments.groupby('dataset')

    for dataset, group in grouped_experiments:
        html_content += f"<h2>Dataset: {dataset}</h2>"

        for _, experiment in group.iterrows():
            model_id = experiment['model_id']
            model = models[models['id'] == model_id].iloc[0]
            algorithm = model['algorithm']
            model_type = "Classificação" if model['type'] == 0 else "Regressão"

            html_content += f"""
                <h3>Modelo: {algorithm}</h3>
                <p><strong>Tipo:</strong> {model_type}</p>
            """

            # Adicionar hiperparâmetros
            model_hyperparams = hyperparameters[hyperparameters['model_id'] == model_id]
            if not model_hyperparams.empty:
                html_content += "<h4>Hiperparâmetros:</h4><ul>"
                for _, param in model_hyperparams.iterrows():
                    html_content += f"<li>{param['name']} = {param['value']}</li>"
                html_content += "</ul>"

            # Adicionar estratégias de aprendizado
            model_strategies = learn_strategies[learn_strategies['model_id'] == model_id]
            if not model_strategies.empty:
                html_content += "<h4>Estratégias de Aprendizado:</h4><ul>"
                for _, strategy in model_strategies.iterrows():
                    html_content += f"<li>{strategy['preprocessing_type']} - {strategy['data_sampling']} - {strategy['type']} - {strategy['len_data']}</li>"
                html_content += "</ul>"

            # Adicionar métricas
            model_metrics = metrics[metrics['experiment_id'] == experiment['id']]
            if not model_metrics.empty:
                html_content += "<h4>Métricas:</h4><ul>"
                for _, metric in model_metrics.iterrows():
                    html_content += f"<li>{metric['type']} = {metric['value']}</li>"
                html_content += "</ul>"

            html_content += "<hr>"

    # Fechando o HTML
    html_content += """
        </div>
    </body>
    </html>
    """

    with open(f"reports/{report_filename}", "w", encoding="utf-8") as file:
        file.write(html_content)

generate_html_report()
print("Relatório HTML gerado com sucesso!")

Relatório HTML gerado com sucesso!


In [458]:
engine.dispose()
print("Conexão com o banco de dados encerrada.")

Conexão com o banco de dados encerrada.
