## 1. Configuração Banco de Dados ##

###  Conexão com o Banco de Dados ###

In [29]:
from sqlalchemy import create_engine, text

In [4]:
USER = "your_user"
PASSWORD = "your_password"
HOST = "localhost"
DATABASE = "your_database" 

In [31]:
# Criar conexão usando SQLAlchemy
engine = create_engine(f"mysql+pymysql://{USER}:{PASSWORD}@{HOST}/{DATABASE}")

### Criação das Tabelas ###

In [59]:
with engine.connect() as connection:
    tables_to_drop = ["Metrics", "Experiments", "LearnStrategies", "Hyperparameters", "Models"]
    for table in tables_to_drop:
        connection.execute(text(f"DROP TABLE IF EXISTS {table};"))
    connection.commit() # Confirma a remoção das tabelas

In [60]:
tables = {
    "Models": """
    CREATE TABLE IF NOT EXISTS Models (
        id INT AUTO_INCREMENT PRIMARY KEY,
        type INT,  -- 0: Classificação, 1: Regressão
        algorithm VARCHAR(50)
    )
    """,
    
    "Hyperparameters": """
    CREATE TABLE IF NOT EXISTS Hyperparameters (
        id INT AUTO_INCREMENT PRIMARY KEY,
        model_id INT,
        name VARCHAR(50),
        value VARCHAR(50),
        FOREIGN KEY (model_id) REFERENCES Models(id) ON DELETE CASCADE
    )
    """,
    
    "LearnStrategies": """
    CREATE TABLE IF NOT EXISTS LearnStrategies (
        id INT AUTO_INCREMENT PRIMARY KEY,
        model_id INT NOT NULL,
        preprocessing_type VARCHAR(50) NOT NULL,  -- Scaling, Sampling, Feature Selection
        data_sampling VARCHAR(50) NOT NULL,  -- Undersampling, Oversampling, Stratified Sampling
        type VARCHAR(50) NOT NULL,  -- Cross-Validation, Hold-out
        len_data JSON NOT NULL,  -- Lista [Treino, Teste, Validação]
        FOREIGN KEY (model_id) REFERENCES Models(id) ON DELETE CASCADE
    )
    """,
    
    "Experiments": """
    CREATE TABLE IF NOT EXISTS Experiments (
        id INT AUTO_INCREMENT PRIMARY KEY,
        model_id INT,
        dataset VARCHAR(50),
        date_created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (model_id) REFERENCES Models(id) ON DELETE CASCADE
    )
    """,

    "Metrics": """
    CREATE TABLE IF NOT EXISTS Metrics (
        id INT AUTO_INCREMENT PRIMARY KEY,
        experiment_id INT,
        type VARCHAR(50),
        value FLOAT,
        FOREIGN KEY (experiment_id) REFERENCES Experiments(id) ON DELETE CASCADE
    )
    """
}

In [61]:
with engine.connect() as connection:
    for table_name, sql_query in tables.items():
        connection.execute(text(sql_query)) # Use text() here!
        print(f"Tabela {table_name} criada com sucesso! ✅")

print("Todas as tabelas foram criadas corretamente! 🎉")

Tabela Models criada com sucesso! ✅
Tabela Hyperparameters criada com sucesso! ✅
Tabela LearnStrategies criada com sucesso! ✅
Tabela Experiments criada com sucesso! ✅
Tabela Metrics criada com sucesso! ✅
Todas as tabelas foram criadas corretamente! 🎉


## 2.Funções de Treinamento e Avaliação ##

### Importação das Bibliotecas ###

In [62]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 

In [63]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBClassifier, XGBRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, mean_squared_error, mean_absolute_error, r2_score

### Carregar Datasets ###

In [64]:
def load_dataset(name):
    from sklearn.datasets import load_iris, load_breast_cancer, load_diabetes, fetch_california_housing
    
    if name == "iris":
        data = load_iris()
    elif name == "breast_cancer":
        data = load_breast_cancer()
    elif name == "diabetes":
        from sklearn.datasets import load_diabetes 
        data = load_diabetes()
    elif name == "california":
        data = fetch_california_housing()
    else:
        raise ValueError("Dataset não suportado")
    
    return pd.DataFrame(data.data, columns=data.feature_names), pd.Series(data.target)


### Função de Treinamento e Salvamento no BD ###

In [65]:
def train_and_save_model(dataset_name, model_type, algorithm, model, params={}):
    # Carregar dataset
    X, y = load_dataset(dataset_name)
    
    # Normalizar os dados
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Separar treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # Treinar o modelo
    model.set_params(**params)
    model.fit(X_train, y_train)

    # Fazer previsões
    y_pred = model.predict(X_test)
    
    if model_type == "classification":
        metrics = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average='weighted'),
            "recall": recall_score(y_test, y_pred, average='weighted'),
            "f1_score": f1_score(y_test, y_pred, average='weighted'),
            "auc_roc": roc_auc_score(y_test, model.decision_function(X_test), multi_class='ovr') if hasattr(model, "decision_function") else None
        }
    else:  # Regression
        metrics = {
            "mse": mean_squared_error(y_test, y_pred),
            "rmse": np.sqrt(mean_squared_error(y_test, y_pred)),
            "mae": mean_absolute_error(y_test, y_pred),
            "r2": r2_score(y_test, y_pred)
        }

    with engine.connect() as connection:
        try:
            # Salvar model
            result = connection.execute(text("INSERT INTO Models (type, algorithm) VALUES (:type, :algorithm)"),
                                        {"type": 0 if model_type == "classification" else 1, "algorithm": algorithm})
            model_id = result.lastrowid

            # Salvar hyperparameters
            for name, value in params.items():
                connection.execute(text("INSERT INTO Hyperparameters (model_id, name, value) VALUES (:model_id, :name, :value)"),
                                 {"model_id": model_id, "name": name, "value": str(value)})

            # Salvar experiment
            result = connection.execute(text("INSERT INTO Experiments (model_id, dataset) VALUES (:model_id, :dataset)"),
                                        {"model_id": model_id, "dataset": dataset_name})
            experiment_id = result.lastrowid


            # Salvar metrics
            for metric, value in metrics.items():
                connection.execute(text("INSERT INTO Metrics (experiment_id, type, value) VALUES (:experiment_id, :type, :value)"),
                             {"experiment_id": experiment_id, "type": metric, "value": value})

            connection.commit()
            print(f"Modelo {algorithm} treinado e salvo com sucesso!")

        except Exception as e:
            connection.rollback()
            print(f"Erro ao salvar o modelo: {e}")
            raise

In [66]:
# Treinar e armazenar modelos
train_and_save_model("iris", "classification", "RandomForest", RandomForestClassifier(), {"n_estimators": 100})
train_and_save_model("breast_cancer", "classification", "SVM", SVC(probability=True), {"kernel": "linear"})
train_and_save_model("diabetes", "regression", "DecisionTree", DecisionTreeRegressor(), {"max_depth": 5})
train_and_save_model("california", "regression", "XGBoost", XGBRegressor(n_estimators=100, max_depth=5, learning_rate=0.1, subsample=0.7, colsample_bytree=0.7), {"n_estimators": 100})

Modelo RandomForest treinado e salvo com sucesso!
Modelo SVM treinado e salvo com sucesso!
Modelo DecisionTree treinado e salvo com sucesso!
Modelo XGBoost treinado e salvo com sucesso!


## 3. Relatórios

### Apresentação Geral dos Dados

In [67]:
def fetch_results():
    with engine.connect() as connection: 
        df = pd.read_sql(text("""
            SELECT e.dataset, m.algorithm, me.type, me.value 
            FROM Experiments e 
            JOIN Models m ON e.model_id = m.id
            JOIN Metrics me ON e.id = me.experiment_id
        """), connection)

        df_agg = df.groupby(['dataset', 'algorithm', 'type'])['value'].mean().reset_index() # Função de agregação

        return df_agg.pivot(index=['dataset', 'algorithm'], columns='type', values='value')


df_results = fetch_results()
df_results


Unnamed: 0_level_0,type,accuracy,auc_roc,f1_score,mae,mse,precision,r2,recall,rmse
dataset,algorithm,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
breast_cancer,SVM,0.95614,0.996397,0.956237,,,0.956488,,0.95614,
california,XGBoost,,,,0.329314,0.239564,,0.817184,,0.489453
diabetes,DecisionTree,,,,46.1525,3633.19,,0.314254,,60.2759
iris,RandomForest,1.0,,1.0,,,1.0,,1.0,


In [68]:
def fetch_detailed_results():
    with engine.connect() as connection:
        df = pd.read_sql(text("""
            SELECT e.dataset, m.algorithm, m.type AS model_type, me.type AS metric_type, me.value
            FROM Experiments e
            JOIN Models m ON e.model_id = m.id
            JOIN Metrics me ON e.id = me.experiment_id
        """), connection) 
        
        df_pivot = df.pivot_table(index=['dataset', 'algorithm', 'model_type'], columns='metric_type', values='value', aggfunc='mean').reset_index()
        return df_pivot

df_detailed_results = fetch_detailed_results()

df_detailed_results['model_type'] = df_detailed_results['model_type'].map({0: 'Classificação', 1: 'Regressão'})

classification_results = df_detailed_results[df_detailed_results['model_type'] == 'Classificação'].dropna(axis=1, how='all')
regression_results = df_detailed_results[df_detailed_results['model_type'] == 'Regressão'].dropna(axis=1, how='all')

In [69]:
classification_results


metric_type,dataset,algorithm,model_type,accuracy,auc_roc,f1_score,precision,recall
0,breast_cancer,SVM,Classificação,0.95614,0.996397,0.956237,0.956488,0.95614
3,iris,RandomForest,Classificação,1.0,,1.0,1.0,1.0


In [70]:
regression_results

metric_type,dataset,algorithm,model_type,mae,mse,r2,rmse
1,california,XGBoost,Regressão,0.329314,0.239564,0.817184,0.489453
2,diabetes,DecisionTree,Regressão,46.1525,3633.19,0.314254,60.2759


### Acrescentando um Novo Modelo

In [71]:
train_and_save_model("iris", "classification", "DecisionTree", DecisionTreeClassifier(criterion="entropy", max_depth=3), {"criterion": "entropy", "max_depth": 3})

Modelo DecisionTree treinado e salvo com sucesso!


In [72]:
df_detailed_results02 = fetch_detailed_results()

df_detailed_results02['model_type'] = df_detailed_results02['model_type'].map({0: 'Classificação', 1: 'Regressão'})

classification_results_02 = df_detailed_results02[df_detailed_results02['model_type'] == 'Classificação'].dropna(axis=1, how='all')
regression_results_02 = df_detailed_results02[df_detailed_results02['model_type'] == 'Regressão'].dropna(axis=1, how='all')

In [73]:
df_detailed_results02

metric_type,dataset,algorithm,model_type,accuracy,auc_roc,f1_score,mae,mse,precision,r2,recall,rmse
0,breast_cancer,SVM,Classificação,0.95614,0.996397,0.956237,,,0.956488,,0.95614,
1,california,XGBoost,Regressão,,,,0.329314,0.239564,,0.817184,,0.489453
2,diabetes,DecisionTree,Regressão,,,,46.1525,3633.19,,0.314254,,60.2759
3,iris,DecisionTree,Classificação,1.0,,1.0,,,1.0,,1.0,
4,iris,RandomForest,Classificação,1.0,,1.0,,,1.0,,1.0,


In [74]:
classification_results_02

metric_type,dataset,algorithm,model_type,accuracy,auc_roc,f1_score,precision,recall
0,breast_cancer,SVM,Classificação,0.95614,0.996397,0.956237,0.956488,0.95614
3,iris,DecisionTree,Classificação,1.0,,1.0,1.0,1.0
4,iris,RandomForest,Classificação,1.0,,1.0,1.0,1.0


In [75]:
regression_results_02

metric_type,dataset,algorithm,model_type,mae,mse,r2,rmse
1,california,XGBoost,Regressão,0.329314,0.239564,0.817184,0.489453
2,diabetes,DecisionTree,Regressão,46.1525,3633.19,0.314254,60.2759


In [76]:
# Função para buscar modelos e hiperparâmetros
def fetch_models_and_hyperparams():
    # Conectar ao banco de dados usando SQLAlchemy
    with engine.connect() as connection:
        # Executar consulta SQL para buscar datasets, algoritmos e hiperparâmetros
        df = pd.read_sql(text("""
            SELECT e.dataset, m.algorithm, h.name AS hyperparameter_name, h.value AS hyperparameter_value
            FROM Experiments e
            JOIN Models m ON e.model_id = m.id
            JOIN Hyperparameters h ON m.id = h.model_id
        """), connection)

        # Inicializar dicionário para armazenar resultados
        results = {}
        # Agrupar resultados por dataset e algoritmo
        for (dataset, algorithm), group in df.groupby(['dataset', 'algorithm']):
            # Concatenar hiperparâmetros em uma string
            hyperparams = "; ".join(f"{row['hyperparameter_name']} = {row['hyperparameter_value']}" for _, row in group.iterrows())
            # Armazenar hiperparâmetros no dicionário de resultados
            results[f"{dataset} ({algorithm})"] = f"{{ {hyperparams} }}"

        return results

# Buscar modelos e hiperparâmetros e armazenar em uma variável
models_and_hyperparams = fetch_models_and_hyperparams()

# Imprimir resultados
for model, hyperparams in models_and_hyperparams.items():
    print(f"{model} : {hyperparams}")


breast_cancer (SVM) : { kernel = linear }
california (XGBoost) : { n_estimators = 100 }
diabetes (DecisionTree) : { max_depth = 5 }
iris (DecisionTree) : { criterion = entropy; max_depth = 3 }
iris (RandomForest) : { n_estimators = 100 }


In [77]:
# Mostrar para cada modelo treinado o dataset, algoritmo, tipo de métrica e valor da métrica, assim como os hiperparâmetros utilizados para treinar o modelo.
def display_model_info():
	detailed_results = fetch_detailed_results()
	models_and_hyperparams = fetch_models_and_hyperparams()

	for index, row in detailed_results.iterrows():
		dataset = row['dataset']
		algorithm = row['algorithm']
		model_type = row['model_type']
		metrics = row.drop(['dataset', 'algorithm', 'model_type']).dropna().to_dict()
		hyperparams = models_and_hyperparams.get(f"{dataset} ({algorithm})", "{}")

		print(f"Dataset: {dataset}")
		print(f"Algorithm: {algorithm}")
		print(f"Model Type: {model_type}")
		print("Metrics:")
		for metric, value in metrics.items():
			print(f"  {metric}: {value}")
		print(f"Hyperparameters: {hyperparams}")
		print("-" * 50)

display_model_info()

Dataset: breast_cancer
Algorithm: SVM
Model Type: 0
Metrics:
  accuracy: 0.95614
  auc_roc: 0.996397
  f1_score: 0.956237
  precision: 0.956488
  recall: 0.95614
Hyperparameters: { kernel = linear }
--------------------------------------------------
Dataset: california
Algorithm: XGBoost
Model Type: 1
Metrics:
  mae: 0.329314
  mse: 0.239564
  r2: 0.817184
  rmse: 0.489453
Hyperparameters: { n_estimators = 100 }
--------------------------------------------------
Dataset: diabetes
Algorithm: DecisionTree
Model Type: 1
Metrics:
  mae: 46.1525
  mse: 3633.19
  r2: 0.314254
  rmse: 60.2759
Hyperparameters: { max_depth = 5 }
--------------------------------------------------
Dataset: iris
Algorithm: DecisionTree
Model Type: 0
Metrics:
  accuracy: 1.0
  f1_score: 1.0
  precision: 1.0
  recall: 1.0
Hyperparameters: { criterion = entropy; max_depth = 3 }
--------------------------------------------------
Dataset: iris
Algorithm: RandomForest
Model Type: 0
Metrics:
  accuracy: 1.0
  f1_score: 1

In [None]:
import os

# Gerar relatório HTML dos resultados
# Criar diretório para armazenar relatórios, se não existir
os.makedirs("reports", exist_ok=True)

# Gerar um nome de arquivo único para o relatório
def generate_unique_filename(base_name, extension=".html"):
    counter = 1
    filename = f"{base_name}{extension}"
    while os.path.exists(os.path.join("reports", filename)):
        filename = f"{base_name}_{counter}{extension}"
        counter += 1
    return filename

# Gerar nome de arquivo único para o relatório atual
report_filename = generate_unique_filename("report_results")

def generate_html_report():
    detailed_results = fetch_detailed_results()
    models_and_hyperparams = fetch_models_and_hyperparams()

    # Estilo CSS para o relatório
    css_styles = """
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; padding: 20px; background-color: #f4f4f4; }
        h1 { text-align: center; color: #333; }
        h2 { border-bottom: 2px solid #2F4156; padding-bottom: 5px; color: #2F4156; }
        .container { max-width: 900px; margin: auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0px 0px 10px rgba(0,0,0,0.1); }
        table { width: 100%; border-collapse: collapse; margin-top: 10px; }
        th, td { padding: 10px; border: 1px solid #ddd; text-align: left; }
        th { background-color: #2F4156; color: white; }
        .hyperparams { font-style: italic; color: #555; }
        hr { border: 0; height: 1px; background: #ddd; margin: 20px 0; }
    </style>
    """

    # Início do HTML
    html_content = f"""
    <html>
    <head>
        <title>Relatório de Resultados</title>
        {css_styles}
    </head>
    <body>
        <div class='container'>
            <h1>Relatório de Resultados</h1>
    """

    # Agrupar os resultados por dataset
    grouped_results = detailed_results.groupby("dataset")

    # Iterar sobre os datasets agrupados
    for dataset, group in grouped_results:
        html_content += f"<h2>Dataset: {dataset}</h2>"

        for index, row in group.iterrows():
            algorithm = row['algorithm']
            model_type = "Classificação" if row['model_type'] == 0 else "Regressão"
            metrics = row.drop(['dataset', 'algorithm', 'model_type']).dropna().to_dict()
            hyperparams = models_and_hyperparams.get(f"{dataset} ({algorithm})", "{}")

            html_content += f"""
                <p><strong>Algorithm:</strong> {algorithm}</p>
                <p><strong>Model Type:</strong> {model_type}</p>
                
                <h3>Métricas:</h3>
                <table>
                    <tr><th>Métrica</th><th>Valor</th></tr>
            """

            for metric, value in metrics.items():
                html_content += f"<tr><td>{metric}</td><td>{value:.4f}</td></tr>"

            html_content += f"""
                </table>
                <p class='hyperparams'><strong>Hyperparameters:</strong> {hyperparams}</p>
                <hr>
            """

    # Fechando o HTML
    html_content += """
        </div>
    </body>
    </html>
    """


    with open(f"reports/{report_filename}", "w", encoding="utf-8") as file:
        file.write(html_content)

generate_html_report()
print("Relatório HTML gerado com sucesso!")

Relatório HTML gerado com sucesso!


In [None]:
engine.dispose()
print("Conexão com o banco de dados encerrada.")