In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tabulate import tabulate
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer,accuracy_score, precision_score, recall_score, f1_score

In [2]:
X_train = pd.read_csv('X_training.csv')
y_train = pd.read_csv('y_training.csv')
X_validation = pd.read_csv('X_validation.csv')
y_validation = pd.read_csv('y_validation.csv')
X_test = pd.read_csv('X_test.csv')
y_test = pd.read_csv('y_test.csv')

In [3]:
# Função para calcular as métricas
def calculate_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision =  precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score (y_true, y_pred, average='weighted')
    return [accuracy, precision, recall, f1]

In [4]:
# Função para imprimir os resultados em tabela formatada
def print_table(title, df):
    print(title)
    print(tabulate(df, headers='keys', tablefmt="grid"))
    print("\n")

In [5]:
X_train = np.array(X_train)
y_train = np.array(y_train).ravel()

X_validation = np.array(X_validation)
y_validation = np.array(y_validation).ravel()

X_test = np.array(X_test)
y_test = np.array(y_test).ravel()

In [14]:
# Definindo os modelos e os parâmetros 
models = {
    'K-Neighbors': {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': 7
        }
    },
    'Decision Tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'max_depth': 25
        }
    },
    'Random Forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': 100,
            'max_depth': 25
        }
    },
    'Logistic Regression': {
        'model': LogisticRegression(),
        'params': {
            'C': 1.0,
            'solver': 'lbfgs',
            'max_iter': 100
        }
    }
}

In [15]:
# Função para treinar, avaliar e otimizar os modelos
def train_and_evalutate_models (models, X_train, y_train, X_validation, y_validation, X_test, y_test):
    results = []

    for name, model_info in models.items():
        model = model_info['model']
        params = model_info['params']

        #Treinando o modelo
        model.fit(X_train, y_train)

        # Previsões 
        y_train_pred = model.predict(X_train)
        y_validation_pred = model.predict(X_validation)
        y_test_pred = model.predict(X_test)

        # Calculando as métricas
        train_metrics = calculate_metrics(y_train, y_train_pred)
        validation_metrics = calculate_metrics(y_validation, y_validation_pred)
        test_metrics = calculate_metrics(y_test, y_test_pred)

        results.append({
            'Algoritmo': name,
            'Dados': 'Treinamento',
            'Accuracy': train_metrics[0],
            'Precision': train_metrics[1],
            'Recall': train_metrics[2],
            'F1-Score': train_metrics[3]
        })
        results.append({
            'Algoritmo': name,
            'Dados': 'Validação',
            'Accuracy': train_metrics[0],
            'Precision': train_metrics[1],
            'Recall': train_metrics[2],
            'F1-Score': train_metrics[3]
        })
        results.append({
            'Algoritmo': name,
            'Dados': 'Teste',
            'Accuracy': validation_metrics[0],
            'Precision': validation_metrics[1],
            'Recall': validation_metrics[2],
            'F1-Score': validation_metrics[3]
        })

    return results

In [16]:
# Executando a função para treinar e avaliar os modelos
results = train_and_evalutate_models(models, X_train, y_train, X_validation, y_validation, X_test, y_test)

# Criando um DataFrame para os resultados
results_df = pd.DataFrame(results)

print_table("Resultados dos modelos de classificação", results_df)

Resultados dos modelos de classificação
+----+---------------------+-------------+------------+-------------+----------+------------+
|    | Algoritmo           | Dados       |   Accuracy |   Precision |   Recall |   F1-Score |
|  0 | K-Neighbors         | Treinamento |   0.781562 |    0.78097  | 0.781562 |   0.781125 |
+----+---------------------+-------------+------------+-------------+----------+------------+
|  1 | K-Neighbors         | Validação   |   0.781562 |    0.78097  | 0.781562 |   0.781125 |
+----+---------------------+-------------+------------+-------------+----------+------------+
|  2 | K-Neighbors         | Teste       |   0.675665 |    0.674196 | 0.675665 |   0.674669 |
+----+---------------------+-------------+------------+-------------+----------+------------+
|  3 | Decision Tree       | Treinamento |   1        |    1        | 1        |   1        |
+----+---------------------+-------------+------------+-------------+----------+------------+
|  4 | Decision Tree

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
