In [17]:
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

from models.Estatisticos.Discriminante_gaussiano.model import GaussianDiscriminant

In [18]:
# Find data/breast.csv by walking up from the working directory, so the
# notebook runs from anywhere inside the project instead of relying on one
# machine's absolute path.
DATA_FILE = Path("data") / "breast.csv"
DATA_PATH = next(
    (
        base / DATA_FILE
        for base in (Path.cwd(), *Path.cwd().parents)
        if (base / DATA_FILE).is_file()
    ),
    None,
)
if DATA_PATH is None:
    raise FileNotFoundError(
        f"Could not find {DATA_FILE} in {Path.cwd()} or any parent directory"
    )

# header=None: the first line of the file is read as data, so the columns get
# integer labels.
df = pd.read_csv(DATA_PATH, header=None)

#### Separando X e Y e Treino/Teste

In [19]:
# Every column except the last is a feature; the last column is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [20]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [21]:
# Project-local classifier, created with its default settings.
model = GaussianDiscriminant()

In [22]:
# Train on the training portion of the hold-out split.
model.fit(X_train, y_train)

In [23]:
# Predict for the held-out test rows; compared against y_test below.
y_pred = model.predict(X_test)

In [24]:
# Sanity check: one prediction is expected per test row.
y_pred.shape

(114,)

#### Calculando Métricas

In [25]:
# Confusion counts and accuracy for the hold-out predictions, as reported by
# the model's own helper.
metrics = model.calcule_metrics(y_pred=y_pred, y_real=y_test)
TP = metrics['TP']
TN = metrics['TN']
FP = metrics['FP']
FN = metrics['FN']
AC = metrics['AC']

accuracy = AC

# Each ratio is undefined when its denominator is zero (no predicted
# positives, no actual positives, or precision + recall == 0). Fall back to
# 0.0 in those cases instead of raising or producing NaN.
predicted_positives = TP + FP
actual_positives = TP + FN
precision = (TP / predicted_positives) if predicted_positives else 0.0
recall = (TP / actual_positives) if actual_positives else 0.0
f1_score = (
    2 * ((precision * recall) / (precision + recall))
    if (precision + recall)
    else 0.0
)

In [26]:
# 2x2 layout: first row holds TN and FP, second row holds FN and TP.
# NOTE(review): this reads as rows = actual class, columns = predicted class
# if calcule_metrics treats class 1 as the positive class -- confirm.
class_labels = ["0", "1"]
confusion_matrix = np.array([[TN, FP], [FN, TP]])

confusion_matrix_df = pd.DataFrame(
    data=confusion_matrix,
    index=class_labels,
    columns=class_labels,
)
confusion_matrix_df

Unnamed: 0,0,1
0,43,0
1,12,59


## Aplicando validação cruzada em 10 folds

In [27]:
# 10-fold splitter; shuffling with a fixed seed keeps fold membership
# repeatable between runs.
kf = KFold(shuffle=True, n_splits=10, random_state=42)

# One accumulator per metric; the cross-validation loop appends one value
# per fold to each.
accuracies, precisions, recalls, f1_scores = [], [], [], []

In [28]:
# Start from empty accumulators every time this cell runs, so re-running it
# does not append a second set of fold scores to the previous ones.
accuracies, precisions, recalls, f1_scores = [], [], [], []

for train_index, test_index in kf.split(X):
    # Fold-local names leave the earlier hold-out split, its predictions and
    # its confusion counts untouched.
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]

    # A fresh estimator per fold guarantees that nothing learned on a
    # previous split carries over into this one.
    fold_model = GaussianDiscriminant()
    fold_model.fit(X_fold_train, y_fold_train)

    fold_pred = fold_model.predict(X_fold_test)

    results = fold_model.calcule_metrics(y_real=y_fold_test, y_pred=fold_pred)
    fold_tp = results['TP']
    fold_fp = results['FP']
    fold_fn = results['FN']
    fold_accuracy = results['AC']

    # Each ratio is undefined when its denominator is zero; score such a fold
    # as 0.0 instead of raising or putting NaN into the averages.
    predicted_positives = fold_tp + fold_fp
    actual_positives = fold_tp + fold_fn
    fold_precision = (fold_tp / predicted_positives) if predicted_positives else 0.0
    fold_recall = (fold_tp / actual_positives) if actual_positives else 0.0
    fold_f1 = (
        2 * ((fold_precision * fold_recall) / (fold_precision + fold_recall))
        if (fold_precision + fold_recall)
        else 0.0
    )

    accuracies.append(fold_accuracy)
    precisions.append(fold_precision)
    recalls.append(fold_recall)
    f1_scores.append(fold_f1)


In [29]:
# Map each metric label to (mean, standard deviation) of its per-fold scores.
fold_scores = {
    "Accuracy": accuracies,
    "Precision": precisions,
    "Recall": recalls,
    "F1-Score": f1_scores,
}
metrics = {
    label: (np.mean(values), np.std(values))
    for label, values in fold_scores.items()
}

In [30]:
# Print one line per metric: its mean and standard deviation, 4 decimals each.
for metric_name, stats in metrics.items():
    fold_mean, fold_std = stats
    print(f"{metric_name}: Média = {fold_mean:.4f}, Desvio Padrão = {fold_std:.4f}")

Accuracy: Média = 0.8822, Desvio Padrão = 0.0362
Precision: Média = 0.9926, Desvio Padrão = 0.0148
Recall: Média = 0.8200, Desvio Padrão = 0.0478
F1-Score: Média = 0.8972, Desvio Padrão = 0.0292
