In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.svm import SVC, LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from pathlib import Path

# Locate the Dry Bean dataset. A copy in the current working directory (usually
# the notebook's own folder) is preferred so the analysis is not tied to one
# machine; the original absolute location is kept as a fallback so the author's
# existing setup keeps working unchanged.
_LOCAL_CSV = Path('Dry_Bean.csv')
_FALLBACK_CSV = 'C:\\Users\\joaov\\UFRJ\\2025.1\\IML\\Trabalho Final\\Trabalho-Final---IML\\Dry_Bean.csv'
DATA_PATH = _LOCAL_CSV if _LOCAL_CSV.exists() else _FALLBACK_CSV

df = pd.read_csv(DATA_PATH)

In [3]:
# Feature matrix: every column of the loaded frame except the 'Class' target.
X = df.drop(columns='Class')
y = df['Class']

# Encode the class labels as integer codes. The fitted encoder is kept around
# so the codes can be mapped back to the original labels later if needed.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# 10 stratified folds repeated 30 times -> 300 train/test splits per model,
# with a fixed seed so the splits are the same on every run.
cv_strategy = RepeatedStratifiedKFold(n_repeats=30, n_splits=10, random_state=42)

def _scaled_pipeline(step_name, estimator):
    """Return a Pipeline that standardizes the features, then applies `estimator`.

    `step_name` is the name given to the estimator step; the scaling step is
    always called 'scaler'.
    """
    return Pipeline([('scaler', StandardScaler()), (step_name, estimator)])


# Candidate SVM models, keyed by the name used when reporting results.
pipelines = {
    'SVC_RBF': _scaled_pipeline('svc', SVC(kernel='rbf', random_state=42)),
    'LinearSVC': _scaled_pipeline('linsvc', LinearSVC(max_iter=10000, random_state=42)),
}

# Evaluate every candidate pipeline with the shared cross-validation strategy and
# print, per model, the mean accuracy, its spread and a 95% t-interval for the mean.
results = {}  # model name -> array with one accuracy score per CV split
print("Iniciando as avaliações do modelo de SVM...\n")
for name, pipe in pipelines.items():
    print(f"-> Avaliando o modelo: {name}")
    scores = cross_val_score(pipe, X, y_encoded, cv=cv_strategy, scoring='accuracy', n_jobs=-1)
    results[name] = scores

    mean_accuracy = np.mean(scores)
    # Sample standard deviation (ddof=1), so the reported spread uses the same
    # estimator as stats.sem below, whose default is also ddof=1.
    std_accuracy = np.std(scores, ddof=1)
    # NOTE(review): this t-interval treats the per-split scores as independent.
    # Splits from repeated k-fold share training data, so the interval is
    # presumably optimistic (too narrow) — confirm whether a corrected variance
    # estimate is wanted before quoting it.
    confidence_interval = stats.t.interval(0.95, len(scores)-1, loc=mean_accuracy, scale=stats.sem(scores))
    print(f"{name} | Média: {mean_accuracy:.4f} | Desvio: {std_accuracy:.4f} | IC 95%: ({confidence_interval[0]:.4f}, {confidence_interval[1]:.4f})\n")

Iniciando as avaliações do modelo de SVM...

-> Avaliando o modelo: SVC_RBF
SVC_RBF | Média: 0.9296 | Desvio: 0.0064 | IC 95%: (0.9289, 0.9303)

-> Avaliando o modelo: LinearSVC
LinearSVC | Média: 0.9176 | Desvio: 0.0066 | IC 95%: (0.9168, 0.9183)

