In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

# Single seed shared by every step that accepts a random_state
seed = 9

# Iris dataset: feature matrix goes to X, class labels go to y
iris = load_iris()
X, y = iris.data, iris.target

# The two classifiers compared below:
# a seeded decision tree and a KNN that votes over 5 neighbours.
dt_classifier = DecisionTreeClassifier(random_state=seed)
knn_classifier = KNeighborsClassifier(n_neighbors=5)

In [5]:
from sklearn.metrics import accuracy_score

# Hold out 30% of the samples for testing. stratify=y asks for a split
# that keeps the class proportions of y in both the training and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=seed,
    stratify=y,
)

# Decision tree: fit on the training split, then predict the test split
y_predictions_dt = dt_classifier.fit(X_train, y_train).predict(X_test)
print("DT Accuracy:", accuracy_score(y_test, y_predictions_dt))

# KNN: same procedure on the same split
y_predictions_knn = knn_classifier.fit(X_train, y_train).predict(X_test)
print("KNN Accuracy:", accuracy_score(y_test, y_predictions_knn))

DT Accuracy: 0.9333333333333333
KNN Accuracy: 0.9555555555555556


In [6]:
# 5-fold cross-validation for each classifier.
# Note that this runs over the full dataset (X, y), not only the training split.
dt_cv_scores, knn_cv_scores = (
    cross_val_score(model, X, y, cv=5)
    for model in (dt_classifier, knn_classifier)
)

# Per-fold scores followed by their mean, one classifier at a time
print("DT CV Scores:", dt_cv_scores)
print("DT CV mean:", dt_cv_scores.mean())
print("\nKNN CV Scores:", knn_cv_scores)
print("KNN CV mean:", knn_cv_scores.mean())

DT CV Scores: [0.96666667 0.96666667 0.9        0.96666667 1.        ]
DT CV mean: 0.9600000000000002

KNN CV Scores: [0.96666667 1.         0.93333333 0.96666667 1.        ]
KNN CV mean: 0.9733333333333334


In [7]:
from sklearn.metrics import precision_score, recall_score, mean_squared_error


# Precision and recall on the held-out test predictions.
# average='macro' computes the metric per class and takes the unweighted mean.
dt_precision = precision_score(y_test, y_predictions_dt, average='macro')
knn_precision = precision_score(y_test, y_predictions_knn, average='macro')

dt_recall = recall_score(y_test, y_predictions_dt, average='macro')
knn_recall = recall_score(y_test, y_predictions_knn, average='macro')

# Mean squared error between the true and predicted class codes.
# NOTE(review): the Iris targets are class labels, not quantities, so MSE
# treats the integer codes as numbers (a 0-vs-2 mix-up weighs more than
# 0-vs-1). Read it with care; accuracy/precision/recall are the usual
# classification metrics.
dt_mse = mean_squared_error(y_test, y_predictions_dt)
knn_mse = mean_squared_error(y_test, y_predictions_knn)

# The value printed here comes from mean_squared_error, so it is labelled
# "Mean Squared Error" (there is no "mean squared absolute error" metric).
print("\nDecision Tree Classifier Metrics:")
print("Precision:", dt_precision)
print("Recall:", dt_recall)
print("Mean Squared Error:", dt_mse)

print("\nK-Nearest Neighbors Classifier Metrics:")
print("Precision:", knn_precision)
print("Recall:", knn_recall)
print("Mean Squared Error:", knn_mse)



Decision Tree Classifier Metrics:
Precision: 0.9345238095238096
Recall: 0.9333333333333332
Mean Squared Absolute Error: 0.06666666666666667

K-Nearest Neighbors Classifier Metrics:
Precision: 0.9555555555555556
Recall: 0.9555555555555556
Mean Squared Absolute Error: 0.044444444444444446


# SVM (Support Vector Machine)
## Exercício usando o conjunto Iris para comparar os kernels

In [8]:
from sklearn.svm import SVC

# SVM models, one per kernel; every other SVC setting is left at its default
svm_linear_model = SVC(kernel='linear')
svm_poly_model = SVC(kernel='poly')
svm_rbf_model = SVC(kernel='rbf')

# Hold-out protocol: fit on the training split and score on the test split.
# This is NOT resubstitution, which would score the models on the same
# X_train they were fitted on; the comment and printed header are labelled
# accordingly so they describe what is computed.
svm_linear_model.fit(X_train, y_train)
svm_poly_model.fit(X_train, y_train)
svm_rbf_model.fit(X_train, y_train)

# Predictions for the held-out test samples
svm_linear_pred = svm_linear_model.predict(X_test)
svm_poly_pred = svm_poly_model.predict(X_test)
svm_rbf_pred = svm_rbf_model.predict(X_test)

# Test-set accuracy per kernel
svm_linear_acc = accuracy_score(y_test, svm_linear_pred)
svm_poly_acc = accuracy_score(y_test, svm_poly_pred)
svm_rbf_acc = accuracy_score(y_test, svm_rbf_pred)

print("Acurácia (ACC) - SVM com diferentes kernels - Hold-out:")
print("Linear kernel:", svm_linear_acc)
print("Polynomial kernel:", svm_poly_acc)
print("RBF kernel:", svm_rbf_acc)



Acurácia (ACC) - SVM com diferentes kernels - Resubstituição:
Linear kernel: 0.9555555555555556
Polynomial kernel: 0.9555555555555556
RBF kernel: 0.9333333333333333
