In [1]:
from sklearn.datasets import load_iris, fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np




In [10]:
def run_pipeline(X, y, dataset_name):
    print(f"\n### {dataset_name} ###")

    # Normaliza os dados
    X = StandardScaler().fit_transform(X)

    # Divide em treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # PCA com 5 componentes
    pca = PCA(n_components=3)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    for clf in [RandomForestClassifier(), SVC()]:
        # Sem PCA
        clf.fit(X_train, y_train)
        acc_original = accuracy_score(y_test, clf.predict(X_test))

        # Com PCA
        clf.fit(X_train_pca, y_train)
        acc_pca = accuracy_score(y_test, clf.predict(X_test_pca))

        print(f"{clf.__class__.__name__} - Sem PCA: {acc_original:.4f} | Com PCA (2 comp.): {acc_pca:.4f}")



In [11]:
# IRIS
iris = load_iris()
run_pipeline(iris.data, iris.target, "Iris")

# MNIST (reduzido para 10.000 amostras para desempenho)
print("\nCarregando MNIST...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X_mnist, y_mnist = mnist.data, mnist.target.astype(int)
X_sample, _, y_sample, _ = train_test_split(X_mnist, y_mnist, train_size=10000, stratify=y_mnist, random_state=42)

run_pipeline(X_sample, y_sample, "MNIST (10k amostras)")


### Iris ###
RandomForestClassifier - Sem PCA: 1.0000 | Com PCA (2 comp.): 0.9778
SVC - Sem PCA: 1.0000 | Com PCA (2 comp.): 1.0000

Carregando MNIST...

### MNIST (10k amostras) ###
RandomForestClassifier - Sem PCA: 0.9433 | Com PCA (2 comp.): 0.5270
SVC - Sem PCA: 0.9307 | Com PCA (2 comp.): 0.5570


É notável que o dataset mnist não reage bem ao pca, fazendo a acurácia cair cerca de 30%
