In [69]:
import timeit
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

In [70]:
# Fetch the MNIST dataset ('mnist_784', version 1) from OpenML.
mnist = fetch_openml(
    name='mnist_784',
    version=1,
    parser='auto',
)
# Last expression of the cell: display the fields available on the returned object.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [71]:
# Pull the feature matrix and the label vector out of the loaded dataset.
X = mnist["data"]
y = mnist["target"]
# Show both shapes, one per line.
print(X.shape, y.shape, sep="\n")

(70000, 784)
(70000,)


In [72]:
# Hold out 20% of the samples as the test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ============================================================
# Modelo 1: Random Forest no conjunto original
# ============================================================

In [73]:
# Build the Random Forest used for the run without PCA (100 trees, fixed seed).
rf1 = RandomForestClassifier(
    n_estimators=100,
    n_jobs=-1,
    random_state=42,
)

In [74]:
# Training time: wall-clock duration of fitting rf1 on the original (non-PCA) training split.
# Only the fit call sits between the two timer reads, so nothing else is measured.
start = timeit.default_timer()
rf1.fit(X_train, y_train)
end = timeit.default_timer()
tempo_treino1 = end - start  # elapsed time, later printed as seconds

In [75]:
# Prediction time: wall-clock duration of predicting the whole test split with rf1.
# The predictions are kept in y_pred1 for the reports printed in later cells.
start = timeit.default_timer()
y_pred1 = rf1.predict(X_test)
end = timeit.default_timer()
tempo_pred1 = end - start  # elapsed time, later printed as seconds

In [76]:
# Summary of the run without PCA: the two timings, then the per-class metrics.
# One print call with a newline separator emits exactly the same text as one print per line.
print(
    "\n=== Random Forest sem PCA ===",
    f"Tempo treino: {tempo_treino1:.2f} s",
    f"Tempo previsão: {tempo_pred1:.2f} s",
    "\n=== Relatório sem PCA ===",
    classification_report(y_test, y_pred1),
    sep="\n",
)


=== Random Forest sem PCA ===
Tempo treino: 39.46 s
Tempo previsão: 0.47 s

=== Relatório sem PCA ===
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1343
           1       0.98      0.98      0.98      1600
           2       0.95      0.97      0.96      1380
           3       0.96      0.95      0.96      1433
           4       0.96      0.97      0.97      1295
           5       0.97      0.96      0.97      1273
           6       0.98      0.98      0.98      1396
           7       0.97      0.97      0.97      1503
           8       0.96      0.95      0.96      1357
           9       0.96      0.95      0.95      1420

    accuracy                           0.97     14000
   macro avg       0.97      0.97      0.97     14000
weighted avg       0.97      0.97      0.97     14000



# ============================================================
# Modelo 2: Random Forest com PCA (0.95 da variância explicada)
# ============================================================

In [81]:
# 2. Apply PCA
# The projection is fitted on the training split only and then reused, unchanged,
# for the test split, so no test data influences the learned components.
pca = PCA(n_components=0.95)  # keep 95% of the variance
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [82]:
# Report how many feature dimensions remain after the PCA projection.
print(
    f"\nPCA reduziu de {X_train.shape[1]} "
    f"para {X_train_pca.shape[1]} dimensões"
)


PCA reduziu de 784 para 154 dimensões


In [83]:
# Second Random Forest, configured identically to the first one, to be trained on the PCA features.
rf2 = RandomForestClassifier(
    n_estimators=100,
    n_jobs=-1,
    random_state=42,
)

In [84]:
# Training time: wall-clock duration of fitting rf2 on the PCA-projected training split.
# NOTE(review): only the forest fit is timed here; the cost of pca.fit_transform,
# which ran in an earlier cell, is not part of tempo_treino2.
start = timeit.default_timer()
rf2.fit(X_train_pca, y_train)
end = timeit.default_timer()
tempo_treino2 = end - start  # elapsed time, later printed as seconds

In [85]:
# Apply to the test set: time rf2 predicting on the PCA-projected test split.
# NOTE(review): the pca.transform of the test split happened in an earlier cell,
# so its cost is not included in tempo_pred2.
start = timeit.default_timer()
y_pred2 = rf2.predict(X_test_pca)
end = timeit.default_timer()
tempo_pred2 = end - start  # elapsed time, later printed as seconds

In [86]:
# Summary of the run with PCA: the two timings, then the per-class metrics.
# One print call with a newline separator emits exactly the same text as one print per line.
print(
    "\n=== Random Forest com PCA (0.95) ===",
    f"Tempo treino: {tempo_treino2:.2f} s",
    f"Tempo previsão: {tempo_pred2:.2f} s",
    "\n=== Relatório com PCA ===",
    classification_report(y_test, y_pred2),
    sep="\n",
)


=== Random Forest com PCA (0.95) ===
Tempo treino: 123.43 s
Tempo previsão: 0.32 s

=== Relatório com PCA ===
              precision    recall  f1-score   support

           0       0.97      0.98      0.97      1343
           1       0.98      0.98      0.98      1600
           2       0.94      0.95      0.94      1380
           3       0.91      0.94      0.92      1433
           4       0.93      0.96      0.94      1295
           5       0.94      0.93      0.93      1273
           6       0.97      0.97      0.97      1396
           7       0.96      0.95      0.95      1503
           8       0.94      0.90      0.92      1357
           9       0.94      0.92      0.93      1420

    accuracy                           0.95     14000
   macro avg       0.95      0.95      0.95     14000
weighted avg       0.95      0.95      0.95     14000



# ============================================================
# Comparação final
# ============================================================

In [87]:
# Side-by-side recap: both classification reports followed by the training and
# prediction timings of the two models. The pieces are emitted through a single
# print call joined by newlines, which yields the same text as one print per piece.
print(
    "\n=== Comparação Final ===",
    "\n=== Relatório sem PCA ===",
    classification_report(y_test, y_pred1),
    "\n=== Relatório com PCA ===",
    classification_report(y_test, y_pred2),
    f"Tempo treino sem PCA: {tempo_treino1:.2f}s | com PCA: {tempo_treino2:.2f}s",
    f"Tempo previsão sem PCA: {tempo_pred1:.2f}s | com PCA: {tempo_pred2:.2f}s",
    sep="\n",
)


=== Comparação Final ===

=== Relatório sem PCA ===
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1343
           1       0.98      0.98      0.98      1600
           2       0.95      0.97      0.96      1380
           3       0.96      0.95      0.96      1433
           4       0.96      0.97      0.97      1295
           5       0.97      0.96      0.97      1273
           6       0.98      0.98      0.98      1396
           7       0.97      0.97      0.97      1503
           8       0.96      0.95      0.96      1357
           9       0.96      0.95      0.95      1420

    accuracy                           0.97     14000
   macro avg       0.97      0.97      0.97     14000
weighted avg       0.97      0.97      0.97     14000


=== Relatório com PCA ===
              precision    recall  f1-score   support

           0       0.97      0.98      0.97      1343
           1       0.98      0.98      0.98      160