In [1]:
import pickle

from sklearn.decomposition import KernelPCA, PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Open the file holding the previously saved data and unpack its four
# objects (training features/labels and test features/labels, going by
# the variable names).
# NOTE(review): pickle.load can execute arbitrary code while
# deserializing — only load this file if it comes from a trusted source.
with open("cover_type.pkl", "rb") as f:
    (
        X_cover_treinamento,
        y_cover_treinamento,
        X_cover_teste,
        y_cover_teste,
    ) = pickle.load(f)

In [None]:
# Use the training portion loaded from the pickle as the working
# dataset (features in X_cover, labels in y_cover).
# NOTE(review): X_cover_teste / y_cover_teste are not used here — confirm
# that re-splitting only the training portion is intended.
X_cover = X_cover_treinamento
y_cover = y_cover_treinamento

# Hold out 25% of the samples for testing and keep the rest for training;
# random_state=42 fixes the seed used for the split.
X_treino, X_teste, y_treino, y_teste = train_test_split(
    X_cover, y_cover, test_size=0.25, random_state=42
)

In [None]:
# Build a PCA model with 6 components.
pca = PCA(n_components=6)

# Fit on the training split only, then apply the same fitted projection
# to the test split.
X_train_pca = pca.fit_transform(X_treino)
X_test_pca = pca.transform(X_teste)

In [None]:
# Show the fraction of the variance explained by each principal component
# of the fitted PCA (displayed as this cell's output).
pca.explained_variance_ratio_

array([0.22618668, 0.19513855, 0.15016139, 0.10259524, 0.06892279,
       0.05111358])

In [None]:
# Total fraction of the variance explained by the 6 principal components
# together (sum of the per-component ratios).
pca.explained_variance_ratio_.sum()

np.float64(0.7941182271065695)

In [None]:
# Random Forest classifier with 40 trees, trained on the PCA-projected
# training data; random_state=42 fixes the seed.
tree_classifier = RandomForestClassifier(
    n_estimators=40,
    random_state=42,
)
tree_classifier.fit(X_train_pca, y_treino)

# Predict on the PCA-projected test data and report the accuracy
# against the held-out labels.
y_pred = tree_classifier.predict(X_test_pca)
accuracy = accuracy_score(y_teste, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.7003


### Kernel PCA

In [None]:
# Build a Kernel PCA model with 6 components.
# NOTE(review): no `kernel` argument is passed, so the library default is
# used — confirm that is the kernel intended for this comparison.
kpca = KernelPCA(n_components=6)

# Fit on the training split only, then project the test split with the
# fitted model.
X_train_kpca = kpca.fit_transform(X_treino)
X_test_kpca = kpca.transform(X_teste)

# Same Random Forest setup as used elsewhere in the notebook (40 trees,
# seed 42), trained on the Kernel PCA features.
tree_classifier_kpca = RandomForestClassifier(
    n_estimators=40,
    random_state=42,
)
tree_classifier_kpca.fit(X_train_kpca, y_treino)

# Evaluate on the projected test split.
y_pred_kpca = tree_classifier_kpca.predict(X_test_kpca)
accuracy_kpca = accuracy_score(y_teste, y_pred_kpca)
print(f"Accuracy with Kernel PCA: {accuracy_kpca:.4f}")

Accuracy with Kernel PCA: 0.6987


### LDA

In [None]:
# Build an LDA model with 6 components (adjust so that n_components is
# <= number of classes - 1).
lda = LinearDiscriminantAnalysis(
    n_components=6,
)

# LDA is supervised: fitting uses the training labels as well as the
# features. The test split is only transformed.
X_train_lda = lda.fit_transform(X_treino, y_treino)
X_test_lda = lda.transform(X_teste)

# Random Forest with 40 trees and seed 42, trained on the LDA features.
tree_classifier_lda = RandomForestClassifier(
    n_estimators=40,
    random_state=42,
)
tree_classifier_lda.fit(X_train_lda, y_treino)

# Evaluate on the projected test split.
y_pred_lda = tree_classifier_lda.predict(X_test_lda)
accuracy_lda = accuracy_score(y_teste, y_pred_lda)
print(f"Accuracy with LDA: {accuracy_lda:.4f}")

Accuracy with LDA: 0.7269
