# PCA

### Part 1

In [42]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=5, n_informative=4, n_redundant=0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### part 2


finding pca with keras

In [43]:
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
print(X_pca)

X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

[[-1.97471825 -1.19599013]
 [-0.38192708 -2.14240783]
 [-0.30736637 -0.92453537]
 ...
 [-2.43734377 -2.35464095]
 [-0.8178059   1.52365952]
 [-2.29748854 -0.22950187]]


without

In [44]:
import numpy as np

X_centered = X - np.mean(X, axis=0)
cov_matrix = np.cov(X_centered, rowvar=False)

eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

sorted_indices = np.argsort(eigenvalues)[::-1]
top_2_eigenvectors = eigenvectors[:, sorted_indices[:2]]

X_pca_manual = X_centered @ top_2_eigenvectors


### Part 3

In [45]:
# variance to preserve  80%
pca = PCA(n_components=0.80)
X_pca_var = pca.fit_transform(X)

# variance preserved
explained_variance = pca.explained_variance_ratio_
print(explained_variance)

[0.41610784 0.27105291 0.14126091]


### Part 4

In [46]:
from sklearn.neural_network import MLPClassifier

# train NN on original dataset
nn_original = MLPClassifier( max_iter=500)
nn_original.fit(X_train, y_train)
accuracy_original = nn_original.score(X_test, y_test)




In [47]:
# train NN on PCA-transformed data
X_train_pca, X_test_pca, _, _ = train_test_split(X_pca, y, test_size=0.2, random_state=42)
nn_pca = MLPClassifier(max_iter=500)
nn_pca.fit(X_train_pca, y_train)
accuracy_pca = nn_pca.score(X_test_pca, y_test)


### Part 5

In [None]:
from sklearn.decomposition import KernelPCA
from sklearn.metrics import accuracy_score

kernels = ["linear", "rbf", "sigmoid"]
kernel_results = {}

for kernel in kernels:
    kpca = KernelPCA(
        kernel=kernel,
        n_components=2,
        gamma=0.1,
        coef0=1.0 if kernel == "sigmoid" else 0.0  
    )
    X_train_kpca = kpca.fit_transform(X_train)
    X_test_kpca = kpca.transform(X_test)
    
    nn_kpca = MLPClassifier(hidden_layer_sizes=(32,), max_iter=500, random_state=42)
    nn_kpca.fit(X_train_kpca, y_train)
    
    y_pred_kpca = nn_kpca.predict(X_test_kpca)
    accuracy_kpca = accuracy_score(y_test, y_pred_kpca)
    kernel_results[kernel] = accuracy_kpca
    print(f"Accuracy with {kernel} kernel: {accuracy_kpca:.4f}")

Accuracy with linear kernel: 0.8150
Accuracy with rbf kernel: 0.7200
Accuracy with sigmoid kernel: 0.8200


### Part 6

In [49]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

pipeline = Pipeline([
    ("kpca", KernelPCA()),
    ("nn", MLPClassifier(random_state=42, max_iter=500))
])

# hyperparameter grid
param_grid = {
    "kpca__kernel": ["linear", "rbf", "sigmoid"],
    "kpca__gamma": [0.01, 0.1, 1, 10],
    "nn__hidden_layer_sizes": [(50,), (100,), (50, 50)],
    "nn__alpha": [0.0001, 0.001, 0.01],
    "nn__learning_rate_init": [0.001, 0.01, 0.1]
}

#grid search
grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring="accuracy")
grid_search.fit(X_train, y_train)

# best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_


324 fits failed out of a total of 972.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
27 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Jean\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Jean\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\Jean\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\pipeline.py", line 471, in fit
    Xt = self._fit(X, y, routed_params)
  File "c:\Users\Jean\AppData\Local\Programs\Python\Python3