# PCA Analysis


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the preprocessed heart dataset and split it into features and label.
df = pd.read_csv("data/processed/heart_processed.csv")
y = df['target']
X = df.drop(columns=['target'])

# Standardize the features so no single column dominates the principal
# components purely because of its scale.
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

# Fit PCA with the maximum number of components the data supports,
# i.e. the smaller of the sample count and the feature count.
max_components = min(Xs.shape)
pca = PCA(n_components=max_components)
X_pca = pca.fit_transform(Xs)

# Per-component share of the variance and its running total.
explained = pca.explained_variance_ratio_
cum_explained = explained.cumsum()

# Plot the cumulative explained variance against the component count
# (1-based on the x axis).
component_counts = range(1, len(explained) + 1)
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(component_counts, cum_explained, marker='o')
ax.set_xlabel("Number of Components")
ax.set_ylabel("Cumulative Explained Variance")
ax.grid(True)
plt.show()

import numpy as np

# Pick the smallest number of leading components whose cumulative explained
# variance reaches 95%.
#
# np.argmax on a boolean mask returns the index of the first True, but it
# also returns 0 when the mask is all False, so the guard must check that
# the threshold is actually reached. The previous guard tested the opposite
# condition (any value *below* 0.95), which kept every component whenever
# the first component alone already explained >= 95% of the variance.
reached_threshold = cum_explained >= 0.95
if reached_threshold.any():
    # +1 converts the 0-based index into a component count.
    n_comp = int(np.argmax(reached_threshold)) + 1
else:
    # Threshold never reached: keep all components.
    n_comp = len(explained)
print("Suggested components to retain (>=95%):", n_comp)

# Keep only the selected leading components, re-attach the label column,
# and write the reduced dataset next to the processed input.
pc_names = [f"PC{i}" for i in range(1, n_comp + 1)]
pca_df = pd.DataFrame(X_pca[:, :n_comp], columns=pc_names)
pca_df['target'] = y.values
pca_df.to_csv("data/processed/heart_pca.csv", index=False)
print("PCA transformed data saved to data/processed/heart_pca.csv")

