In [None]:
# 1. Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


In [None]:
# 2. Manually create the dataset
# Each tuple is one sample (row); the three values are its features (columns).
data = np.array(
    [
        (2.5, 2.4, 0.5),
        (0.5, 0.7, 0.2),
        (2.2, 2.9, 0.4),
        (1.9, 2.2, 0.3),
        (3.1, 3.0, 0.9),
        (2.3, 2.7, 0.6),
    ],
    dtype=np.float64,
)

In [None]:
# One dummy class label per sample; used only to colour the scatter plots.
labels = np.array([0, 1, 0, 1, 0, 1])

# Feature table with the labels attached as an extra 'Label' column.
df = pd.DataFrame(data, columns=['Feature_1', 'Feature_2', 'Feature_3']).assign(Label=labels)
print("Original Dataset:\n", df)

# 3. Standardize the Features
# Only the feature columns are scaled; 'Label' is dropped before fitting.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.drop(columns='Label'))

# 4. Apply PCA to reduce to 2 components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# 5. Create a new DataFrame for PCA output
pca_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2']).assign(Label=labels)

# NOTE: no figure is opened in this cell. With the inline backend a figure
# created in a cell that draws nothing is rendered empty and closed as soon as
# the cell finishes, so it cannot carry a size over to a later plotting cell.


In [None]:
# Side-by-side comparison: standardized original features vs. PCA projection.
# The figure is created in the same cell that draws on it: with the inline
# backend every figure is closed when its cell finishes, so a figure (and its
# figsize) opened in an earlier cell is no longer current here.
fig, (ax_original, ax_pca) = plt.subplots(1, 2, figsize=(12, 5))

# Original (first two standardized features)
ax_original.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels, cmap='coolwarm', edgecolor='k')
ax_original.set_title("Original Data (Feature 1 vs Feature 2)")
ax_original.set_xlabel("Feature 1 (scaled)")
ax_original.set_ylabel("Feature 2 (scaled)")

# PCA
ax_pca.scatter(pca_df['PC1'], pca_df['PC2'], c=pca_df['Label'], cmap='coolwarm', edgecolor='k')
ax_pca.set_title("Data after PCA (2 Components)")
ax_pca.set_xlabel("Principal Component 1")
ax_pca.set_ylabel("Principal Component 2")

fig.tight_layout()
plt.show()

# 7. Explained Variance
# Share of the total variance captured by each retained component, and their sum.
print("Explained Variance Ratio:", pca.explained_variance_ratio_)
print("Total Explained Variance:", np.sum(pca.explained_variance_ratio_))