# Principle Component Analysis

In [113]:
from sklearn.datasets import make_classification
import pandas as pd
import numpy as np

import plotly.express as px

In [114]:
# Create a dataset with 100 rows and 10 features
X, y = make_classification(n_samples=100, n_features=10, random_state=42)
# Convert to DataFrame
df = pd.DataFrame(X, columns=[f'Feature_{i+1}' for i in range(10)])
df['Target'] = y
df.head()  # Display first 5 rows

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Target
0,-1.140526,1.359706,0.861991,0.846092,0.60601,-1.556629,1.754794,1.696456,-1.280429,-2.081929,1
1,-0.078734,-1.329332,0.627375,-1.193006,-0.77301,0.097676,0.497998,0.959271,0.02451,1.451144,1
2,0.807427,0.730198,-1.28568,0.889484,-1.804882,-0.763259,0.048085,-0.904317,-1.627542,0.259723,0
3,0.588465,-0.375121,-0.575002,-0.149518,-0.563725,0.412931,0.243687,-0.506943,-0.82222,0.244967,0
4,1.636312,-1.640607,-1.360456,-0.941163,-1.430141,1.632411,0.130741,-1.435862,-0.440044,1.441273,0


In [115]:
# Compute mean-centered data
X_centered = X - np.mean(X, axis=0)
# Convert to DataFrame
df_centered = pd.DataFrame(X_centered, columns=[f'Feature_{i+1}' for i in range(10)])

In [116]:
df_cov = df_centered.cov() # Calculating Covariance-Matrix

In [117]:
eigen_value, eigen_vector = np.linalg.eig(df_cov) # Calculating Eigen_value and Eigen_vector from covariance-matrix

In [118]:
pc = eigen_vector[0:3] # taking 3 principle components

In [119]:
# projecting the data points on the principle component vector
Transformed_df = np.dot(df_centered.iloc[:,0:10], pc.T)
# Converting into dataframe and adding target feature
new_df = pd.DataFrame(Transformed_df, columns=("PC1","PC2","PC3"))
new_df["Target"]=df["Target"]

new_df

Unnamed: 0,PC1,PC2,PC3,Target
0,-1.123377,-0.340034,-0.391468,1
1,0.436500,-0.103307,1.213742,1
2,1.610428,0.019997,-0.026570,0
3,0.736708,-0.390563,0.211055,0
4,1.668823,-0.622251,0.454228,0
...,...,...,...,...
95,1.429839,-0.405846,0.306082,0
96,-0.921025,0.183197,-0.220400,1
97,0.012561,-1.662918,0.736076,1
98,-0.854596,1.084591,-0.021232,1


In [120]:
px.scatter_3d(new_df, x="PC1",y="PC2",z="PC3", color="Target")