Feature Extraction Technique
* To reduce the curse of dimensionality
* Transform the features and keep the most informative ones

Benefits
* Faster Execution
* Visualization




In [11]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler

In [12]:
# Seed NumPy's global RNG so the random draws below are reproducible.
np.random.seed(23)

# Synthetic data: two 3-D Gaussian clouds of 20 points each.
feature_columns = ['feature1', 'feature2', 'feature3']

# Class 1 (target = 1): centred at the origin with identity covariance.
mu_vec1 = np.array([0, 0, 0])
cov_mat1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

# Class 2 (target = 0): same covariance, mean shifted to (1, 1, 1).
mu_vec2 = np.array([1, 1, 1])
cov_mat2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)

# Wrap each sample in a labelled frame.
df = pd.DataFrame(class1_sample, columns=feature_columns).assign(target=1)
df1 = pd.DataFrame(class2_sample, columns=feature_columns).assign(target=0)

# Stack the classes (labels 0-19 are class 1, 20-39 are class 2), then draw all
# 40 rows in random order. The original index labels are kept, not reset.
df = pd.concat([df, df1], ignore_index=True).sample(n=40)

In [13]:
# Preview the first five rows; the index shows each row's label from before the shuffle.
df.head()

Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,0.177061,-0.598109,1.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1


In [14]:
# Plotting

# `target` is a 0/1 class label stored as an integer. Passing a numeric column as
# `color` makes Plotly Express draw a continuous colour bar, which is misleading
# for a binary class. Cast it to a string for plotting only, so the two classes
# get discrete colours and a legend. `assign` returns a copy, so `df['target']`
# stays numeric for the later cells.
fig = px.scatter_3d(
    df.assign(target=df['target'].astype(str)),
    x='feature1',
    y='feature2',
    z='feature3',
    color='target',
)
fig.show()

In [15]:
# Step 1: Applying Standard Scaling

# Fit the scaler on the three feature columns (positions 0-2) and write the
# scaled values back in place; the `target` column at position 3 is untouched.
scaler = StandardScaler()
standardized_features = scaler.fit_transform(df.iloc[:, :3])
df.iloc[:, :3] = standardized_features

In [16]:
# Step 2: Finding Covariance Matrix

# Rows are observations and columns are features, hence rowvar=False.
# The result is the 3x3 feature-by-feature covariance matrix.
scaled_features = df.iloc[:, :3]
covariance_matrix = np.cov(scaled_features, rowvar=False)

print(covariance_matrix)

[[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


In [17]:
# Step 3: Finding Eigen_Vectors and Eigen_Values

# A covariance matrix is symmetric, so use `eigh` (the symmetric/Hermitian solver)
# instead of the general-purpose `eig`. `eigh` always returns real eigenvalues,
# in ascending order, with orthonormal eigenvectors as columns; `eig` makes no
# such guarantees and can return complex values caused purely by round-off.
# NOTE: an eigenvector's sign is arbitrary, so a component may come out mirrored
# compared with a run that used `eig`. The subspace found is the same.
eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)

# Indices of the eigenvalues from largest to smallest (most variance first).
sorted_indices = np.argsort(eigen_values)[::-1]

# Keep the two leading eigenvectors as columns: a 3x2 projection matrix (PC1, PC2).
pc = eigen_vectors[:, sorted_indices[:2]]


In [18]:
# Inspect the projection matrix: one row per original feature, one column per kept component.
pc

array([[-0.53875915, -0.69363291],
       [-0.65608325, -0.01057596],
       [-0.52848211,  0.72025103]])

In [19]:
# Project the three feature columns onto the two selected eigenvectors:
# (n_samples x 3) @ (3 x 2) -> (n_samples x 2).
transformed_df = df.iloc[:, 0:3].to_numpy() @ pc

# Build the 2-D frame with a fresh 0..n-1 index. The labels are attached as a
# plain array so they line up by position; `df` still carries its shuffled index.
new_df = pd.DataFrame(transformed_df, columns=['PC1', 'PC2']).assign(
    target=df['target'].to_numpy()
)
new_df.head()

Unnamed: 0,PC1,PC2,target
0,2.302877,-0.757593,1
1,0.524464,0.520221,0
2,0.380275,-0.459443,1
3,0.534262,-1.773773,1
4,1.610434,1.747819,1


In [20]:
# Plot the data in the space of the first two principal components.
# `target` is a 0/1 class label stored as an integer; a numeric `color` column
# would be drawn with a continuous colour bar. Cast it to a string for plotting
# only, so each class gets its own discrete colour and legend entry. `assign`
# returns a copy, leaving `new_df` unchanged.
fig = px.scatter(
    new_df.assign(target=new_df['target'].astype(str)),
    x='PC1',
    y='PC2',
    color='target',
)
fig.show()