## **First we gonna create a 3D dataframe with 3 features and one target column**

In [1]:
import numpy as np
import pandas as pd
np.random.seed(23)
mu_vec1 = np.array([0, 0, 0])
cov_mat1 = np.array([[1, 0, 0],[0, 1, 0],[0, 0, 1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)
df_class1 = pd.DataFrame(class1_sample,columns=['feature1', 'feature2', 'feature3'])
df_class1['target'] = 1
mu_vec2 = np.array([1, 1, 1])
cov_mat2 = np.array([[1, 0, 0],[0, 1, 0],[0, 0, 1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)
df_class2 = pd.DataFrame(class2_sample,columns=['feature1', 'feature2', 'feature3'])
df_class2['target'] = 0
df = pd.concat([df_class1, df_class2], ignore_index=True)
df = df.sample(frac=1, random_state=23).reset_index(drop=True)

In [2]:
df.head()

Unnamed: 0,feature1,feature2,feature3,target
0,-0.331617,-1.632386,0.619114,1
1,1.010229,1.43783,2.327788,0
2,0.241106,-0.95251,-0.136267,1
3,1.67686,4.187503,-0.080565,0
4,2.823378,-0.332863,2.637391,0


In [4]:
df.shape


(40, 4)

In [6]:
import plotly.express as px

fig= px.scatter_3d(x=df['feature1'], y=df['feature2'], z=df['feature3'], color=df['target'].astype('str'))
fig.show()

**Step 1: Applying Standard scaler**

In [None]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler();
df[:, 0:3]= scaler.fit_transform(df[:, 0:3])

**Step 2: Finding Covariance Matrix**

In [9]:
cov_m=np.cov([df.iloc[:, 0], df.iloc[:, 1], df.iloc[:, 2]])

In [10]:
cov_m

array([[1.34076734, 0.24528825, 0.10420627],
       [0.24528825, 1.12566959, 0.23643388],
       [0.10420627, 0.23643388, 1.327287  ]])

**Step 3: Finding Eigen values nad Eigen vectors**

In [11]:
eigen_values, eigen_vectors= np.linalg.eig(cov_m)

In [12]:
eigen_values

array([1.65691289, 1.22963722, 0.90717381])

In [13]:
eigen_vectors

array([[-0.61002583, -0.69239396,  0.3853039 ],
       [-0.53982115,  0.00718615, -0.84174906],
       [-0.58005311,  0.72148387,  0.37815264]])

# **Step 4: Now doing dot product**

(40,3).(3,2)

In [15]:
pc=eigen_vectors[0:2]
pc

array([[-0.61002583, -0.69239396,  0.3853039 ],
       [-0.53982115,  0.00718615, -0.84174906]])

In [16]:
transformed=np.dot(df.iloc[:, :3], pc.T)

In [18]:
new=pd.DataFrame(transformed, columns=['PC1','PC2'])
new['Target']=df['target']
new.head()

Unnamed: 0,PC1,PC2,Target
0,1.571096,-0.353855,1
1,-0.714905,-2.494424,0
2,0.459927,-0.022297,1
3,-3.953371,-0.807297,0
4,-0.475664,-3.746533,0
