In [229]:
import pandas as pd
import numpy as np 

In [230]:
np.random.seed(23)

muVec1 = np.array([0, 0, 0])
covMat1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class1Sample = np.random.multivariate_normal(muVec1, covMat1, 20)

df = pd.DataFrame(class1Sample, columns=["feature1", "feature2", "feature3"])
df["target"] = 1
muVec2 = np.array([1, 1, 1])
covMat2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class2Sample = np.random.multivariate_normal(muVec1, covMat1, 20)

df1 = pd.DataFrame(class2Sample, columns=["feature1", "feature2", "feature3"])
df1["target"] = 0
df = pd.concat([df, df1], ignore_index=True)
df = df.sample(40)

In [231]:
df.head()

Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,-0.822939,-1.598109,0.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1


In [232]:
import plotly.express as px

px.scatter_3d(x=df["feature1"], y=df["feature2"], z=df["feature3"],color=df["target"])

### Apply Principle Component Analysis (PCA)

In [233]:
# step 1 - standard scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df.iloc[:, :3] = scaler.fit_transform(df.iloc[:, :3])

In [234]:
# step 2 - Find Covariance Matrix
covarianceMatrix = np.cov([df.iloc[:, 0], df.iloc[:,1], df.iloc[:,2]])
covarianceMatrix

array([[ 1.02564103,  0.06781177, -0.12497686],
       [ 0.06781177,  1.02564103, -0.15241116],
       [-0.12497686, -0.15241116,  1.02564103]])

In [235]:
# step 3 - Find Eigen Vector and Eigen Values
eigenValues, eigenVector = np.linalg.eig(covarianceMatrix)

In [236]:
eigenValues

array([1.25911792, 0.95953081, 0.85827434])

In [237]:
eigenVector

array([[-0.51038783, -0.78846385,  0.34326234],
       [-0.569092  ,  0.60894401,  0.55255904],
       [ 0.64470037, -0.08667156,  0.75950607]])

In [238]:
# For 2D Choose 2 Eigen Vectoe with Maximum eigen values
pc = eigenVector[0:2]
pc

array([[-0.51038783, -0.78846385,  0.34326234],
       [-0.569092  ,  0.60894401,  0.55255904]])

In [239]:
# Transform The Data From 3D to 2D
new2DDf = df.iloc[:,:3].dot(pc.T)
new2DDf.rename(columns={0:"PC1",1:"PC2"}, inplace=True)
new2DDf["target"] =  df["target"]
new2DDf.head()

Unnamed: 0,PC1,PC2,target
2,0.560815,-1.471948,1
34,1.740278,-0.6434,0
14,-0.706342,-0.080489,1
11,-0.786474,-1.876564,1
12,1.105696,1.622028,1


In [240]:
px.scatter(x=new2DDf.iloc[:,0],y=new2DDf.iloc[:,1], color=new2DDf.iloc[:,2])

In [241]:
# Similarly From 3D to 1D
pc = eigenVector[1]
new1DDF = pd.DataFrame(df.iloc[:, :3].dot(pc.T))
new1DDF.rename(columns={0: "PC1"}, inplace=True)
new1DDF["target"] = df["target"]
new1DDF.head()

Unnamed: 0,PC1,target
2,-1.471948,1
34,-0.6434,0
14,-0.080489,1
11,-1.876564,1
12,1.622028,1


In [242]:
px.scatter(x=new1DDF.iloc[:,0], color=new1DDF.iloc[:,1])