In [1]:
import numpy as np
import pandas as pd

In [3]:
## Generate Data

In [6]:
np.random.seed(23)

#******************************************************************************
vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(vec1,cov_mat1,20)

df = pd.DataFrame(class1_sample,columns = ['feature1','feature2','feature3'])
df['target'] = 1
#******************************************************************************

#******************************************************************************
vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(vec2,cov_mat2,20)

df1 = pd.DataFrame(class2_sample,columns = ['feature1','feature2','feature3'])
df1['target'] = 0
#******************************************************************************

df = df.append(df1, ignore_index=True)

df = df.sample(40)


In [8]:
df.head()

Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,0.177061,-0.598109,1.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1


In [9]:
import plotly.express as px
fig = px.scatter_3d(df,x=df['feature1'],y=df['feature2'],z=df['feature3'],
                    color = df['target'].astype('str'))
fig.update_traces(marker=dict(
    size = 12,
    line = dict(
        width=2,
        color = 'DarkSlateGrey'
    )
),
selector = dict(
    mode = 'markers'
))
fig.show()

In [10]:
# Step1 -> Apply standard scalling (mean cenric)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

df.iloc[:,0:3] = scaler.fit_transform(df.iloc[:,0:3])

In [11]:
# Step2 -> Find covariance matrix
cov_matrix = np.cov([df.iloc[:,0],df.iloc[:,1],df.iloc[:,2]])
print("Covaricance Matrix : \n",cov_matrix)

Covaricance Matrix : 
 [[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


In [12]:
# Step3 -> Find eigenvalues and vectors
eigen_values, eigen_vectores= np.linalg.eig(cov_matrix)

In [13]:
eigen_values

array([1.3536065 , 0.94557084, 0.77774573])

In [14]:
eigen_vectores

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]])

In [15]:
# Here we get actully 3 PC but we will consider 2
pc = eigen_vectores[0:2]
print(pc)

[[-0.53875915 -0.69363291  0.47813384]
 [-0.65608325 -0.01057596 -0.75461442]]


In [17]:
# Transform Data Point from 3d space to 2d space
transformed_df = np.dot(df.iloc[:,0:3],pc.T)

final_df = pd.DataFrame(transformed_df, columns = ['PC1','PC2'])
final_df['target'] = df['target']
final_df.head()

Unnamed: 0,PC1,PC2,target
0,0.599433,1.795862,1
1,1.056919,-0.212737,1
2,-0.271876,0.498222,1
3,-0.621586,0.02311,1
4,1.567286,1.730967,1


In [21]:
fig = px.scatter(final_df,x=final_df['PC1'],y=final_df['PC2'],
                    color = final_df['target'].astype('str'))
fig.update_traces(marker=dict(
    size = 12,
    line = dict(
        width=2,
        color = 'DarkSlateGrey'
    )
),
selector = dict(
    mode = 'markers'
))
fig.show()