# Sample data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [42]:
np.random.seed(42)
df = pd.DataFrame({
    'f1': np.random.normal(0, 1, 100),
    'f2': np.random.normal(0, 1, 100),
    'f3': np.random.normal(0, 1, 100),
    'op': np.random.choice([1, 0], 100)
})


In [43]:
df

Unnamed: 0,f1,f2,f3,op
0,0.496714,-1.415371,0.357787,1
1,-0.138264,-0.420645,0.560785,0
2,0.647689,-0.342715,1.083051,0
3,1.523030,-0.802277,1.053802,1
4,-0.234153,-0.161286,-1.377669,1
...,...,...,...,...
95,-1.463515,0.385317,-0.692910,0
96,0.296120,-0.883857,0.899600,1
97,0.261055,0.153725,0.307300,0
98,0.005113,0.058209,0.812862,1


# Plot the data

In [44]:
# === Plotly 3D Scatter ===
fig = go.Figure(scatter)

scatter = go.Scatter3d(
    x=df['f1'], y=df['f2'], z=df['f3'],
    mode='markers',
    marker=dict(size=5, color=df['op'].astype('category').cat.codes, colorscale='Viridis'),
    name='Data Points',
    
)

fig.update_layout(
    scene=dict(
        xaxis_title='Feature 1',
        yaxis_title='Feature 2',
        zaxis_title='Feature 3'
    ),
    title='3D PCA with Eigenvectors',
    width=800,
    height=700,
)
fig.show()

# S1: Apply standard scaling

In [45]:
scalar = StandardScaler()

In [47]:
df.iloc[:, 0:3] = scalar.fit_transform(df.iloc[:, 0:3])
df.iloc[:, 0:3].head(5)

Unnamed: 0,f1,f2,f3
0,0.664619,-1.515115,0.271485
1,-0.038089,-0.466809,0.459646
2,0.831697,-0.384681,0.943743
3,1.800406,-0.868997,0.916631
4,-0.144206,-0.193479,-1.337135


# S2: Find Covariance Matrix

In [49]:
df.columns

Index(['f1', 'f2', 'f3', 'op'], dtype='object')

In [56]:
covariance_matrix = df[['f1', 'f2', 'f3']].cov()
covariance_matrix

Unnamed: 0,f1,f2,f3
f1,1.010101,-0.1378,0.192768
f2,-0.1378,1.010101,-0.037002
f3,0.192768,-0.037002,1.010101


In [52]:
# Step 2 - Find Covariance Matrix
covariance_matrix = np.cov([df.iloc[:,0],df.iloc[:,1],df.iloc[:,2]])
print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[ 1.01010101 -0.13780021  0.19276759]
 [-0.13780021  1.01010101 -0.03700176]
 [ 0.19276759 -0.03700176  1.01010101]]


# S3: finding Eige Vector and Eigen Vector 

<img src='https://th.bing.com/th/id/OIP.TUJvCoXugGJACoHOsBv_xAHaEK?rs=1&pid=ImgDetMain'>

In [61]:
egien_values, egien_vectors = np.linalg.eig(covariance_matrix)

In [62]:
egien_vectors

array([[ 0.67987019, -0.73143236,  0.05275631],
       [-0.45068202, -0.35999193,  0.81687914],
       [ 0.57849999,  0.5791481 ,  0.57439119]])

In [None]:
egien_values

array([1.26547347, 0.78964597, 0.97518358])

# Plot the Egien Vectors

In [None]:
# === PCA ===
features = ['f1', 'f2', 'f3']
X = df[features]
X_centered = X - X.mean()
pca = PCA(n_components=3)
pca.fit(X_centered)
eigen_vectors = pca.components_
eigen_values = pca.explained_variance_

mean = X.mean().values  # Convert mean Series to NumPy array

# === Plotly 3D Scatter ===
scatter = go.Scatter3d(
    x=df['f1'], y=df['f2'], z=df['f3'],
    mode='markers',
    marker=dict(size=5, color=df['op'].astype('category').cat.codes, colorscale='Viridis'),
    name='Data Points',
)

# === PCA Arrows (as lines) ===
arrows = []
for i, vec in enumerate(eigen_vectors):
    scaled_vec = vec * eigen_values[i]
    arrows.append(go.Scatter3d(
        x=[mean[0], mean[0] + scaled_vec[0]],
        y=[mean[1], mean[1] + scaled_vec[1]],
        z=[mean[2], mean[2] + scaled_vec[2]],
        mode='lines+text',
        line=dict(color='red', width=6),
        name=f'PC{i+1}',
        text=[None, f'PC{i+1}'],
        textposition='top center'
    ))

# === Combine and Show Plot ===
fig = go.Figure(data=[scatter] + arrows)
fig.update_layout(
    scene=dict(
        xaxis_title='Feature 1',
        yaxis_title='Feature 2',
        zaxis_title='Feature 3'
    ),
    title='3D PCA with Eigenvectors (Plotly)',
    width=800,
    height=700
)
fig.show()


# Make into lower dimension

In [76]:
pc = egien_vectors[0:2]
pc

array([[ 0.67987019, -0.73143236,  0.05275631],
       [-0.45068202, -0.35999193,  0.81687914]])

In [77]:
# Points 
x = df.iloc[:, 0:3]
x.shape

(100, 3)

In [80]:
# Unit Vector 
u = pc.T
u.shape

(3, 2)

In [81]:
# Now take the dot product
transformed_df = np.dot(df.iloc[:, 0:3], pc.T )
transformed_df

array([[ 1.57438141,  0.46766772],
       [ 0.33979302,  0.56068889],
       [ 0.89660241,  0.53457484],
       [ 1.90801364,  0.25019834],
       [-0.02706666, -0.95763597],
       [-0.44132436, -0.83908291],
       [-0.14841053, -1.20572153],
       [ 0.56011589, -0.15243557],
       [-0.43441609,  0.43395457],
       [ 0.74615436,  2.58237072],
       [ 1.25044683,  1.29887545],
       [-0.18228956,  1.00969812],
       [ 0.27442603,  0.48634876],
       [-3.21426594,  0.42049325],
       [-1.07279457,  0.60210321],
       [-0.52623389,  0.64824566],
       [-0.68092447, -0.15931315],
       [ 1.21786066,  0.01486064],
       [-1.49569147, -0.44066285],
       [-1.54605748,  0.38864488],
       [ 0.698326  ,  0.62903541],
       [ 0.5319543 , -1.04870807],
       [-0.90479826, -0.13872502],
       [ 0.02191555, -0.07114512],
       [-0.7928798 , -0.40093642],
       [-1.45961044, -0.15428516],
       [-0.00716256,  0.90605631],
       [ 0.75864049, -0.88105103],
       [-0.4715541 ,

In [82]:
new_df = pd.DataFrame(transformed_df, columns=['pc1', 'pc2'])
new_df['op'] = df['op'].values
new_df

Unnamed: 0,pc1,pc2,op
0,1.574381,0.467668,1
1,0.339793,0.560689,0
2,0.896602,0.534575,0
3,1.908014,0.250198,1
4,-0.027067,-0.957636,1
...,...,...,...
95,-1.339877,-0.033374,0
96,1.040245,0.776316,1
97,0.185099,-0.048313,0
98,0.090880,0.498377,1


# Now visualize the 2D  data  

In [99]:
new_df['op'] = new_df['op'].astype('str')

fig = px.scatter(
    x=new_df['pc1'],
    y= new_df['pc2'],
    color= new_df['op'],
    width= 800,
    height= 500,
    color_discrete_sequence=  px.colors.qualitative.G10
)


fig.show()