In [163]:
import numpy as np
import plotly.express as px
from sklearn import datasets
import pandas as pd
import plotly.graph_objects as go

In [164]:
# Load the iris dataset object; later cells read its data, target,
# target_names and feature_names attributes.
iris = datasets.load_iris()

In [165]:
# Display the species names stored with the dataset.
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [166]:
def label(x):
    """Translate an integer iris class code into its species name.

    Code 0 gives "setosa" and code 1 gives "versicolor"; every other value
    falls through to "virginica".
    """
    return "setosa" if x == 0 else "versicolor" if x == 1 else "virginica"

In [167]:
# Feature matrix and column names are taken straight from the dataset object;
# the integer class codes are converted to species names through label().
feature_names = iris.feature_names
X = iris.data
y = [label(code) for code in iris.target]

In [168]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the raw features (fit returns the scaler itself), then
# apply it to obtain the standardized feature matrix.
scaler = StandardScaler().fit(X)
X_trans = scaler.transform(X)

In [169]:
# Standardized features as named columns, with the species name appended
# as a "target" column; show the first rows as a quick check.
df = pd.DataFrame(X_trans, columns=feature_names).assign(target=y)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,-0.900681,1.019004,-1.340227,-1.315444,setosa
1,-1.143017,-0.131979,-1.340227,-1.315444,setosa
2,-1.385353,0.328414,-1.397064,-1.315444,setosa
3,-1.506521,0.098217,-1.283389,-1.315444,setosa
4,-1.021849,1.249201,-1.340227,-1.315444,setosa


In [170]:
# 3D scatter of the first three standardized features, colored by species.
# Marker size comes from the last column of the raw (unscaled) matrix X.
fig = px.scatter_3d(
    df,
    x=feature_names[0],
    y=feature_names[1],
    z=feature_names[2],
    size=X[:,-1],
    color="target",
    width=1000,
    height=700,
)

# One layout update for the whole scene: every axis gets a title, the same
# [-5, 5] range and 5 ticks, and the aspect ratio is fixed manually at 1:1:1.
fig.update_layout(
    dragmode='drawopenpath',
    scene=dict(
        xaxis=dict(title='Sepal Length', range=[-5, 5], nticks=5),
        yaxis=dict(title='Sepal Width', range=[-5, 5], nticks=5),
        zaxis=dict(title='Petal Length', range=[-5, 5], nticks=5),
        aspectmode='manual',
        aspectratio=dict(x=1, y=1, z=1),
    ),
)

## Compute Covariance Matrix

In [171]:
# Covariance matrix of the standardized features. rowvar=False tells np.cov
# that each column of X_trans is a variable and each row an observation.
cov_matrix = np.cov(X_trans, rowvar=False)

In [172]:
from numpy import linalg as LA

In [173]:
# Eigen-decomposition of the covariance matrix. Column i of `eigenvectors`
# is the eigenvector that belongs to eigenvalues[i].
eigenvalues, eigenvectors = LA.eig(cov_matrix)

# LA.eig does not promise any particular ordering of the eigenvalues, while
# the plotting code further down treats index 0 and index 1 as the first and
# second principal component. Sort the eigenpairs by decreasing eigenvalue so
# that assumption always holds.
descending = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[descending]
eigenvectors = eigenvectors[:, descending]

print(f"Eigenvalues: {eigenvalues}")
print(f"Eigenvectors: {eigenvectors}")

Eigenvalues: [2.93808505 0.9201649  0.14774182 0.02085386]
Eigenvectors: [[ 0.52106591 -0.37741762 -0.71956635  0.26128628]
 [-0.26934744 -0.92329566  0.24438178 -0.12350962]
 [ 0.5804131  -0.02449161  0.14212637 -0.80144925]
 [ 0.56485654 -0.06694199  0.63427274  0.52359713]]


In [174]:
# Display the eigenvector matrix (one eigenvector per column).
eigenvectors

array([[ 0.52106591, -0.37741762, -0.71956635,  0.26128628],
       [-0.26934744, -0.92329566,  0.24438178, -0.12350962],
       [ 0.5804131 , -0.02449161,  0.14212637, -0.80144925],
       [ 0.56485654, -0.06694199,  0.63427274,  0.52359713]])

In [175]:
# Overlay the first two principal axes on the 3D scatter. Each axis is drawn
# as a line through the origin along an eigenvector (a column of
# `eigenvectors`). Only its first three components are used because the plot
# shows the first three features; the half-length of each line is the
# matching eigenvalue.
principal_axes = [("PC 1", "red"), ("PC 2", "green")]

# `fig` was created in an earlier cell. Drop axis traces left behind by a
# previous run of this cell so that re-running it does not stack duplicates.
axis_names = {name for name, _ in principal_axes}
fig.data = [trace for trace in fig.data if trace.name not in axis_names]

for pc_index, (pc_name, pc_color) in enumerate(principal_axes):
    half_length = eigenvalues[pc_index]
    dir_x, dir_y, dir_z = eigenvectors[:3, pc_index]
    fig.add_trace(go.Scatter3d(
        x=[-half_length*dir_x, half_length*dir_x],
        y=[-half_length*dir_y, half_length*dir_y],
        z=[-half_length*dir_z, half_length*dir_z],
        line=dict(color=pc_color, width=1),
        mode="lines",
        name=pc_name
    ))

# Place the camera along the first eigenvector's direction
# (x and y components scaled by 2, z component by 0.3).
camera_eye = dict(
    x=2*eigenvectors[0, 0],
    y=2*eigenvectors[1, 0],
    z=0.3*eigenvectors[2, 0]
)

# Camera 'up' direction: only the y component of the second eigenvector is
# used; the x and z components are fixed at zero.
camera_up = dict(
    x=0.0,
    y=eigenvectors[1, 1],
    z=0.0
)

# Update layout with camera settings
fig.update_layout(
    scene_camera=dict(eye=camera_eye, up=camera_up),
    title='Original Dataset (size of markers set to petal width)'
)
fig.show()

## Principal Component Analysis

In [176]:
from sklearn.decomposition import PCA

In [177]:
# Fit a 2-component PCA on the standardized features.
pca = PCA(n_components=2).fit(X_trans)

In [178]:
# Project the standardized samples onto the two fitted components.
X_pca = pca.transform(X_trans)

In [179]:
# Shape of the first projected column (one score per sample).
X_pca[:,0].shape

(150,)

In [180]:
# Projected coordinates as "PC 1" / "PC 2" columns, with the species name
# appended as a "target" column; show the first rows as a quick check.
df_pca = pd.DataFrame(X_pca, columns=["PC 1", "PC 2"]).assign(target=y)
df_pca.head()

Unnamed: 0,PC 1,PC 2,target
0,-2.264703,0.480027,setosa
1,-2.080961,-0.674134,setosa
2,-2.364229,-0.341908,setosa
3,-2.299384,-0.597395,setosa
4,-2.389842,0.646835,setosa


In [181]:
# 2D scatter of the samples in the space of the two principal components,
# colored by species.
fig2 = px.scatter(
    df_pca,
    x="PC 1",
    y="PC 2",
    color="target",
    width=1000,
    height=700,
)
fig2.show()

In [182]:
# Components found by sklearn, one per row. Compared with the columns of
# `eigenvectors`: the first row equals column 0, the second row is column 1
# with the opposite sign (same axis, flipped direction).
pca.components_

array([[ 0.52106591, -0.26934744,  0.5804131 ,  0.56485654],
       [ 0.37741762,  0.92329566,  0.02449161,  0.06694199]])

In [183]:
# Eigenvectors from the manual decomposition (one per column), shown again
# for comparison with pca.components_.
eigenvectors

array([[ 0.52106591, -0.37741762, -0.71956635,  0.26128628],
       [-0.26934744, -0.92329566,  0.24438178, -0.12350962],
       [ 0.5804131 , -0.02449161,  0.14212637, -0.80144925],
       [ 0.56485654, -0.06694199,  0.63427274,  0.52359713]])

In [185]:
# Explained-variance ratio reported by the fitted PCA for its first component.
pca.explained_variance_ratio_[0]

0.7296244541329989