In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
from sklearn.datasets import load_digits
digits = load_digits()

In [3]:
digits["data"]

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [6]:
df_data = pd.DataFrame(digits["data"],columns = digits["feature_names"])

In [7]:
df_data.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0


In [9]:
df_data.shape

(1797, 64)

**PREPARING FOR PCA**

In [10]:
from sklearn.preprocessing import StandardScaler

In [11]:
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df_data)

In [12]:
df_new = pd.DataFrame(scaled_data,columns = digits["feature_names"])
df_new.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,-0.335016,-0.043081,0.274072,-0.664478,-0.844129,-0.409724,-0.125023,-0.059078,-0.624009,...,-0.757436,-0.209785,-0.023596,-0.299081,0.086719,0.208293,-0.366771,-1.146647,-0.50567,-0.196008
1,0.0,-0.335016,-1.094937,0.038648,0.268751,-0.13802,-0.409724,-0.125023,-0.059078,-0.624009,...,-0.757436,-0.209785,-0.023596,-0.299081,-1.089383,-0.24901,0.849632,0.548561,-0.50567,-0.196008
2,0.0,-0.335016,-1.094937,-1.844742,0.735366,1.097673,-0.409724,-0.125023,-0.059078,-0.624009,...,0.25923,-0.209785,-0.023596,-0.299081,-1.089383,-2.078218,-0.164037,1.565686,1.695137,-0.196008
3,0.0,-0.335016,0.377661,0.744919,0.268751,-0.844129,-0.409724,-0.125023,-0.059078,1.879691,...,1.072563,-0.209785,-0.023596,-0.299081,0.282736,0.208293,0.24143,0.37904,-0.50567,-0.196008
4,0.0,-0.335016,-1.094937,-2.551014,-0.197863,-1.020657,-0.409724,-0.125023,-0.059078,-0.624009,...,-0.757436,-0.209785,-0.023596,-0.299081,-1.089383,-2.306869,0.849632,-0.468564,-0.50567,-0.196008


**PCA with two components**

In [13]:
from sklearn.decomposition import PCA

In [14]:
pca = PCA(n_components = 2)

In [15]:
df_pca = pca.fit_transform(df_new)

In [17]:
df_pca.shape

(1797, 2)

In [19]:
component_df = pd.DataFrame(data = df_pca,columns = ["first_component","second_component"])

In [20]:
component_df

Unnamed: 0,first_component,second_component
0,-1.914214,-0.954502
1,-0.588980,0.924636
2,-1.302039,-0.317189
3,3.020770,-0.868772
4,-4.528949,-1.093480
...,...,...
1792,-0.104331,0.255024
1793,-2.423234,-1.429611
1794,-1.022596,-0.147911
1795,-1.076055,-0.380906


In [21]:
pca.explained_variance_

array([7.34477606, 5.83549054])

In [22]:
pca.explained_variance_ratio_

array([0.12033916, 0.09561054])

In [23]:
np.cumsum(pca.explained_variance_ratio_)

array([0.12033916, 0.21594971])

**PCA WITH 4 COMPONENTS**

In [24]:
pca_new = PCA(n_components= 4)

In [25]:
df_pca_new = pca_new.fit_transform(df_new)

In [26]:
component_df = pd.DataFrame(df_pca_new,columns = "1 2 3 4".split())
component_df

Unnamed: 0,1,2,3,4
0,-1.914214,-0.954502,-3.946035,-2.028723
1,-0.588980,0.924636,3.924755,1.779850
2,-1.302039,-0.317189,3.023333,2.043376
3,3.020770,-0.868772,-0.801744,2.187039
4,-4.528949,-1.093480,0.973121,1.419510
...,...,...,...,...
1792,-0.104331,0.255024,-3.765861,1.947006
1793,-2.423234,-1.429611,-3.045245,-2.632089
1794,-1.022596,-0.147911,2.469974,0.620307
1795,-1.076055,-0.380906,-2.455487,1.312013


In [27]:
pca.explained_variance_

array([7.34477606, 5.83549054])

In [28]:
pca.explained_variance_ratio_

array([0.12033916, 0.09561054])

In [29]:
np.cumsum(pca.explained_variance_ratio_)

array([0.12033916, 0.21594971])