Example:

If you perform PCA and get the following explained variances for 5 components:

PC1: 60% variance explained

PC2: 25% variance explained

PC3: 10% variance explained

PC4: 4% variance explained

PC5: 1% variance explained

The cumulative variance will be:

After PC1: 60%

After PC1 + PC2: 60% + 25% = 85%

After PC1 + PC2 + PC3: 85% + 10% = 95%

After PC1 + PC2 + PC3 + PC4: 95% + 4% = 99%

After PC1 + PC2 + PC3 + PC4 + PC5: 99% + 1% = 100%

In [1]:
from sklearn.decomposition import PCA
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
# Mounting the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Loading the data


In [3]:
# Paths to the .npz files
output_dir = '/content/drive/My Drive/Fabric Detection Project/Extracted_Features_New'
data_path = os.path.join(output_dir, 'X.npz')
labels_path = os.path.join(output_dir, 'y.npz')
groups_path = os.path.join(output_dir, 'groups.npz')


# Loading the .npz files
data = np.load(data_path)
labels = np.load(labels_path)
groups = np.load(groups_path)

#  Access the arrays stored inside the .npz files
X = data['data']
y = labels['labels']
groups = groups['groups']

In [4]:
np.unique(y)

array([1, 2], dtype=uint8)

n_components=1000

In [5]:
pca = PCA(n_components=1000)  # Set n_components to the number of features or less
pca.fit(X)


In [6]:
explained_variance_ratio = pca.explained_variance_ratio_
print(explained_variance_ratio)


[0.09568056 0.01090567 0.00550499 0.00507042 0.0044059  0.00407807
 0.00384958 0.00376336 0.00366593 0.00344158 0.00328821 0.0031958
 0.00312555 0.00294858 0.00289061 0.0027828  0.00270791 0.00262851
 0.00257062 0.00251389 0.00242153 0.00235045 0.00234112 0.00228847
 0.00220151 0.0021482  0.00210714 0.00202117 0.00201544 0.00198602
 0.00197392 0.00194657 0.00193097 0.00189905 0.00187715 0.00183855
 0.00180641 0.00178174 0.00177426 0.00175186 0.00173733 0.00172192
 0.00169208 0.00167946 0.00165266 0.00164594 0.0016133  0.00160972
 0.00158981 0.001581   0.00156065 0.0015537  0.00153288 0.00152007
 0.00151344 0.0014972  0.00147429 0.00147265 0.00144557 0.00143301
 0.00142697 0.00142189 0.00140955 0.00139571 0.00138768 0.00136852
 0.00135476 0.00134493 0.00133951 0.00133735 0.00130895 0.00129753
 0.00128723 0.00128207 0.00127794 0.00126064 0.00125578 0.00123353
 0.0012289  0.00122626 0.00121018 0.0012065  0.00119511 0.00118936
 0.00118219 0.00117573 0.00116479 0.00115917 0.00115243 0.00114

In [7]:
cumulative_variance = np.cumsum(explained_variance_ratio)
print(cumulative_variance)


[0.09568056 0.10658623 0.11209122 0.11716164 0.12156754 0.12564562
 0.12949519 0.13325856 0.13692449 0.14036607 0.14365428 0.14685008
 0.14997563 0.15292421 0.15581481 0.15859762 0.16130553 0.16393403
 0.16650465 0.16901854 0.17144007 0.17379052 0.17613164 0.17842011
 0.18062162 0.18276982 0.18487696 0.18689812 0.18891356 0.19089959
 0.19287351 0.19482008 0.19675105 0.1986501  0.20052725 0.2023658
 0.20417221 0.20595395 0.2077282  0.20948007 0.2112174  0.21293932
 0.2146314  0.21631086 0.21796351 0.21960946 0.22122276 0.22283248
 0.22442229 0.22600329 0.22756394 0.22911764 0.23065051 0.23217059
 0.23368403 0.23518122 0.23665551 0.23812817 0.23957373 0.24100674
 0.24243371 0.2438556  0.24526515 0.24666086 0.24804854 0.24941705
 0.25077181 0.25211674 0.25345625 0.2547936  0.25610255 0.25740008
 0.25868731 0.25996938 0.26124732 0.26250796 0.26376374 0.26499728
 0.26622617 0.26745244 0.26866262 0.26986913 0.27106424 0.2722536
 0.2734358  0.27461153 0.27577632 0.27693549 0.27808792 0.279230

n_components=2000

In [8]:
pca = PCA(n_components=2000)  # Set n_components to the number of features or less
pca.fit(X)

In [9]:
explained_variance_ratio = pca.explained_variance_ratio_
print(explained_variance_ratio)

[0.09568056 0.01090567 0.00550499 ... 0.00010439 0.00010421 0.00010405]


In [10]:
cumulative_variance = np.cumsum(explained_variance_ratio)
print(cumulative_variance)

[0.09568056 0.10658623 0.11209122 ... 0.82032857 0.82043278 0.82053683]


In [None]:
del pca

n_components=3000

In [11]:
pca = PCA(n_components=3000)  # Set n_components to the number of features or less
pca.fit(X)

In [12]:
explained_variance_ratio = pca.explained_variance_ratio_
print(explained_variance_ratio)

[9.56805565e-02 1.09056731e-02 5.50499426e-03 ... 6.04458339e-05
 6.03105672e-05 6.02382440e-05]


In [13]:
cumulative_variance = np.cumsum(explained_variance_ratio)
print(cumulative_variance)

[0.09568056 0.10658623 0.11209122 ... 0.91424695 0.91430726 0.9143675 ]


In [14]:
del pca

n_components=4000

In [15]:
pca = PCA(n_components=4000)  # Set n_components to the number of features or less
pca.fit(X)

In [16]:
explained_variance_ratio = pca.explained_variance_ratio_
print(explained_variance_ratio)

[9.56805565e-02 1.09056731e-02 5.50499426e-03 ... 3.01335298e-05
 3.00337877e-05 2.99086224e-05]


In [17]:
cumulative_variance = np.cumsum(explained_variance_ratio)
print(cumulative_variance)

[0.09568056 0.10658623 0.11209122 ... 0.96700553 0.96703557 0.96706547]
