In [1]:
import os
import sys

# Restrict CUDA to the device with index "2" for this process.
# This is set before torch is imported later in the notebook.
os.environ.update({'CUDA_VISIBLE_DEVICES': "2"})

# Put the local makitorch checkout on the module search path so the
# `makitorch` imports further down can be resolved.
sys.path.extend(['/home/rustam/hyperspecter_segmentation/makitorch'])

In [2]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import IncrementalPCA
from hsi_dataset_api import HsiDataset

from makitorch.dataloaders.HsiDataloader import HsiDataloader

from tqdm import tqdm

In [3]:
# Index array used to select samples for fitting; judging by the file name
# these are the training indices of k-fold split 1 (confirm with the split script).
train_indices = np.load(file='data/kfold1_indx_train.npy')

In [4]:
# Number of images handed to the PCA per iteration.
BATCH_SIZE = 32

# Dataset over the cropped hyperspectral images, limited to `train_indices`
# and with the dataset's own shuffling switched on.
dataset_hsi_loader = HsiDataloader(
    '/raid/rustam/hyperspectral_dataset/new_cropped_hsi_data',
    indices=train_indices,
    shuffle_data=True,
)

# Batching wrapper; shuffling is left to the dataset itself.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset_hsi_loader,
    batch_size=BATCH_SIZE,
)

In [5]:
# Number of principal components to keep (was a bare literal in the constructor call).
N_COMPONENTS = 17

# Whitened incremental PCA. It is fitted batch by batch through `partial_fit`,
# so the whole dataset never has to be held in memory at once.
ipca = IncrementalPCA(n_components=N_COMPONENTS, whiten=True, batch_size=BATCH_SIZE)

# The context manager closes the progress bar on every exit path, including an
# exception raised while a batch is being processed. Previously the bar was only
# closed when the loop finished normally.
with tqdm(data_loader) as iterator:
    for img_batch, _ in iterator:
        # (N, C, H, W) --> (N, H, W, C)
        img_batch = img_batch.permute(0, 2, 3, 1)
        # (N, H, W, C) --> (N * H * W, C): every pixel becomes one sample,
        # every spectral channel one feature.
        img_batch = img_batch.reshape(-1, img_batch.shape[-1]).numpy()
        ipca.partial_fit(img_batch)

5it [12:49, 153.91s/it]


In [6]:
# Principal axes learned by the incremental PCA, one row per retained component.
ipca.components_

array([[ 5.94041692e-03,  6.17406620e-03,  6.40885581e-03, ...,
         1.32417739e-02,  1.36040696e-02,  1.39409300e-02],
       [ 1.56223431e-02,  1.71524272e-02,  1.88355180e-02, ...,
         1.03341938e-03,  1.04387183e-03,  1.00132590e-03],
       [ 1.71505817e-02,  1.82380252e-02,  1.94289486e-02, ...,
         2.55656967e-02,  2.42376295e-02,  2.38274416e-02],
       ...,
       [-1.98367918e-02, -1.77856696e-02, -1.43135654e-02, ...,
        -2.60749195e-02, -2.05887812e-02, -7.49240862e-03],
       [-1.31248826e-02, -8.86261477e-03, -1.07007232e-02, ...,
        -1.45510934e-02, -1.33852488e-02, -1.69953391e-02],
       [ 1.57129308e-05, -1.77587457e-03,  8.79379235e-03, ...,
         2.62618767e-02,  2.74461030e-02,  2.58835426e-02]])

In [7]:
# Shape of the components matrix: (n_components, n_features).
ipca.components_.shape

(17, 237)

In [8]:
# Variance explained by each retained component.
ipca.explained_variance_

array([1.78688144e+05, 2.03257969e+04, 3.02812228e+03, 1.29176412e+03,
       6.26608577e+02, 4.18992875e+02, 2.46295932e+02, 2.07298956e+02,
       1.30819276e+02, 1.09051722e+02, 5.54460845e+01, 4.02725821e+01,
       3.77948673e+01, 1.99517033e+01, 1.84420540e+01, 1.75538161e+01,
       1.28157546e+01])

In [9]:
# Per-feature mean accumulated over all `partial_fit` calls.
ipca.mean_ 

array([  4.40140152,   4.61499274,   4.83650158,   5.08789823,
         5.34545559,   5.57671649,   5.85836698,   6.10160323,
         6.28024819,   6.46281886,   6.69092028,   6.90837365,
         7.11085365,   7.25810611,   7.39967324,   7.52722532,
         7.67951983,   7.91812737,   8.20292996,   8.49335202,
         8.77115749,   8.88534666,   8.85517775,   8.87340979,
         9.12055004,   9.52306536,   9.94490379,  10.33200861,
        10.62344419,  10.81687785,  11.00720665,  11.29070951,
        11.70180716,  12.20482818,  12.75307253,  13.24732304,
        13.71012652,  14.35697174,  15.29704058,  16.33505564,
        17.39231452,  18.5366588 ,  19.77446735,  20.95085063,
        22.01079377,  22.87835147,  23.45305058,  23.82797116,
        24.19188227,  24.51998402,  24.87326586,  25.41701082,
        26.10947229,  26.66882927,  26.98380809,  27.09423811,
        27.02688643,  26.68138689,  26.1201736 ,  25.55504064,
        25.10116324,  24.90703721,  24.85618244,  24.85

In [10]:
# Persist the fitted PCA parameters as separate .npy files.
# Order and file names are kept exactly as before.
for target_path, pca_array in (
    ('data/kfold1_PcaExplainedVariance_.npy', ipca.explained_variance_),
    ('data/kfold1_PcaMean.npy', ipca.mean_),
    ('data/kfold1_PcaComponents.npy', ipca.components_),
):
    np.save(target_path, pca_array)

In [11]:
# Display the explained variances once more (the same attribute was written to disk in the save cell).
ipca.explained_variance_

array([1.78688144e+05, 2.03257969e+04, 3.02812228e+03, 1.29176412e+03,
       6.26608577e+02, 4.18992875e+02, 2.46295932e+02, 2.07298956e+02,
       1.30819276e+02, 1.09051722e+02, 5.54460845e+01, 4.02725821e+01,
       3.77948673e+01, 1.99517033e+01, 1.84420540e+01, 1.75538161e+01,
       1.28157546e+01])