In [2]:
import os
import sys

# Expose only physical GPU 1 to this process. This is set before `torch` is
# imported in the next cell so that CUDA initialisation sees the restriction.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

# Make the local `makitorch` checkout importable. The membership guard keeps
# the cell idempotent: re-running it no longer stacks duplicate sys.path entries.
MAKITORCH_DIR = '/home/rustam/hyperspecter_segmentation/makitorch'
if MAKITORCH_DIR not in sys.path:
    sys.path.append(MAKITORCH_DIR)

In [3]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import IncrementalPCA
from hsi_dataset_api import HsiDataset

from makitorch.dataloaders.HsiDataloader import HsiDataloader

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Indices later passed to HsiDataloader(indices=...) to select the samples used
# for fitting. The file name suggests the training split of k-fold 0 — TODO confirm.
train_indices = np.load('data/kfold0_indx_train.npy')

In [5]:
# Number of dataset items grouped into each DataLoader batch; the same value is
# also handed to IncrementalPCA further down.
BATCH_SIZE = 32

# Dataset over the cropped hyperspectral data, limited to `train_indices`.
dataset_hsi_loader = HsiDataloader(
    '/raid/rustam/hyperspectral_dataset/new_cropped_hsi_data',
    shuffle_data=True,
    indices=train_indices,
)

# Batching wrapper that the PCA fitting loop iterates over.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset_hsi_loader,
    batch_size=BATCH_SIZE,
)

In [6]:
# Fit a 17-component whitening PCA incrementally, one loader batch at a time,
# so the full set of pixels never has to sit in memory at once.
# NOTE: `batch_size` is only consulted by `IncrementalPCA.fit`; `partial_fit`
# below consumes whatever array it is given (all pixels of one loader batch).
ipca = IncrementalPCA(n_components=17, whiten=True, batch_size=BATCH_SIZE)

# Long-running cell (the recorded run took about 20 minutes). Using the
# progress bar as a context manager guarantees it is closed even when the loop
# raises or is interrupted, which a trailing `iterator.close()` would not.
with tqdm(data_loader) as iterator:
    for img_batch, _ in iterator:
        # (N, C, H, W) --> (N, H, W, C)
        img_batch = img_batch.permute(0, 2, 3, 1)
        # (N, H, W, C) --> (N * H * W, C): each pixel becomes one sample and
        # its channels become the features.
        img_batch = img_batch.reshape(-1, img_batch.shape[-1]).numpy()
        ipca.partial_fit(img_batch)

8it [20:43, 155.38s/it]


In [7]:
# Principal axes learned by the incremental fit (one row per component).
ipca.components_

array([[ 0.00592977,  0.00620114,  0.00648811, ...,  0.01307005,
         0.01338683,  0.0137318 ],
       [ 0.01680339,  0.01839626,  0.01970557, ...,  0.0024214 ,
         0.00232731,  0.00222623],
       [-0.02308232, -0.02407542, -0.02500362, ..., -0.03638205,
        -0.03553838, -0.03495494],
       ...,
       [-0.09040377, -0.08883746, -0.07986533, ..., -0.08103579,
        -0.07863147, -0.07355542],
       [ 0.0265192 ,  0.02200145,  0.0252962 , ..., -0.00910596,
        -0.00938501, -0.00223727],
       [ 0.00479922,  0.0066775 ,  0.01479004, ...,  0.00214682,
         0.00400002,  0.00439974]])

In [8]:
# Sanity check of the layout: (n_components, n_features).
ipca.components_.shape

(17, 237)

In [9]:
# Variance explained by each of the retained components.
ipca.explained_variance_

array([1.70858690e+05, 3.62624393e+04, 2.79299915e+03, 1.13751374e+03,
       9.48780144e+02, 5.17981251e+02, 2.91816937e+02, 2.18061813e+02,
       1.40690878e+02, 1.20776839e+02, 7.40887453e+01, 5.57834527e+01,
       4.05165564e+01, 2.76996196e+01, 1.97714034e+01, 1.81361902e+01,
       1.53609412e+01])

In [10]:
# Per-feature mean accumulated over all partial_fit calls.
ipca.mean_ 

array([  4.73259056,   5.01937824,   5.27343558,   5.59651924,
         5.93002443,   6.23108354,   6.58426591,   6.93343109,
         7.14824705,   7.37758247,   7.67928938,   7.98181139,
         8.26026634,   8.48396604,   8.66173564,   8.80558231,
         8.99238655,   9.30637363,   9.69032632,  10.09900292,
        10.50196223,  10.70143042,  10.64464234,  10.55041767,
        10.87109328,  11.43650165,  12.00198717,  12.52461403,
        12.92949804,  13.14481747,  13.3078103 ,  13.61076554,
        14.07450028,  14.62883037,  15.22879231,  15.76351723,
        16.14302388,  16.64785209,  17.53635676,  18.60926548,
        19.63058877,  20.71080367,  21.97831657,  23.25521175,
        24.32389835,  25.23159527,  25.8571247 ,  26.19710178,
        26.55213677,  26.89074212,  27.23674557,  27.80154447,
        28.5748337 ,  29.30362954,  29.77647453,  29.9735202 ,
        30.00867278,  29.7824472 ,  29.30461842,  28.77366498,
        28.39598718,  28.38104181,  28.57750588,  28.74

In [13]:
# Persist the fitted PCA parameters so they can be loaded without re-running
# the fit.
# NOTE(review): the first file name has a trailing underscore before `.npy`,
# unlike its siblings; it is kept as-is because other code may load that name.
for target_path, fitted_array in (
    ('data/kfold0_PcaExplainedVariance_.npy', ipca.explained_variance_),
    ('data/kfold0_PcaMean.npy', ipca.mean_),
    ('data/kfold0_PcaComponents.npy', ipca.components_),
):
    np.save(target_path, fitted_array)

In [14]:
# Re-display of the explained variances after saving (repeats the earlier cell).
ipca.explained_variance_

array([1.70858690e+05, 3.62624393e+04, 2.79299915e+03, 1.13751374e+03,
       9.48780144e+02, 5.17981251e+02, 2.91816937e+02, 2.18061813e+02,
       1.40690878e+02, 1.20776839e+02, 7.40887453e+01, 5.57834527e+01,
       4.05165564e+01, 2.76996196e+01, 1.97714034e+01, 1.81361902e+01,
       1.53609412e+01])