In [1]:
import os
# Expose only the GPU with index 2 to this process. This must run before
# `torch` is imported (done in a later cell) so CUDA picks up the restriction.
os.environ['CUDA_VISIBLE_DEVICES'] = "2"

import sys
# Extend the module search path so the `makitorch` imports used later resolve.
# NOTE(review): absolute, user-specific path -- presumably the `makitorch`
# package lives inside this checkout; adjust when running on another machine.
sys.path.append('/home/rustam/hyperspecter_segmentation/makitorch')

In [2]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import IncrementalPCA
from hsi_dataset_api import HsiDataset

from makitorch.dataloaders.HsiDataloader import HsiDataloader

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the precomputed training indices for the "kfold2" split; they are passed
# to the dataset as `indices=` so the PCA is fitted on this subset only.
train_indices = np.load('data/kfold2_indx_train.npy')

In [4]:
# Number of dataset items grouped into each DataLoader batch; also reused as
# the `batch_size` argument of the IncrementalPCA estimator further down.
BATCH_SIZE = 32

# Dataset over the cropped hyperspectral data, limited to the training indices.
dataset_hsi_loader = HsiDataloader(
    '/raid/rustam/hyperspectral_dataset/new_cropped_hsi_data',
    indices=train_indices,
    shuffle_data=True,
)

# Wrap the dataset so it is consumed in batches of BATCH_SIZE items.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset_hsi_loader,
    batch_size=BATCH_SIZE,
)

In [5]:
# Fit a 17-component, whitened PCA incrementally, one DataLoader batch at a time.
# NOTE: per the scikit-learn docs, `batch_size` is only used by
# `IncrementalPCA.fit`; `partial_fit` consumes every row it is handed
# (N * H * W rows per call here), so the value has no effect on this loop.
ipca = IncrementalPCA(n_components=17, whiten=True, batch_size=BATCH_SIZE)

# The context manager closes the progress bar even if a batch raises midway,
# which the previous trailing `iterator.close()` did not guarantee.
with tqdm(data_loader) as iterator:
    for img_batch, _ in iterator:
        # (N, C, H, W) --> (N, H, W, C)
        img_batch = img_batch.permute(0, 2, 3, 1)
        # (N, H, W, C) --> (N * H * W, C): each row is one pixel's channel vector
        img_batch = img_batch.reshape(-1, img_batch.shape[-1]).numpy()
        ipca.partial_fit(img_batch)

8it [19:01, 142.71s/it]


In [6]:
# Display the fitted component matrix (one row per retained component).
ipca.components_

array([[ 0.00649798,  0.00681769,  0.00713788, ...,  0.01328596,
         0.01358245,  0.0139107 ],
       [ 0.01707642,  0.01866979,  0.02000987, ...,  0.00201532,
         0.00191822,  0.00179395],
       [-0.02451369, -0.02571161, -0.02681541, ..., -0.03665968,
        -0.03585593, -0.03539749],
       ...,
       [-0.03900384, -0.03589407, -0.03719444, ..., -0.03182553,
        -0.03110521, -0.03910105],
       [-0.06491089, -0.06438104, -0.05609436, ..., -0.07960148,
        -0.07694496, -0.07098033],
       [-0.07618136, -0.07929219, -0.07782593, ...,  0.06082513,
         0.06507892,  0.06732339]])

In [7]:
# Sanity check: expected (n_components, n_features); the recorded run shows
# 17 components over 237 input features (presumably spectral channels).
ipca.components_.shape

(17, 237)

In [8]:
# Display the variance explained by each of the retained components.
ipca.explained_variance_

array([1.75994827e+05, 3.62342503e+04, 2.79567227e+03, 1.16420492e+03,
       1.02289680e+03, 5.80574545e+02, 3.11559775e+02, 2.32630066e+02,
       1.45611420e+02, 1.33962770e+02, 7.53302806e+01, 5.59754745e+01,
       4.18245358e+01, 3.07413423e+01, 2.04476425e+01, 1.99064140e+01,
       1.53271866e+01])

In [9]:
# Display the per-feature mean aggregated over all `partial_fit` calls.
ipca.mean_ 

array([  4.68445041,   4.97031589,   5.22100094,   5.54506283,
         5.87356837,   6.17064818,   6.51955595,   6.86993873,
         7.08577177,   7.30839886,   7.6027888 ,   7.90566115,
         8.17963303,   8.40403481,   8.58669813,   8.72284568,
         8.90111696,   9.20426264,   9.58032472,   9.98355872,
        10.38135656,  10.59422172,  10.55552053,  10.44540851,
        10.73385832,  11.28701124,  11.85443493,  12.36965803,
        12.7818202 ,  13.008285  ,  13.16137717,  13.44391795,
        13.90007997,  14.44722137,  15.03731379,  15.58026232,
        15.96600004,  16.4527298 ,  17.30177767,  18.36692386,
        19.40377608,  20.46634065,  21.71249159,  23.0054138 ,
        24.08398882,  24.99873123,  25.64818425,  25.99397422,
        26.34668825,  26.67972883,  27.02177555,  27.57796936,
        28.34276596,  29.07384542,  29.56135678,  29.77444012,
        29.80660741,  29.57799883,  29.10823639,  28.57649874,
        28.1632294 ,  28.11108324,  28.29322562,  28.45

In [12]:
# Write the fitted PCA statistics for the kfold2 split to .npy files.
# Order and file names are kept exactly as before.
pca_artifacts = (
    ('data/kfold2_PcaExplainedVariance_.npy', ipca.explained_variance_),
    ('data/kfold2_PcaMean.npy', ipca.mean_),
    ('data/kfold2_PcaComponents.npy', ipca.components_),
)
for artifact_path, artifact_values in pca_artifacts:
    np.save(artifact_path, artifact_values)

In [13]:
# Re-display the explained variances after saving; the recorded output matches
# the values shown earlier in the notebook.
ipca.explained_variance_

array([1.75994827e+05, 3.62342503e+04, 2.79567227e+03, 1.16420492e+03,
       1.02289680e+03, 5.80574545e+02, 3.11559775e+02, 2.32630066e+02,
       1.45611420e+02, 1.33962770e+02, 7.53302806e+01, 5.59754745e+01,
       4.18245358e+01, 3.07413423e+01, 2.04476425e+01, 1.99064140e+01,
       1.53271866e+01])