In [1]:
import os
# Restrict CUDA to physical GPU "2". This is done in the very first cell so the
# variable is already set when torch is imported in the next cell.
os.environ['CUDA_VISIBLE_DEVICES'] = "2"

import sys
# Make the local makitorch checkout importable.
# Guarded so that re-running this cell does not keep appending the same entry
# to sys.path (the cell stays idempotent).
_MAKITORCH_DIR = '/home/rustam/hyperspecter_segmentation/makitorch'
if _MAKITORCH_DIR not in sys.path:
    sys.path.append(_MAKITORCH_DIR)

In [2]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import IncrementalPCA
from hsi_dataset_api import HsiDataset

from makitorch.dataloaders.HsiDataloader import HsiDataloader

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the sample indices of the training split (fold 0, judging by the file
# name) from a NumPy file stored relative to the notebook's working directory.
train_indices = np.load('data/kfold0_indx_train.npy')

In [4]:
# Number of samples the torch DataLoader groups into one batch.
BATCH_SIZE = 32

# Project dataset wrapper, limited to the training indices.
# NOTE(review): shuffle_data presumably randomises the sample order inside
# HsiDataloader - confirm; no random seed is fixed in this notebook.
dataset_hsi_loader = HsiDataloader(
    '/raid/rustam/hyperspectral_dataset/new_cropped_hsi_data',
    indices=train_indices,
    shuffle_data=True,
)

# Batch the dataset. No shuffle/worker options are passed to the DataLoader
# itself, so it uses torch's defaults for those.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset_hsi_loader,
    batch_size=BATCH_SIZE,
)

In [5]:
# Fit a 17-component whitening PCA over the spectral axis, one loader batch at
# a time, so the whole dataset never has to be held in memory at once.
# NOTE(review): per the scikit-learn docs, `batch_size` is only used by
# `fit()`; it has no effect on the `partial_fit()` calls below, where each call
# receives N * H * W pixel rows.
ipca = IncrementalPCA(n_components=17, whiten=True, batch_size=BATCH_SIZE)

# The context manager closes the progress bar even if the loader or
# `partial_fit` raises part-way through this long-running loop.
with tqdm(data_loader) as iterator:
    for img_batch, _ in iterator:
        # (N, C, H, W) --> (N, H, W, C): move the channel axis last
        img_batch = img_batch.permute(0, 2, 3, 1)
        # (N, H, W, C) --> (N * H * W, C): every pixel becomes one sample row
        img_batch = img_batch.reshape(-1, img_batch.shape[-1]).numpy()
        ipca.partial_fit(img_batch)

8it [20:06, 150.87s/it]


In [6]:
# Inspect the fitted principal axes (one row per retained component).
ipca.components_

array([[ 0.00602891,  0.00633054,  0.00664099, ...,  0.01327099,
         0.01358193,  0.01391081],
       [ 0.01630769,  0.01790755,  0.01906753, ...,  0.0019729 ,
         0.00186968,  0.00174187],
       [-0.02425773, -0.02540149, -0.02608737, ..., -0.03692343,
        -0.03625269, -0.03589918],
       ...,
       [-0.05324423, -0.05299331, -0.04989048, ..., -0.07514371,
        -0.07335743, -0.06687218],
       [-0.03823472, -0.03297001, -0.03064029, ..., -0.0589724 ,
        -0.05986649, -0.06677085],
       [ 0.07033014,  0.07507158,  0.07855331, ..., -0.05758581,
        -0.06045565, -0.06368101]])

In [7]:
# Sanity check: expected shape is (n_components, n_features); the recorded run
# shows 17 components over 237 input features.
ipca.components_.shape

(17, 237)

In [8]:
# Variance captured by each of the retained components.
ipca.explained_variance_

array([1.67993122e+05, 3.32940284e+04, 2.89402278e+03, 1.14323898e+03,
       9.04428596e+02, 5.68760994e+02, 2.92021398e+02, 2.07138123e+02,
       1.43676712e+02, 1.37669745e+02, 7.45658290e+01, 5.45695061e+01,
       4.15558740e+01, 3.04114182e+01, 1.97128098e+01, 1.83965865e+01,
       1.66125194e+01])

In [9]:
# Per-feature mean accumulated over all partial_fit calls.
ipca.mean_

array([  4.60854069,   4.88661797,   5.13074796,   5.45433634,
         5.78427622,   6.07800176,   6.41802079,   6.76648847,
         6.97347037,   7.18946428,   7.48936012,   7.79727577,
         8.0634546 ,   8.27795647,   8.45424223,   8.59141925,
         8.77870406,   9.08975904,   9.47336009,   9.87858482,
        10.26788361,  10.46201326,  10.41240631,  10.3077532 ,
        10.61675855,  11.19848953,  11.77862988,  12.28374666,
        12.6776985 ,  12.88587593,  13.02901677,  13.32241566,
        13.79876329,  14.37296911,  14.97966731,  15.51356934,
        15.9080799 ,  16.42453216,  17.31391883,  18.43364877,
        19.50586002,  20.61024884,  21.90479057,  23.23988807,
        24.35559524,  25.2766468 ,  25.91699288,  26.25082912,
        26.62076117,  26.98058907,  27.36047726,  27.96471354,
        28.77973045,  29.5191907 ,  29.9863191 ,  30.17747954,
        30.18804581,  29.91982104,  29.39979667,  28.8361286 ,
        28.41944403,  28.36249914,  28.55145587,  28.71

In [10]:
# Persist the fitted PCA parameters for fold 0 so the projection can be reused
# without refitting. The explained variance is saved alongside mean and
# components (presumably needed downstream to reproduce the whitening - confirm).
for _path, _array in (
    ('data/kfold0_PcaExplainedVariance_.npy', ipca.explained_variance_),
    ('data/kfold0_PcaMean.npy', ipca.mean_),
    ('data/kfold0_PcaComponents.npy', ipca.components_),
):
    np.save(_path, _array)

In [12]:
# Re-display the explained variance after saving; the recorded output matches
# the values shown when it was first inspected.
ipca.explained_variance_

array([1.67993122e+05, 3.32940284e+04, 2.89402278e+03, 1.14323898e+03,
       9.04428596e+02, 5.68760994e+02, 2.92021398e+02, 2.07138123e+02,
       1.43676712e+02, 1.37669745e+02, 7.45658290e+01, 5.45695061e+01,
       4.15558740e+01, 3.04114182e+01, 1.97128098e+01, 1.83965865e+01,
       1.66125194e+01])