## Идея заключается в том, чтобы в качестве свертки использовать не случайно инициализированную матрицу,  а матрицу посчитанную по всей трейн выборке с использованием PCA


Статья: **Image noise types recognition using CNN with PCA**

![title](img/pcanet.png)

0. все изображения привести к виду (m, m) 

1. Пусть у нас N изображений каждая (m, m). Вытаскиваем из одного изображения патчи размера $(k_a, k_a)$ вокруг каждого пикселя со stride = 1 на $a$ той итерации. 

2. Центрируем патчи: для каждого патча вычисляем среднее и вычитаем из каждого элемента патча среднее значение, для i-того изображения:
$\overline{X_i} = [\overline{x_1}, ..., \overline{x_N}] \subset \mathbb{R}^{k^2 x m^2}$

3. Проделываем это для N обучающих изображений: $X = [\overline{X_1}, ..., \overline{X_N}] \subset \mathbb{R}^{k^2 x Nm^2}$

4. для $ XX^T$ находим L **первых собственных векторов**, каждый из которых имеет размерность $ \mathbb{R}^{k^2}$, после этого делаем reshape до $ \mathbb{R}^{k x k}$ и получаем L искомых фильтров

Важно учесть, что первый фильтр работает с изображениями напрямую -> его можно просчитать заранее, в то время как остальные фильтры готовятся после предыдущего слоя!!!

In [1]:
import numpy as np

In [2]:
test_image = np.arange(20).reshape(4,5)
test_image

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [7]:
np.pad(test_image, (3, 3), 'constant', constant_values=0)

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  2,  3,  4,  0,  0,  0],
       [ 0,  0,  0,  5,  6,  7,  8,  9,  0,  0,  0],
       [ 0,  0,  0, 10, 11, 12, 13, 14,  0,  0,  0],
       [ 0,  0,  0, 15, 16, 17, 18, 19,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [42]:
# Step 1: get patches k x k, где k - НЕЧЕТНЫЕ, чтобы можно было приложить к центру изображения
# TO DO: написать тесты

def get_patches(k, image) -> np.ndarray:
    padded_image = np.pad(image, (k // 2, k // 2), 'constant', constant_values=0) # можно не 0
    
    print(padded_image)
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            patch = padded_image[i:i+k, j:j+k]
            print(patch)
            print()
            
get_patches(3, test_image)

[[ 0  0  0  0  0  0  0]
 [ 0  0  1  2  3  4  0]
 [ 0  5  6  7  8  9  0]
 [ 0 10 11 12 13 14  0]
 [ 0 15 16 17 18 19  0]
 [ 0  0  0  0  0  0  0]]
[[0 0 0]
 [0 0 1]
 [0 5 6]]

[[0 0 0]
 [0 1 2]
 [5 6 7]]

[[0 0 0]
 [1 2 3]
 [6 7 8]]

[[0 0 0]
 [2 3 4]
 [7 8 9]]

[[0 0 0]
 [3 4 0]
 [8 9 0]]

[[ 0  0  1]
 [ 0  5  6]
 [ 0 10 11]]

[[ 0  1  2]
 [ 5  6  7]
 [10 11 12]]

[[ 1  2  3]
 [ 6  7  8]
 [11 12 13]]

[[ 2  3  4]
 [ 7  8  9]
 [12 13 14]]

[[ 3  4  0]
 [ 8  9  0]
 [13 14  0]]

[[ 0  5  6]
 [ 0 10 11]
 [ 0 15 16]]

[[ 5  6  7]
 [10 11 12]
 [15 16 17]]

[[ 6  7  8]
 [11 12 13]
 [16 17 18]]

[[ 7  8  9]
 [12 13 14]
 [17 18 19]]

[[ 8  9  0]
 [13 14  0]
 [18 19  0]]

[[ 0 10 11]
 [ 0 15 16]
 [ 0  0  0]]

[[10 11 12]
 [15 16 17]
 [ 0  0  0]]

[[11 12 13]
 [16 17 18]
 [ 0  0  0]]

[[12 13 14]
 [17 18 19]
 [ 0  0  0]]

[[13 14  0]
 [18 19  0]
 [ 0  0  0]]



In [132]:
def get_centered_patches(k, image) -> np.ndarray:
    padded_image = np.pad(image, (k // 2, k // 2), 'constant', constant_values=0) # можно не 0
    X_im = []
    # print(padded_image)
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            patch_matrix = padded_image[i:i+k, j:j+k]
            patch_vector = (patch_matrix.flatten()).astype(np.float32)
            #mean = np.mean(patch_vector)
            #patch_vector -= mean
            #print(patch_vector)
            X_im.append(patch_vector)
    return np.array(X_im).T

            
get_centered_patches(3, test_image).shape

(9, 20)

In [None]:
# Step 3: проделать это с N изображениями, и собрать единую ДВУМЕРНУЮ матрицу


In [52]:
# numpy concatenate вроде как самый быстрый вариант, так как нужно будет это сделать для N изображений, где N - большое число

a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
print(a)
print()
print(b)
np.concatenate((a,b), axis=1)

[[1 2]
 [3 4]]

[[5 6]
 [7 8]]


array([[1, 2, 5, 6],
       [3, 4, 7, 8]])

In [77]:
%%time

import cProfile

def compute_n_images():
    k = 3
    N = 600
    m = 64

    np.random.seed(0)

    all_patches = np.zeros((k ** 2, N * m ** 2))
    for i in range(N):
        #image_arr = Image.open()
        #gray = to_gray(image_arr)
        gray_im = np.random.rand(m, m)
        image_patches = get_centered_patches_profiled(k, gray_im) # k ** 2 x m ** 2
        #print(image_patches)
        # print(image_patches.shape, m**2)
        all_patches[:, i:i+m**2] = image_patches

    print(all_patches.shape)
    #print(all_patches)
    #del all_patches

cProfile.run('compute_n_images()')

(9, 2457600)
         41807441 function calls (41806241 primitive calls) in 54.812 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      600    0.001    0.000    0.005    0.000 <__array_function__ internals>:2(around)
  2457600    1.591    0.000   35.796    0.000 <__array_function__ internals>:2(mean)
      600    0.001    0.000    0.056    0.000 <__array_function__ internals>:2(pad)
      600    0.001    0.000    0.007    0.000 <__array_function__ internals>:2(round_)
      600   12.196    0.020   54.199    0.090 <ipython-input-72-9cbab2703260>:1(get_centered_patches)
        1    0.000    0.000   54.812   54.812 <string>:1(<module>)
        1    0.584    0.584   54.812   54.812 <timed exec>:3(compute_n_images)
     1200    0.001    0.000    0.004    0.000 _asarray.py:16(asarray)
  2457600    0.672    0.000    1.382    0.000 _asarray.py:88(asanyarray)
  2457600   16.787    0.000   29.157    0.000 _methods.py:134(_mean)
  2

In [137]:
def get_centered_patches_profiled(k, image) -> np.ndarray:
    padded_image = np.pad(image, (k // 2, k // 2), 'constant', constant_values=0) # можно не 0
    X_im = []
    #padded_image.astype(np.float32)
    # print(padded_image)
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            patch_matrix = padded_image[i:i+k, j:j+k]
            X_im.append(patch_matrix)

    X_im = np.array(X_im, dtype=np.float32).T # k x k x m^2
    X_im = X_im.reshape(-1, X_im.shape[2])
    patch_mean = np.mean(X_im, axis=0)

    X_im -= patch_mean.reshape(-1, patch_mean.shape[0])
    return X_im

In [128]:
def matprint(mat, fmt="g"):
    col_maxes = [max([len(("{:"+fmt+"}").format(x)) for x in col]) for col in mat.T]
    for x in mat:
        for i, y in enumerate(x):
            print(("{:"+str(col_maxes[i])+fmt+"}").format(y), end="  ")
        print("")

In [131]:
np.set_printoptions(precision=2)
patches1 = get_centered_patches_profiled(3, test_image)
matprint(patches1)

0  0  0  0  0   0   0   1   2   3   0   5   6   7   8   0  10  11  12  13  
0  0  1  2  3   0   5   6   7   8   0  10  11  12  13   0  15  16  17  18  
0  5  6  7  8   0  10  11  12  13   0  15  16  17  18   0   0   0   0   0  
0  0  0  0  0   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  
0  1  2  3  4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  
5  6  7  8  9  10  11  12  13  14  15  16  17  18  19   0   0   0   0   0  
0  0  0  0  0   1   2   3   4   0   6   7   8   9   0  11  12  13  14   0  
1  2  3  4  0   6   7   8   9   0  11  12  13  14   0  16  17  18  19   0  
6  7  8  9  0  11  12  13  14   0  16  17  18  19   0   0   0   0   0   0  


In [134]:
patches2 = get_centered_patches(3, test_image)
matprint(patches2)

0  0  0  0  0   0   0   1   2   3   0   5   6   7   8   0  10  11  12  13  
0  0  0  0  0   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  
0  0  0  0  0   1   2   3   4   0   6   7   8   9   0  11  12  13  14   0  
0  0  1  2  3   0   5   6   7   8   0  10  11  12  13   0  15  16  17  18  
0  1  2  3  4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  
1  2  3  4  0   6   7   8   9   0  11  12  13  14   0  16  17  18  19   0  
0  5  6  7  8   0  10  11  12  13   0  15  16  17  18   0   0   0   0   0  
5  6  7  8  9  10  11  12  13  14  15  16  17  18  19   0   0   0   0   0  
6  7  8  9  0  11  12  13  14   0  16  17  18  19   0   0   0   0   0   0  


In [118]:
patches2.mean(axis=0)

array([-5.29819069e-08,  1.05963814e-07,  0.00000000e+00, -1.58945724e-07,
       -1.85436676e-07, -1.05963814e-07,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  2.64909545e-07,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  1.05963814e-07,
        0.00000000e+00, -3.17891448e-07,  3.17891448e-07, -5.29819069e-08],
      dtype=float32)

In [109]:
print(patches1[:,1])

[0. 0. 5. 0. 1. 6. 0. 2. 7.]


In [110]:
print(patches2[:,1])

[0. 0. 0. 0. 1. 2. 5. 6. 7.]


In [138]:
%%time

import cProfile

def compute_n_images():
    k = 3
    N = 600
    m = 64

    np.random.seed(0)

    all_patches = np.zeros((k ** 2, N * m ** 2))
    for i in range(N):
        #image_arr = Image.open()
        #gray = to_gray(image_arr)
        gray_im = np.random.rand(m, m)
        image_patches = get_centered_patches_profiled(k, gray_im) # k ** 2 x m ** 2
        #print(image_patches)
        # print(image_patches.shape, m**2)
        all_patches[:, i:i+m**2] = image_patches

    print(all_patches.shape)
    
    return all_patches

cProfile.run('compute_n_images()')

(9, 2457600)
         2494841 function calls (2493641 primitive calls) in 2.968 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      600    0.001    0.000    0.003    0.000 <__array_function__ internals>:2(around)
      600    0.001    0.000    0.030    0.000 <__array_function__ internals>:2(mean)
      600    0.001    0.000    0.047    0.000 <__array_function__ internals>:2(pad)
      600    0.001    0.000    0.005    0.000 <__array_function__ internals>:2(round_)
      600    1.626    0.003    2.922    0.005 <ipython-input-137-b873b8a1d142>:1(get_centered_patches_profiled)
        1    0.000    0.000    2.968    2.968 <string>:1(<module>)
        1    0.020    0.020    2.968    2.968 <timed exec>:3(compute_n_images)
     1200    0.000    0.000    0.003    0.000 _asarray.py:16(asarray)
      600    0.000    0.000    0.001    0.000 _asarray.py:88(asanyarray)
      600    0.011    0.000    0.025    0.000 _methods.py:134(_me

In [139]:
all_patches = compute_n_images()

(9, 2457600)


In [141]:
X = all_patches

In [None]:
#Step4: Извлечение фильтров

In [149]:
from sklearn.decomposition import IncrementalPCA
n_filters = 8
pca = PCA(n_components=n_filters)

In [150]:
%time
pca.fit(X @ X.T)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs


PCA(copy=True, iterated_power='auto', n_components=8, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [152]:
# получаем таблицу - строка - номер фильтра, столбцы - flattend фильтр
pca.components_

array([[-0.32, -0.3 , -0.29, -0.31,  0.37,  0.35, -0.26,  0.38,  0.39],
       [-0.02, -0.02,  0.04,  0.02,  0.56,  0.45, -0.06, -0.42, -0.55],
       [-0.06,  0.04,  0.29, -0.13, -0.42,  0.52, -0.17, -0.5 ,  0.42],
       [ 0.1 , -0.17, -0.18, -0.17, -0.45,  0.5 ,  0.42,  0.34, -0.39],
       [ 0.15,  0.59,  0.17, -0.45, -0.06,  0.04, -0.49,  0.3 , -0.25],
       [ 0.7 ,  0.08, -0.61, -0.16,  0.06,  0.02, -0.03, -0.25,  0.2 ],
       [-0.26,  0.3 ,  0.01, -0.6 ,  0.19, -0.16,  0.61, -0.21,  0.12],
       [-0.45,  0.57, -0.53,  0.4 , -0.12,  0.16, -0.  , -0.04,  0.01]])

In [154]:
first_eigenvector = pca.components_[0].reshape(3,3)
first_eigenvector

array([[-0.32, -0.3 , -0.29],
       [-0.31,  0.37,  0.35],
       [-0.26,  0.38,  0.39]])

## Архитектура будет следующей:
1. этап - прохожусь по всем изображениям, формирую фильтры, пропускаю все изображения через эти фильтры (свертка), могу пересохранять в те же pickle на каждом этапе, например.
Отдельно stage1, отдельно все остальные этапы:
Для каждого нового этапа - свертка + maxpooling.
1. прохожусь по всему train датасету, формирую фильтры

In [None]:
class PCAFilter(filter_size, n_channels, im_size):
    def __init__(self,):
        self.pca = PCA(n_channels)
        self.filter_size = filter_size
        self.im_size = im_size
        self.n_channels = n_channels
        self.filters = None
    
    # возможно не train_paths, а train_dataset
    def get_stage1_filters(self, train_paths):
        N = len(train_paths)
        m = self.im_size
        k = self.filter_size
        
        # all patches of all images
        X = np.zeros((k ** 2, N * m ** 2))
        # заполняем матрицу, от одного изображения (k^2 x m^2), поэтому i:i+m**2
        for i, path in enumerate(train_paths):
            image_array = load_image(path) ########
            image_array = image_array.resize((m, m)) ##########
            im_patches = self.get_centered_patches(image_array)
            X[:, i:i+m**2] = im_patches
        
        self.pca.fit(X @ X.T) # for incrementalPCA - partial_fit
        
        filters = np.zeros((n_channels, k, k))
        
        for i in range(n_channels):
            filters[i, :, :] = self.pca.components_[i].reshape(k, k)
        
        self.filters = filters
            
    def get_centered_patches(self, image) -> np.ndarray:
        k = self.filter_size
        # по краям добавим k // 2 ячеек с нулями
        padded_image = np.pad(image, (k // 2, k // 2), 'constant', constant_values=0) # можно не 0
        patches = []

        for i in range(image.shape[0]):
            for j in range(image.shape[1]):
                patch_matrix = padded_image[i:i+k, j:j+k]
                patches.append(patch_matrix)

        patches = np.array(patches, dtype=np.float32).T # k x k x m^2
        patches = patches.reshape(-1, X_im.shape[2]) # k^2 x m^2
        
        patch_mean = np.mean(patches, axis=0)
        patches -= patch_mean.reshape(-1, patch_mean.shape[0])
        return patches  

In [3]:
test_image

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [21]:
pad1_im = np.pad(test_image, (1, 1), 'constant', constant_values=0)
pad1_im

array([[ 0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  1,  2,  3,  4,  0],
       [ 0,  5,  6,  7,  8,  9,  0],
       [ 0, 10, 11, 12, 13, 14,  0],
       [ 0, 15, 16, 17, 18, 19,  0],
       [ 0,  0,  0,  0,  0,  0,  0]])

In [11]:
kernel = np.eye(3)
kernel

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [12]:
# Займемся конволюцией:
from scipy import signal


In [22]:
signal.convolve2d(pad1_im, kernel, mode='valid')

array([[ 6.,  8., 10., 12.,  4.],
       [16., 18., 21., 24., 12.],
       [26., 33., 36., 39., 22.],
       [15., 26., 28., 30., 32.]])

In [23]:
signal.convolve2d(test_image, kernel, mode='same')

array([[ 6.,  8., 10., 12.,  4.],
       [16., 18., 21., 24., 12.],
       [26., 33., 36., 39., 22.],
       [15., 26., 28., 30., 32.]])

In [59]:
from numpy import savez_compressed

def convolution(filters, image, src_path, pad_size=(1, 1)):
    h_in, w_in = image.shape
    nchannels = filters.shape[0]
    kernel_size = filters.shape[1:]
    print(kernel_size)
    
    h_out = int((h_in + 2 * pad_size[0] - (kernel_size[0] - 1) - 1) + 1)
    w_out = int((w_in + 2 * pad_size[1] - (kernel_size[1] - 1) - 1) + 1)
    
    print(image.shape, h_out, w_out)
    print(image)
    
    features_map = np.zeros((nchannels, h_out, w_out))
    for ch in range(nchannels):
        kernel = filters[ch]
        padded_image = np.pad(image, pad_size, 'constant', constant_values=0)
        features_map[ch, :, :] = signal.convolve2d(padded_image, kernel, mode='valid')
    print()
    print(features_map)
    #for i, path in enumerate(train_paths):
    #    image = cv2.imread(path, 0)
    #    for ch in range(nchannels):
    #        kernel = filters[ch]
    #        padded_image = np.pad(image, pad_size, 'constant', constant_values=0)
    #        features_map[ch, :, :] = signal.convelve2d(padded_image, kernel, mode='valid')
        # save features_map
    #savez_compressed(src_path + '.npz', features_map)
        
    

In [62]:
filters = np.array([np.eye(3), [[0, 0, 0], [0, 0, 0], [0, 0, 1]]])
filters

array([[[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 1.]]])

In [63]:
convolution(filters, test_image, '1', (0, 0))

(3, 3)
(4, 5) 2 3
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]

[[[18. 21. 24.]
  [33. 36. 39.]]

 [[ 0.  1.  2.]
  [ 5.  6.  7.]]]


In [68]:
import torch
from torch.nn.functional import conv2d

In [113]:
image

tensor([[[[ 0,  1,  2,  3,  4],
          [ 5,  6,  7,  8,  9],
          [10, 11, 12, 13, 14],
          [15, 16, 17, 18, 19]]]])

In [114]:
filters

array([[[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 1.]]])

In [None]:
# torch.save(tensor, 'file.pt') and torch.load('file.pt')

In [116]:
weights = torch.LongTensor(filters)
weights = weights.view(2, 1, 3, 3)
weights.shape
weights

tensor([[[[1, 0, 0],
          [0, 1, 0],
          [0, 0, 1]]],


        [[[0, 0, 0],
          [0, 0, 0],
          [0, 0, 1]]]])

In [110]:
image = torch.from_numpy(test_image)
image = image.view(1, 1, *test_image.shape)
image.shape

torch.Size([1, 1, 4, 5])

In [111]:
weights.shape

torch.Size([2, 1, 3, 3])

In [115]:
conv2d(image, weights).shape

torch.Size([1, 2, 2, 3])

In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import torch
from torchvision import transforms, models

# from PIL import Image
# from colour_demosaicing import demosaicing_CFA_Bayer_Menon2007

In [444]:
import torch.nn.functional as F
from sklearn.decomposition import PCA, IncrementalPCA
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import torch
from torchvision import transforms, models


class PCANet(torch.nn.Module):
    def __init__(self, num_filters: list, filters_sizes: list, batch_size=256):
        self.params = {
            'num_filters': num_filters,
            'filters_sizes': filters_sizes,
        }
        self.W_1 = None
        self.W_2 = None
        self.batch_size = batch_size

    def forward(self, x):
        x = F.conv2d(x, self.W_1)
        N, C, H, W = x.shape
        x = x.view(-1, 1, H, W)
        
        x = F.conv2d(x, self.W_2)
        N, C, H, W = x.shape
        x = x.view(N*C, H, W)
        
        x_flat = x.view(N*C, H*W)
        print("N = {}, C = {}, H = {}, W = {}".format(N, C, H, W), x_flat.shape)
        x_flat = torch.nn.Linear(H*W, 2, bias=True)(x_flat)
        return x_flat
            
    @staticmethod        
    def _extract_image_patches(imgs: torch.Tensor, filter_size, stride=1, remove_mean=True):
        # imgs.shape = (N, C, H, W) -> (N, 1, H, W) 
        # так должно быть, но сюда могут прийти не grayscale изображения первого шага, а со второго
        # на котором применено L1 фильтров -> L1 каналов
        N, n_channels, H, W = imgs.shape
        
        if n_channels > 1:
            # изображение вида (N, C, H, W) - N C-канальных изображений
            # приводим к виду (N*C, 1, H, W) - N*C одно-канальных изображений
            imgs = imgs.view(-1, 1, H, W)
        print('images shape', imgs.shape)
            
        k = filter_size
        patches = torch.nn.functional.unfold(imgs, k, padding=k//2) # (N, k^2, H*W)
        print('patches_shape, ', patches.shape)
        print('should be patches shape, ', (imgs.shape[0], k**2, H*W))
        
        if remove_mean:
            patches -= patches.mean(dim=1, keepdim=True) # последнее измерение - количество патчей
        
        print('filter_size', k)
        X = patches.view(k**2, -1) # (k^2, N*H*W)

        return X
    
    def _convolve(self, imgs: torch.Tensor, filter_bank: torch.Tensor) -> torch.Tensor:
        weight = filter_bank
        output = F.conv2d(imgs, weight) #, padding=padding)
        return output
    
    def _first_stage(self, imgs: torch.Tensor, train: bool) -> torch.Tensor:
        # (N, C, H, W) image
        # (train_size, 1, H, W) - grayscale
        assert imgs.dim() == 4 and imgs.nelement() > 0

        print('PCANet first stage...')

        if train:
            # достаем все патчи из всех N изображений
            filter_size1 = self.params['filters_sizes'][0]
            X = self._extract_image_patches(
                imgs, filter_size1)
            
            n_filters = self.params['num_filters'][0]
            
            eigenvectors = self.get_pca_eigenvectors(X, n_components=n_filters, batch_size=self.batch_size)
            self.W_1 = torch.FloatTensor(eigenvectors).view(n_filters, 1, filter_size1, filter_size1)
         
        I = self._convolve(imgs, self.W_1)  # (N, 1, H, W) * (L1, k1, k1) -> (N, L1, H', W')
        return I
    
    @staticmethod
    def conv_output_size(w, filter_size, padding=0, stride=1):
        return int((w - filter_size + 2 * padding) / stride + 1)
    
    @staticmethod
    def get_pca_eigenvectors(X, n_components, batch_size=100):
        ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
        print('pca fitting ...')
        ipca.fit(X @ X.t())
        eigenvectors = ipca.components_
        print('eigenvectors shape:', eigenvectors.shape)
        return eigenvectors
        
    def _second_stage(self, I: torch.Tensor, train):
        print('PCANet second stage...')
        # I: (N, L1, H, W)
        if train:
            N, L1, H, W = I.shape
            I = I.view(-1, 1, H, W)
            filter_size2 = self.params['filters_sizes'][1]
            n_filters = self.params['num_filters'][1]
            
            H_new = self.conv_output_size(I.shape[2], filter_size2)
            W_new = self.conv_output_size(I.shape[3], filter_size2)
            
            X = self._extract_image_patches(I, filter_size2)
            print('X_SHAPE ', X.shape)
            eigenvectors = self.get_pca_eigenvectors(X, n_components=n_filters, batch_size=self.batch_size)
            self.W_2 = torch.FloatTensor(eigenvectors).view(n_filters, 1, filter_size2, filter_size2)
        return self._convolve(I, self.W_2)

In [445]:
net = PCANet([8, 8],[5, 3])
net.params

{'num_filters': [8, 8], 'filters_sizes': [5, 3]}

In [446]:
imgs = torch.randn(10, 1, 10, 10)

In [447]:
I = net._first_stage(imgs=imgs, train=True)
I.shape

PCANet first stage...
images shape torch.Size([10, 1, 10, 10])
patches_shape,  torch.Size([10, 25, 100])
should be patches shape,  (10, 25, 100)
filter_size 5
pca fitting ...
eigenvectors shape: (8, 25)


torch.Size([10, 8, 6, 6])

In [448]:
net._second_stage(I, train=True).shape

PCANet second stage...
images shape torch.Size([80, 1, 6, 6])
patches_shape,  torch.Size([80, 9, 36])
should be patches shape,  (80, 9, 36)
filter_size 3
X_SHAPE  torch.Size([9, 2880])
pca fitting ...
eigenvectors shape: (8, 9)


torch.Size([80, 8, 4, 4])

In [449]:
imgs.shape

torch.Size([10, 1, 10, 10])

In [450]:
net.forward(imgs)

N = 80, C = 8, H = 4, W = 4 torch.Size([640, 16])


tensor([[ 0.6253,  0.6168],
        [-0.8436, -0.8203],
        [ 1.3822, -0.0567],
        ...,
        [ 0.3068, -0.6910],
        [ 0.6599, -0.2581],
        [ 0.9608, -0.0107]], grad_fn=<AddmmBackward>)

In [439]:
S = 2 # channel dim
W = 5 # width
H = 5 # height
batch_size = 2

x = torch.randn(batch_size, S, W, H)
x.shape

torch.Size([2, 2, 5, 5])

In [178]:
x

tensor([[[[-1.7921e-01, -9.1692e-01, -4.6317e-01,  1.6869e+00,  1.4613e+00],
          [-5.7550e-01,  5.8393e-01, -1.5300e+00,  9.7792e-01,  8.4493e-01],
          [-4.2220e-01,  7.0116e-01, -1.4611e+00,  1.2394e+00, -1.0279e+00],
          [ 4.7105e-01, -2.6948e-01,  4.5076e-01, -9.2152e-01,  4.1819e-01],
          [-1.2578e+00,  1.6262e-01,  1.7925e-01,  8.2062e-01,  5.4471e-01]],

         [[-5.7162e-01,  4.2349e-01,  7.7400e-01, -1.0808e+00, -1.7399e+00],
          [-9.5978e-01, -1.2405e+00,  5.1695e-01,  1.4557e-01,  2.6387e-02],
          [-3.9332e-01,  4.8027e-02, -1.0529e+00, -6.9203e-01,  1.4546e+00],
          [-2.3435e+00,  1.4405e+00,  8.3949e-01,  5.8923e-01, -5.4668e-01],
          [-1.4460e+00,  8.4668e-01, -1.4356e-01, -1.1934e-01, -1.6069e-02]]],


        [[[ 7.5386e-01, -2.7837e+00,  2.1490e+00, -9.4989e-01, -4.0952e-01],
          [ 1.2360e+00, -2.5064e-01,  6.0600e-03,  1.4662e-01, -2.5596e-02],
          [ 3.1904e-02, -1.5852e+00, -6.8290e-01,  8.3062e-01, -6.3950

In [143]:
patch_size = 3

In [179]:
patches = torch.nn.functional.unfold(x, patch_size, padding=patch_size//2)
patches.shape

torch.Size([2, 18, 25])

In [187]:
x.shape

torch.Size([2, 2, 5, 5])

In [189]:
x.view(-1, 1, 5, 5).shape

torch.Size([4, 1, 5, 5])

In [203]:
patches = torch.nn.functional.unfold(x.view(-1, 1, 5, 5), patch_size, padding=patch_size//2)

In [185]:
dim=1
stride=1

(x.unfold(dim + 1, patch_size, stride)
             .unfold(dim + 2, patch_size, stride)).shape

torch.Size([2, 2, 3, 3, 3, 3])

In [146]:
# извлекает блоки patch_size x patch_size. данный элемент - левый верхний в кернеле

In [169]:
patches.mean(dim=1, keepdim=True)

tensor([[[ 0.0445, -0.0971, -0.1128,  0.0563,  0.1979,  0.0251, -0.1666,
          -0.1968,  0.2062,  0.3979, -0.0838, -0.4481, -0.6883, -0.3130,
           0.0513, -0.0806, -0.0708, -0.0035,  0.4447,  0.4349, -0.0611,
          -0.0012,  0.0804,  0.2948,  0.2349]]])

In [159]:
patches

tensor([[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.2136,
           0.6934,  0.7696,  1.4638,  0.0000,  0.2672, -0.3461, -2.0440,
          -1.5522,  0.0000, -0.5958,  0.4206, -0.4511, -0.7251,  0.0000,
           0.4785, -0.9787, -0.7833,  0.2650],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.2136,  0.6934,
           0.7696,  1.4638,  2.4094,  0.2672, -0.3461, -2.0440, -1.5522,
          -0.5400, -0.5958,  0.4206, -0.4511, -0.7251,  2.5252,  0.4785,
          -0.9787, -0.7833,  0.2650,  0.4890],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.6934,  0.7696,
           1.4638,  2.4094,  0.0000, -0.3461, -2.0440, -1.5522, -0.5400,
           0.0000,  0.4206, -0.4511, -0.7251,  2.5252,  0.0000, -0.9787,
          -0.7833,  0.2650,  0.4890,  0.0000],
         [ 0.0000, -0.2136,  0.6934,  0.7696,  1.4638,  0.0000,  0.2672,
          -0.3461, -2.0440, -1.5522,  0.0000, -0.5958,  0.4206, -0.4511,
          -0.7251,  0.0000,  0.4785, -0.9787, -0.7833,  

In [204]:
patches.shape

torch.Size([4, 9, 25])

In [205]:
patches_centerd = patches - patches.mean(dim=1, keepdim=True)
patches_centerd.shape

torch.Size([4, 9, 25])

In [153]:
patches.view(patch_size**2, batch_size*H**2).shape

torch.Size([9, 25])

In [172]:
patches_centerd[0,0]

tensor([-0.0445,  0.0971,  0.1128, -0.0563, -0.1979, -0.0251, -0.0469,  0.8902,
         0.5634,  1.0659,  0.0838,  0.7153,  0.3422, -1.7310, -1.6035,  0.0806,
        -0.5250,  0.4241, -0.8958, -1.1599,  0.0611,  0.4797, -1.0591, -1.0781,
         0.0301])

In [171]:
patches[0, 0] - patches.mean(dim=1, keepdim=True)[0,0]

tensor([-0.0445,  0.0971,  0.1128, -0.0563, -0.1979, -0.0251, -0.0469,  0.8902,
         0.5634,  1.0659,  0.0838,  0.7153,  0.3422, -1.7310, -1.6035,  0.0806,
        -0.5250,  0.4241, -0.8958, -1.1599,  0.0611,  0.4797, -1.0591, -1.0781,
         0.0301])

In [193]:
from sklearn.decomposition import PCA, IncrementalPCA

In [200]:
patches_centerd.shape

torch.Size([2, 18, 25])

In [217]:
X = patches.view(patch_size**2, -1)
X.shape

torch.Size([9, 100])

In [206]:
ipca = IncrementalPCA(n_components=5, batch_size=10)

In [218]:
ipca.fit(X@X.t())

IncrementalPCA(batch_size=10, copy=True, n_components=5, whiten=False)

In [219]:
ipca.components_.shape

(5, 9)

In [220]:
ipca.components_[0].reshape(patch_size, patch_size)

array([[ 0.20935935, -0.24869507, -0.17104915],
       [-0.11947887, -0.17755327,  0.67835001],
       [-0.0100398 , -0.35135031,  0.48533975]])

In [221]:
pca = PCA(n_components=5)
pca.fit(X@X.t())

PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [222]:
pca.components_[0].reshape(patch_size, patch_size)

array([[ 0.20935935, -0.24869507, -0.17104915],
       [-0.11947887, -0.17755327,  0.67835001],
       [-0.0100398 , -0.35135031,  0.48533975]])

In [223]:
# надо достать и отсортировать с.в. соответствующие отсортированным в порядке убывания с.з.


In [226]:
ipca.singular_values_

array([129.20387758,  96.00793685,  89.69254703,  75.68971386,
        65.87686794])