In [1]:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
import pathlib 
import os
import pickle

import torch
from torch import nn
from torch.utils.data import DataLoader
device = "cuda" if torch.cuda.is_available() else "cpu"

import PT_files.save_load as sl
from DnCNN_NP.layers  import relu, np_BatchNorm2d

import time 
from collections import OrderedDict
import pdb

**The goal of this notebook is to implement the optimization we found in notebook `11B_testing_im2col_times` where we call `get_indices` 3 times and then saving those 3 indice matrices. Then we use `np.ravel_multi_index()` in `im2col`.**

**NOTE: This is for a full 6k by 6k image.**

This notebook is creating the respective index matrices, so that it can be used in another notebook which just uses the already created matrices, instead of creating their own every call. This is supposedly to save a lot of time.

In [2]:
# Loading data & weights dictionary

PATH = pathlib.Path(os.getenv('PSCRATCH'))
DATA = PATH / 'DESI_dn' /'Model_params'
assert DATA.exists()
# name = '6k_model_wb_e800_lys20_58feat.pth'
name = '6k_model_wb_e800.pth'
# weights = np.load(DATA / name)
weights = torch.load(str(DATA / name))


#Load the actual data that we're working on & print the shape of this data
test_data = sl.NERSC_load('test_data_40%_6000.npy')
sample = test_data[0]
print('Shape of test set=', sample.shape)

samp = sample[0][0]
samp = samp.reshape((1, 1, 6000, 6000))

Shape of test set= (108, 1, 6000, 6000)


Need to call this three times:
1. First for the untransformed input. (1 channel -> 64 channels)
2. For the middle layers (64 channels -> 64 channels)
3. For the last layer (64 channels -> 1 channel)

In [3]:
def get_indices(input_data, weights_dict, prefix, stride=1, padding=1):

    # Get input size
    
    # Checking to see if a single sample or a batch of samples is given.
    # If batch take the batch_size, in_channels, H, and W
    # If single sample is given reshape so the values above can be calculated
    if len(input_data.shape) == 4:
    
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)
        
    elif len(input_data.shape) == 3:
        
        input_data = input_data.reshape((1, 1, 6000 , 6000))
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)
        
    # Load the weights and biases needed for a convolution
    # then take off gpu memory, move to CPU memory,
    # and lastly transform to numpy
    weight = weights_dict[str(prefix) + 'weight']
    weight = weight.detach().cpu().numpy()
    
    bias = weights_dict[str(prefix) + 'bias']
    bias = bias.detach().cpu().numpy()
    
    # Calculate the kernel size and output channels from
    # the loaded weights from above
    kernel_size = weight[0][0].shape
    output_channels = len(weight)
    
    # Calculations for the output H and W dimensions.
    height_out = ((height + (2*padding) - (kernel_size[0] - 1) - 1) / stride) + 1
    height_out = int(height_out)
    width_out = ((width + (2*padding) - (kernel_size[1] - 1) - 1) / stride) + 1
    width_out = int(width_out)
    
    
    # ----Compute matrix of index i----

    # Level 1 vector.
    level1 = np.repeat(np.arange(kernel_size[0]), kernel_size[1])
    # Duplicate for the other channels.
    level1 = np.tile(level1, input_channels)
    # Create a vector with an increase by 1 at each level.
    everyLevels = stride * np.repeat(np.arange(height_out), width_out)
    # Create matrix of index i at every levels for each channel.
    i = level1.reshape(-1, 1) + everyLevels.reshape(1, -1)
    
    # ----Compute matrix of index j----
    
    # Slide 1 vector.
    slide1 = np.tile(np.arange(kernel_size[1]), kernel_size[0])
    # Duplicate for the other channels.
    slide1 = np.tile(slide1, input_channels)
    # Create a vector with an increase by 1 at each slide.
    everySlides = stride * np.tile(np.arange(width_out), height_out)
    # Create matrix of index j at every slides for each channel.
    j = slide1.reshape(-1, 1) + everySlides.reshape(1, -1)
    
    # ----Compute matrix of index d----

    # This is to mark delimitation for each channel
    # during multi-dimensional arrays indexing.
    d = np.repeat(np.arange(input_channels), kernel_size[0] * kernel_size[1]).reshape(-1, 1)
    
    
    return i, j, d


def im2col(input_data, weights_dict, prefix, stride=1, padding=1):
    """
        Transforms our input image into a matrix.

        Parameters:
        -----------
        input_data: nd.array
            The input image(s)
        weights_dict: OrderedDict
            Dictionary containing the PyTorch trained weights for every 
            layer of the model
        prefix: str
            The prefix that picks out the specific layer's weights to be used
            E.g. prefix='layers.0.0.' would be the first layers convolutional
            weights and bias's

        Returns:
        --------
        cols: output matrix.
    """

    if len(input_data.shape) == 4:
    
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)
        
    elif len(input_data.shape) == 3:
        
        input_data = input_data.reshape((1, 1, 6000 , 6000))
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)

    # Padding
    input_padded = np.pad(input_data, ((0,0), (0,0), (padding, padding), (padding, padding)), mode='constant')
    i, j, d = get_indices(input_data=input_data, weights_dict=weights_dict, prefix=prefix)
    # Multi-dimensional arrays indexing.
    cols = input_padded[:, d, i, j]
    cols = np.concatenate(cols, axis=-1)
    
    
    return cols


def np_Conv2d(input_data, weights_dict, prefix):
    """
        Performs a forward convolution.

        Parameters:
        - X : Last conv layer of shape (m, n_C_prev, n_H_prev, n_W_prev).
        Returns:
        - out: previous layer convolved.
    """
    
    conv_start = time.perf_counter()
    if len(input_data.shape) == 4:
    
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)
        
    elif len(input_data.shape) == 3:
        
        input_data = input_data.reshape((1, 1, 6000 , 6000))
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)


    output_channels = len(weights_dict[str(prefix) + 'weight']) # num_of_filters
    height_out = int((height + 2 * 1 - 3)/ 1) + 1
    width_out = int((width + 2 * 1 - 3)/ 1) + 1

    X_col = im2col(input_data=input_data, weights_dict=weights_dict, prefix=prefix)
    w_col = weights_dict[str(prefix) + 'weight'].detach().cpu().numpy().reshape((output_channels, -1))
    b_col = weights_dict[str(prefix) + 'bias'].detach().cpu().numpy().reshape(-1, 1)
    # Perform matrix multiplication.
    out = w_col @ X_col + b_col
    # Reshape back matrix to image.
    out = np.array(np.hsplit(out, batch_size)).reshape((batch_size, output_channels, height_out, width_out))
    
    conv_end = time.perf_counter()
    print('Conv takes:', conv_end-conv_start, 'seconds')
    return out


def im2col_save(input_data, weights_dict, prefix, stride=1, padding=1):
    """
        Transforms our input image into a matrix.

        Parameters:
        -----------
        input_data: nd.array
            The input image(s)
        weights_dict: OrderedDict
            Dictionary containing the PyTorch trained weights for every 
            layer of the model
        prefix: str
            The prefix that picks out the specific layer's weights to be used
            E.g. prefix='layers.0.0.' would be the first layers convolutional
            weights and bias's

        Returns:
        --------
        cols: output matrix.
    """
    im2col_start = time.perf_counter()

    if len(input_data.shape) == 4:
    
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)
        
    elif len(input_data.shape) == 3:
        
        input_data = input_data.reshape((1, 1, 6000 , 6000))
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)

    # Padding
    input_padded = np.pad(input_data, ((0,0), (0,0), (padding, padding), (padding, padding)), mode='constant')
    i, j, d = get_indices(input_data=input_data, weights_dict=weights_dict, prefix=prefix)
    # Multi-dimensional arrays indexing.
    idx = np.ravel_multi_index(([0], d, i, j), input_padded.shape)
    cols2 = input_padded.reshape(-1)[idx]  

    return cols2

In [4]:
# DnCNN Model

# 1st layer block
idx_start = im2col_save(input_data=samp, weights_dict=weights, prefix='layers.0.0.')
conv_out0 = np_Conv2d(input_data=samp, weights_dict=weights, prefix='layers.0.0.')
out0 = relu(conv_out0)

Conv takes: 4.894506996031851 seconds


In [13]:
6000*6000 / 2000*2000

36000000.0

# **Code is to see how long the first layer of the convolutional layer takes with the saved/stored index arrays.**

**This is for comparison with the 2k by 2k first layer convolution, which is found in notebook `12_Testing_Opt_Conv2d.ipynb` and takes 0.413180093979463 seconds**
- First layer convolution of 6k by 6k takes 2.392761211958714 seconds
    - That is 6 times **longer** for a 9 **larger** image 
- If the extrapolation can be allowed then the intermediate layers of the 6k by 6k model would also be 6 times longer than the intermediate convolutions of the 2k by 2k model
    - Ie. going from ~5 seconds for 2k by 2k to ~ 30 seconds for 6k by 6k
    - This would be 30 seconds for 18 convolution layers ie. 540 seconds == 1 hour
- **Total 6k by 6k model would take **~ $1 \pm 0.1$ hrs****
    - Compared to the 37 minutes of 21 calls needed by the afterburner of the 2k by 2k model
        - **~13 hours of computation**

**Intermediate Convolutions of the 2k by 2k model with saved index arrays takes, ~5.155830833973596 seconds**


**In Summary:**
- 2k by 2k model is faster even with the 21 calls by almost half the time. Need to tell David & also recheck these times again to make sure they're correct

In [9]:
def im2col2(input_data, im2col_mat, stride=1, padding=1):
    """
        Transforms our input image into a matrix.

        Parameters:
        -----------
        input_data: nd.array
            The input image(s)
        weights_dict: OrderedDict
            Dictionary containing the PyTorch trained weights for every 
            layer of the model
        prefix: str
            The prefix that picks out the specific layer's weights to be used
            E.g. prefix='layers.0.0.' would be the first layers convolutional
            weights and bias's

        Returns:
        --------
        cols: output matrix.
    """
    # Padding
    input_padded = np.pad(input_data, ((0,0), (0,0), (padding, padding), (padding, padding)), mode='constant')
    # Multi-dimensional arrays indexing.
    idx = im2col_mat
    cols2 = input_padded.reshape(-1)[idx]  
    
    return cols2


def np_Conv2d2(input_data, weights_dict, prefix, im2col_mat):
    """
        Performs a forward convolution.

        Parameters:
        - X : Last conv layer of shape (m, n_C_prev, n_H_prev, n_W_prev).
        Returns:
        - out: previous layer convolved.
    """
    
    conv_start = time.perf_counter()
    if len(input_data.shape) == 4:
    
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)
        
    elif len(input_data.shape) == 3:
        
        input_data = input_data.reshape((1, 1, 2000 , 2000))
        batch_size, input_channels, height, width = input_data.shape # (N, Cin, Hin, Win)


    output_channels = len(weights_dict[str(prefix) + 'weight']) # num_of_filters
    height_out = int((height + 2 * 1 - 3)/ 1) + 1
    width_out = int((width + 2 * 1 - 3)/ 1) + 1

    
    X_col = im2col_mat
    w_col = weights_dict[str(prefix) + 'weight'].detach().cpu().numpy().reshape((output_channels, -1))
    b_col = weights_dict[str(prefix) + 'bias'].detach().cpu().numpy().reshape(-1, 1)
    # Perform matrix multiplication.
    out = w_col @ X_col + b_col
    # Reshape back matrix to image.
    out = np.array(np.hsplit(out, batch_size)).reshape((batch_size, output_channels, height_out, width_out))
    
    conv_end = time.perf_counter()
    print('Conv takes:', conv_end-conv_start, 'seconds')
    return out

In [11]:
conv_out0_2 = np_Conv2d2(input_data=samp, weights_dict=weights, prefix='layers.0.0.', im2col_mat=idx_start)

Conv takes: 2.392761211958714 seconds


Compare values of normal conv versus saved indice conv to check that they are in fact given similar values

In [12]:
np.allclose(conv_out0, conv_out0_2)

True

# **Code below is creating the model to use the outputs to create the multi-dimensional arrays of indices**

In [None]:

# 2nd layer block
idx_mid = im2col_save(input_data=out0, weights_dict=weights, prefix='layers.1.0.')
# conv_out1 = np_Conv2d(input_data=out0, weights_dict=weights, prefix='layers.1.0.')
# batch_out1 = np_BatchNorm2d(x=conv_out1, weights_dict=weights, prefix='layers.1.1.')
# out1 = relu(batch_out1)
print(2)

In [None]:
# 2nd layer block
conv_out2 = np_Conv2d(input_data=out1, weights_dict=weights, prefix='layers.2.0.')
batch_out2 = np_BatchNorm2d(x=conv_out2, weights_dict=weights, prefix='layers.2.1.')
out2 = relu(batch_out2)
print(3)

# 2nd layer block
conv_out3 = np_Conv2d(input_data=out2, weights_dict=weights, prefix='layers.3.0.')
batch_out3= np_BatchNorm2d(x=conv_out3, weights_dict=weights, prefix='layers.3.1.')
out3 = relu(batch_out3)
print(4)

In [None]:
# 2nd layer block
conv_out4 = np_Conv2d(input_data=out3, weights_dict=weights, prefix='layers.4.0.')
batch_out4 = np_BatchNorm2d(x=conv_out4, weights_dict=weights, prefix='layers.4.1.')
out4 = relu(batch_out4)
print(5)

# 2nd layer block
conv_out5 = np_Conv2d(input_data=out4, weights_dict=weights, prefix='layers.5.0.')
batch_out5 = np_BatchNorm2d(x=conv_out5, weights_dict=weights, prefix='layers.5.1.')
out5 = relu(batch_out5)
print(6)

In [None]:
# 2nd layer block
conv_out6 = np_Conv2d(input_data=out5, weights_dict=weights, prefix='layers.6.0.')
batch_out6 = np_BatchNorm2d(x=conv_out6, weights_dict=weights, prefix='layers.6.1.')
out6 = relu(batch_out6)
print(7)

# 2nd layer block
conv_out7 = np_Conv2d(input_data=out6, weights_dict=weights, prefix='layers.7.0.')
batch_out7 = np_BatchNorm2d(x=conv_out7, weights_dict=weights, prefix='layers.7.1.')
out7 = relu(batch_out7)
print(8)

In [None]:
# 2nd layer block
conv_out8 = np_Conv2d(input_data=out7, weights_dict=weights, prefix='layers.8.0.')
batch_out8 = np_BatchNorm2d(x=conv_out8, weights_dict=weights, prefix='layers.8.1.')
out8 = relu(batch_out8)
print(9)

# 2nd layer block
conv_out9 = np_Conv2d(input_data=out8, weights_dict=weights, prefix='layers.9.0.')
batch_out9 = np_BatchNorm2d(x=conv_out9, weights_dict=weights, prefix='layers.9.1.')
out9 = relu(batch_out9)
print(10)

In [None]:
# 2nd layer block
conv_out10 = np_Conv2d(input_data=out9, weights_dict=weights, prefix='layers.10.0.')
batch_out10 = np_BatchNorm2d(x=conv_out10, weights_dict=weights, prefix='layers.10.1.')
out10 = relu(batch_out10)
print(11)

# 2nd layer block
conv_out11 = np_Conv2d(input_data=out10, weights_dict=weights, prefix='layers.11.0.')
batch_out11 = np_BatchNorm2d(x=conv_out11, weights_dict=weights, prefix='layers.11.1.')
out11 = relu(batch_out11)
print(12)

In [None]:
# 2nd layer block
conv_out12 = np_Conv2d(input_data=out11, weights_dict=weights, prefix='layers.12.0.')
batch_out12 = np_BatchNorm2d(x=conv_out12, weights_dict=weights, prefix='layers.12.1.')
out12 = relu(batch_out12)
print(13)

# 2nd layer block
conv_out13 = np_Conv2d(input_data=out12, weights_dict=weights, prefix='layers.13.0.')
batch_out13 = np_BatchNorm2d(x=conv_out13, weights_dict=weights, prefix='layers.13.1.')
out13 = relu(batch_out13)
print(14)

In [None]:
# 2nd layer block
conv_out14 = np_Conv2d(input_data=out13, weights_dict=weights, prefix='layers.14.0.')
batch_out14 = np_BatchNorm2d(x=conv_out14, weights_dict=weights, prefix='layers.14.1.')
out14 = relu(batch_out14)
print(15)

# 2nd layer block
conv_out15 = np_Conv2d(input_data=out14, weights_dict=weights, prefix='layers.15.0.')
batch_out15 = np_BatchNorm2d(x=conv_out15, weights_dict=weights, prefix='layers.15.1.')
out15 = relu(batch_out15)
print(16)

In [None]:
# 2nd layer block
conv_out16 = np_Conv2d(input_data=out15, weights_dict=weights, prefix='layers.16.0.')
batch_out16 = np_BatchNorm2d(x=conv_out16, weights_dict=weights, prefix='layers.16.1.')
out16 = relu(batch_out16)
print(17)

# 2nd layer block
conv_out17 = np_Conv2d(input_data=out16, weights_dict=weights, prefix='layers.17.0.')
batch_out17 = np_BatchNorm2d(x=conv_out17, weights_dict=weights, prefix='layers.17.1.')
out17 = relu(batch_out17)
print(18)

In [None]:
# 2nd layer block
conv_out18 = np_Conv2d(input_data=out17, weights_dict=weights, prefix='layers.18.0.')
batch_out18 = np_BatchNorm2d(x=conv_out18, weights_dict=weights, prefix='layers.18.1.')
out18 = relu(batch_out18)
print(19)

# Last layer
idx_last = im2col_save(input_data=out18, weights_dict=weights, prefix='layers.19.')
conv_out19 = np_Conv2d(input_data=out18, weights_dict=weights, prefix='layers.19.')
print(20)

resid_img = samp - conv_out19

In [None]:
im2col_layer_dict = {'start': idx_start, 'mid':idx_mid, 'last': idx_last}

sl.NERSC_save(name='im2col_layer_dict_6k.pkl', data=im2col_layer_dict)

In [None]:
im2col_mat = sl.NERSC_load(name='im2col_layer_dict_6k.pkl')