# Tutorial 6, Example 1

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Print the installed PyTorch version so the recorded outputs can be tied to it.
print(torch.__version__)

2.0.1+cu118


In [2]:
# 6x6 single-channel input image, stored as float32
I = np.array(
    [
        [0.7, 0.1, 0.2, 0.3, 0.3, 0.5],
        [0.8, 0.1, 0.3, 0.5, 0.1, 0.0],
        [1.0, 0.2, 0.0, 0.3, 0.2, 0.7],
        [0.8, 0.1, 0.5, 0.6, 0.3, 0.4],
        [0.1, 0.0, 0.9, 0.3, 0.3, 0.2],
        [1.0, 0.1, 0.4, 0.5, 0.2, 0.8],
    ],
    dtype=np.float32,
)

# Prepend batch and channel axes: (6, 6) -> (1, 1, 6, 6), the layout nn.Conv2d takes
I_tensor = torch.tensor(I)[None, None]

In [3]:
# Model definition
class MyModel(nn.Module):
    """Single 3x3 convolution (1 input channel, 2 output channels) followed by a sigmoid.

    Args:
        pad: Padding forwarded to ``nn.Conv2d``.
        stride: Stride forwarded to ``nn.Conv2d``.
    """

    def __init__(self, pad, stride):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=1,
            out_channels=2,
            kernel_size=3,
            stride=stride,
            padding=pad,
        )

    def forward(self, x):
        """Return ``(u, y)``: the raw convolution output and its sigmoid activation."""
        pre_activation = self.conv(x)
        activation = torch.sigmoid(pre_activation)
        return pre_activation, activation

# MyModel Explanation

`MyModel` is a simple neural network model built using PyTorch's `nn.Module`. Here's a breakdown of its components:

## Initialization (`__init__` method)

1. **Convolutional Layer (`self.conv`)**:
    - **Input Channels**: 1 (Assuming grayscale images)
    - **Output Channels**: 2 (Number of filters)
    - **Kernel Size**: 3x3
    - **Stride**: Defined by the `stride` parameter passed during model initialization.
    - **Padding**: Defined by the `pad` parameter passed during model initialization.

## Forward Pass (`forward` method)

1. **Convolution**:
    - The input `x` is passed through the convolutional layer to produce `u`.

2. **Activation**:
    - The output `u` from the convolutional layer is passed through a sigmoid activation function to produce the final output `y`.

The model returns both the pre-activation convolution output `u` and the activated output `y`.

This model is designed to be flexible with respect to the stride and padding of the convolutional layer, as these are parameters that can be specified during model instantiation.


In [4]:
# Two hand-picked 3x3 filters (one per output channel), stored as float32
w = np.array(
    [
        [[0, 1, 1], [1, 0, 1], [1, 1, 0]],
        [[-1, -1, -1], [0, 0, 0], [1, 1, 1]],
    ],
    dtype=np.float32,
)

# Insert the singleton input-channel axis: (2, 3, 3) -> (2, 1, 3, 3)
w = np.expand_dims(w, axis=1)

# One bias value per filter
b = np.array([0.1, 0.1], dtype=np.float32)

# Tensor copies of the filters and biases
w_tensor = torch.tensor(w)
b_tensor = torch.tensor(b)

print(w)

[[[[ 0.  1.  1.]
   [ 1.  0.  1.]
   [ 1.  1.  0.]]]


 [[[-1. -1. -1.]
   [ 0.  0.  0.]
   [ 1.  1.  1.]]]]


# Setting Filters and Bias

We define the filters (or kernels) and bias for a convolutional layer.

## Filters (`w`)

1. **Filter 1**:
    ```
    [[ 0,  1,  1],
     [ 1,  0,  1],
     [ 1,  1,  0]]
    ```

2. **Filter 2**:
    ```
    [[-1, -1, -1],
     [ 0,  0,  0],
     [ 1,  1,  1]]
    ```

- The filters are reshaped to fit the expected shape for PyTorch's convolutional layer, which is `(out_channels, in_channels, height, width)`. Given that these filters are for grayscale images (single channel), the shape becomes `(2, 1, 3, 3)`.

## Bias (`b`)

- The bias values are set to `0.1` for both filters. Bias helps in shifting the output of the filter and can improve the flexibility of the model.

## Conversion to Tensors

- The numpy arrays for filters and bias are converted to PyTorch tensors using `torch.tensor()`. This conversion is necessary because PyTorch models expect parameters in tensor format.

Finally, the filters (`w`) are printed to verify their values.


In [5]:
# padding = 0 and strides = (1,1) for convolution
model = MyModel(0, (1,1))
# Load the hand-picked filters and biases by copying their values into the
# layer's own parameters. Copying under no_grad (instead of rebinding `.data`)
# keeps the parameters from sharing storage with w_tensor / b_tensor, which
# are reused for other model instances.
with torch.no_grad():
    model.conv.weight.copy_(w_tensor)
    model.conv.bias.copy_(b_tensor)
# u_v is the raw convolution output, y_v its sigmoid activation
u_v, y_v = model(I_tensor)
print('padding = 0 and strides = (1,1) for convolution:\n')
print(u_v.detach().numpy())
print(y_v.detach().numpy())

padding = 0 and strides = (1,1) for convolution:

[[[[ 2.70000005e+00  1.39999998e+00  1.39999998e+00  1.89999998e+00]
   [ 2.40000010e+00  2.00000000e+00  2.00000000e+00  2.09999990e+00]
   [ 1.70000005e+00  2.00000000e+00  2.59999990e+00  2.59999990e+00]
   [ 2.79999995e+00  2.00000000e+00  3.09999990e+00  2.00000000e+00]]

  [[ 3.00000012e-01  7.45058060e-09 -2.00000018e-01  1.99999958e-01]
   [ 2.99999982e-01  4.00000006e-01  6.00000024e-01  8.00000072e-01]
   [-1.00000046e-01  8.00000012e-01  1.10000002e+00 -2.99999982e-01]
   [ 2.00000048e-01 -1.00000046e-01 -2.00000018e-01  2.99999982e-01]]]]
[[[[0.9370266  0.8021839  0.8021839  0.8698916 ]
   [0.9168273  0.880797   0.880797   0.8909032 ]
   [0.8455348  0.880797   0.9308616  0.9308616 ]
   [0.9426758  0.880797   0.9568927  0.880797  ]]

  [[0.5744425  0.5        0.45016602 0.54983395]
   [0.5744425  0.59868765 0.6456563  0.6899745 ]
   [0.4750208  0.6899745  0.7502601  0.4255575 ]
   [0.549834   0.4750208  0.45016602 0.5744425 ]]]]

In [6]:
# Max pooling: 2x2 square window moved with stride 2 over the activations
max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
max_output = max_pool(y_v)
print(max_output)

# Mean (average) pooling: same 2x2 window and stride 2 over the same activations
avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
mean_output = avg_pool(y_v)
print(mean_output)

tensor([[[[0.9370, 0.8909],
          [0.9427, 0.9569]],

         [[0.5987, 0.6900],
          [0.6900, 0.7503]]]], grad_fn=<MaxPool2DWithIndicesBackward0>)
tensor([[[[0.8842, 0.8609],
          [0.8875, 0.9249]],

         [[0.5619, 0.5839],
          [0.5475, 0.5501]]]], grad_fn=<AvgPool2DBackward0>)


In [7]:
# padding = 1 and strides = (2,2) for convolution
model = MyModel(1, (2,2))
# Load the hand-picked filters and biases by copying their values into the
# layer's own parameters. Copying under no_grad (instead of rebinding `.data`)
# keeps the parameters from sharing storage with w_tensor / b_tensor, which
# are reused for other model instances.
with torch.no_grad():
    model.conv.weight.copy_(w_tensor)
    model.conv.bias.copy_(b_tensor)
# u_v is the raw convolution output, y_v its sigmoid activation
u_v, y_v = model(I_tensor)
print('padding = 1 and strides = (2,2) for convolution:\n')
print(u_v.detach().numpy())
print(y_v.detach().numpy())

padding = 1 and strides = (2,2) for convolution:

[[[[ 1.          0.9000001   1.5000001 ]
   [ 2.          2.          2.1       ]
   [ 2.          2.          2.        ]]

  [[ 1.          1.          0.70000005]
   [ 0.1         0.4         0.8000001 ]
   [ 0.29999998 -0.10000005  0.29999998]]]]
[[[[0.7310586  0.71094954 0.8175745 ]
   [0.880797   0.880797   0.8909032 ]
   [0.880797   0.880797   0.880797  ]]

  [[0.7310586  0.7310586  0.6681878 ]
   [0.5249792  0.59868765 0.6899745 ]
   [0.5744425  0.4750208  0.5744425 ]]]]


In [8]:
# Max pooling: 2x2 square window moved with stride 2 over the activations.
# The feature maps here are 3x3, so only one full window fits and the last
# row and column do not contribute to the 1x1 result.
max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
max_output = max_pool(y_v)
print(max_output)

# Mean (average) pooling: same 2x2 window and stride 2 over the same activations
avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
mean_output = avg_pool(y_v)
print(mean_output)

tensor([[[[0.8808]],

         [[0.7311]]]], grad_fn=<MaxPool2DWithIndicesBackward0>)
tensor([[[[0.8009]],

         [[0.6464]]]], grad_fn=<AvgPool2DBackward0>)
