# Test of DQN

In [1]:
%run ../DQN

## Import

Test that the imports, e.g., `torch`, are available.

In [2]:
# Smoke test: build a layer from `nn` and feed it `torch` tensors to confirm both names are in scope.
# NOTE: per the PyTorch docs, MaxPool2d reads a 3-D input as (C, H, W), so the leading 1 is a channel dim, not a batch dim.
bar = nn.MaxPool2d(kernel_size = 1)
bar(torch.randn(1, 2, 3)) # 1 channel, 2 rows, 3 cols (not displayed: only the cell's last expression is shown)
bar(torch.randn(1, 3, 2)) # 1 channel, 3 rows, 2 cols

tensor([[[-1.0912,  1.5232],
         [ 0.0530, -0.3152],
         [-1.1230,  1.6398]]])

Calculating the output sizes of convolutions.

In [3]:
def convSize(W_or_H, K, P, S):
    """Return the output length of a convolution along one spatial axis.

    Evaluates ``(W_or_H - K + 2 * P) / S + 1`` using true division, so the
    result is a float and is NOT floored. A fractional result means the
    kernel/padding/stride combination does not divide the input evenly.

    Args:
        W_or_H: Input width or height.
        K: Kernel size along that axis.
        P: Padding, counted twice in the formula (once per side).
        S: Stride.

    Returns:
        The (possibly fractional) output size as a float.
    """
    padded_span = W_or_H - K + 2 * P
    return padded_span / S + 1

In [4]:
# Output sizes (K=4, P=1, S=2) for a 256-long axis divided by 1, 2 and 3.
# NOTE(review): 256 / (i + 1) gives 256, 128, 85.33..., which is not successive
# halving — confirm this is the intended input sequence.
for i in range(3):
    print(convSize(256 / (i + 1), K=4, P=1, S=2))

128.0
64.0
42.666666666666664


In [5]:
# Output sizes (K=4, P=1, S=2) for a 240-long axis divided by 1, 2 and 3.
# NOTE(review): 240 / (i + 1) gives 240, 120, 80, which is not successive
# halving — confirm this is the intended input sequence.
for i in range(3):
    print(convSize(240 / (i + 1), K=4, P=1, S=2))

120.0
60.0
40.0


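The padding is problematic for the conventional height: with K = 4, P = 1, S = 2, the 256 case yields a non-integer size (42.67) at the third step, whereas the 240 case stays integral.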

The use of "Adaptive" pooling seems like a convenient way to keep it simple.

Demonstration below.

In [6]:
#nn.AdaptiveAvgPool2d?

In [7]:
# Seed torch's global RNG so the random tensors drawn below are repeatable;
# the trailing `;` suppresses the cell's output.
torch.manual_seed(5004 * 2040);

In [8]:
# 1-D case: collapse a length-4 signal to a single value with a fixed-kernel
# max pool, an adaptive max pool and an adaptive average pool.
rando = torch.randn(1, 1, 4)  # 3-D input: (batch, channels, length) for the 1-D layers
m = nn.MaxPool1d(kernel_size = 4)
am = nn.AdaptiveMaxPool1d(output_size = 1)
# Use the 1-D average pool so all three layers agree on dimensionality.
# The original used AdaptiveAvgPool2d, which reads the 3-D input as (C, H, W);
# it produced the same numbers here but did not match the 1-D demonstration.
av = nn.AdaptiveAvgPool1d(output_size = 1)

print(rando)
print(m(rando))
print(am(rando))
print(av(rando))

tensor([[[-1.2976, -0.7983, -1.3913, -0.8556]]])
tensor([[[-0.7983]]])
tensor([[[-0.7983]]])
tensor([[[-1.0857]]])


In [9]:
# 2-D case on a 2x2 map: a 2x2 fixed-kernel max pool and the adaptive layers
# (asked for a 1x1 output) each reduce the map to a single value.
rando = torch.randn(1, 1, 2, 2)
m = nn.MaxPool2d(kernel_size=(2, 2))
am = nn.AdaptiveMaxPool2d(output_size=(1, 1))
av = nn.AdaptiveAvgPool2d(output_size=(1, 1))

# Input first, then each pooled result, one per line.
print(rando, m(rando), am(rando), av(rando), sep="\n")

tensor([[[[ 0.0954,  0.0912],
          [-0.8089,  0.0414]]]])
tensor([[[[0.0954]]]])
tensor([[[[0.0954]]]])
tensor([[[[-0.1452]]]])


In [10]:
# 2-D case on a 3x3 map: the fixed 2x2 max pool yields a single value, while
# the adaptive layers are asked for (and return) a 2x2 output.
rando = torch.randn(1, 1, 3, 3)
m = nn.MaxPool2d(kernel_size=(2, 2))
am = nn.AdaptiveMaxPool2d(output_size=(2, 2))
av = nn.AdaptiveAvgPool2d(output_size=(2, 2))

# Input first, then each pooled result, one per line.
print(rando, m(rando), am(rando), av(rando), sep="\n")

tensor([[[[-2.4695, -0.6672,  0.4332],
          [-0.6629, -0.5542,  0.5011],
          [-0.1603,  0.3374,  0.6364]]]])
tensor([[[[-0.5542]]]])
tensor([[[[-0.5542,  0.5011],
          [ 0.3374,  0.6364]]]])
tensor([[[[-1.0884, -0.0718],
          [-0.2600,  0.2302]]]])


## Test of DQN methods

### Initialization.

In [11]:
# Build two DQN instances under different seeds (5004 and 5005) so that their
# randomly initialized weights differ.
# NOTE: this rebinds `bar`, which earlier in the notebook held an nn.MaxPool2d layer.
# NOTE(review): presumably actionSpaceSize=4 sets the final layer's 4 outputs and
# RGB=True the 3 input channels seen in the printed network — confirm against DQN.
torch.manual_seed(5004);
foo = DQN(actionSpaceSize = 4, RGB = True)
torch.manual_seed(5004 + 1);
bar = DQN(actionSpaceSize = 4, RGB = True)

In [12]:
# Display the layer stack held in foo.network.
foo.network

Sequential(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
  (1): AdaptiveMaxPool2d(output_size=(128, 120))
  (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
  (3): AdaptiveMaxPool2d(output_size=(64, 60))
  (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2))
  (5): AdaptiveAvgPool2d(output_size=(32, 30))
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=61440, out_features=16, bias=True)
  (8): ReLU()
  (9): Linear(in_features=16, out_features=4, bias=True)
)

Without prompting, the weights are being randomly initialized.

In [13]:
# First output-channel kernel of the first conv layer (one 3x3 slice per input channel).
foo.network[0].weight[0]

tensor([[[-0.0933, -0.0275, -0.0402],
         [ 0.1401, -0.0463,  0.1098],
         [ 0.1761,  0.1548, -0.0608]],

        [[ 0.0390,  0.0166, -0.1324],
         [ 0.1270, -0.0665, -0.1211],
         [ 0.1313,  0.0997,  0.1840]],

        [[-0.1708, -0.1800,  0.0861],
         [-0.1086, -0.0730, -0.1493],
         [-0.1591,  0.0706,  0.0783]]], grad_fn=<SelectBackward0>)

### Saving and loading

In [14]:
# Save both models so their states can be reloaded in the cells below.
# NOTE(review): the paths are relative to the working directory and assume
# ../savedModels already exists — confirm.
foo.saveModel("../savedModels/foo5004.pth")
bar.saveModel("../savedModels/bar5005.pth")

# NOTE: rebinds `rando`, which the pooling demos above also used.
rando = torch.randn(1, 3, 256, 240) # random tensor for testing forward pass

Initial value for foo.

In [15]:
# Reference values: foo's first conv filter before any state is loaded.
foo.network[0].weight[0]

tensor([[[-0.0933, -0.0275, -0.0402],
         [ 0.1401, -0.0463,  0.1098],
         [ 0.1761,  0.1548, -0.0608]],

        [[ 0.0390,  0.0166, -0.1324],
         [ 0.1270, -0.0665, -0.1211],
         [ 0.1313,  0.0997,  0.1840]],

        [[-0.1708, -0.1800,  0.0861],
         [-0.1086, -0.0730, -0.1493],
         [-0.1591,  0.0706,  0.0783]]], grad_fn=<SelectBackward0>)

In [16]:
# Baseline forward pass with foo's own weights, for comparison after each load.
# NOTE(review): if DQN subclasses nn.Module, `foo(rando)` would be the idiomatic call — confirm.
foo.forward(rando)

tensor([[ 0.0408,  0.1812, -0.0207, -0.2181]], grad_fn=<AddmmBackward0>)

Foo's values are adjusted when loading another model's state.

In [17]:
# Load the state saved from bar into foo.
foo.loadModel("../savedModels/bar5005.pth")

In [18]:
# Same filter as the reference cell; it should now differ from foo's initial values.
foo.network[0].weight[0]

tensor([[[-0.0153,  0.1182,  0.1306],
         [ 0.1257,  0.1272, -0.1412],
         [ 0.1001,  0.0758,  0.0817]],

        [[-0.1051, -0.1869,  0.0104],
         [ 0.0491, -0.1830, -0.0792],
         [ 0.0801, -0.0096,  0.0970]],

        [[ 0.1878, -0.0070, -0.1889],
         [ 0.0627, -0.1807,  0.0965],
         [ 0.0827, -0.1768,  0.1561]]], grad_fn=<SelectBackward0>)

In [19]:
# Same input, bar's weights: the output should differ from the baseline pass.
foo.forward(rando)

tensor([[-0.0475,  0.0820, -0.1572,  0.1885]], grad_fn=<AddmmBackward0>)

And are restored when loading a copy of its original state.

In [20]:
# Reload the state that was saved from foo itself.
foo.loadModel("../savedModels/foo5004.pth")

In [21]:
# The filter should match foo's initial reference values again.
foo.network[0].weight[0]

tensor([[[-0.0933, -0.0275, -0.0402],
         [ 0.1401, -0.0463,  0.1098],
         [ 0.1761,  0.1548, -0.0608]],

        [[ 0.0390,  0.0166, -0.1324],
         [ 0.1270, -0.0665, -0.1211],
         [ 0.1313,  0.0997,  0.1840]],

        [[-0.1708, -0.1800,  0.0861],
         [-0.1086, -0.0730, -0.1493],
         [-0.1591,  0.0706,  0.0783]]], grad_fn=<SelectBackward0>)

In [22]:
# The output should match the baseline forward pass again.
foo.forward(rando)

tensor([[ 0.0408,  0.1812, -0.0207, -0.2181]], grad_fn=<AddmmBackward0>)