In [1]:
import torch
import torch.nn as nn
import torch.autograd as autograd
# With square kernels and equal stride
m = nn.Conv2d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
# non-square kernels and unequal stride and with padding and dilation
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 100))
output = m(input)


In [4]:
torch.randn(20, 3, 50, 100).shape

torch.Size([20, 16, 50, 100])

In [84]:
input = autograd.Variable(torch.randn(20, 3, 128, 128))
def getWH(img_w, img_h):
    img_w, img_h = np.ceil(img_w / 2), np.ceil(img_h / 2)
    img_w, img_h = np.ceil(img_w / 2), np.ceil(img_h / 2)
    img_w, img_h = np.ceil(img_w / 2), np.ceil(img_h / 2)
    img_w, img_h = np.ceil(img_w - 2), np.ceil(img_h - 2)
    return int(img_w), int(img_h)
print(getWH(input.shape[2], input.shape[3]))
model = nn.Sequential(
    # conv + max pool -> /2
    # 64 个 3*3 filters, strike = (1, 1), output_img.shape = ceil(L/S) = ceil(input/strike) = (H, W)
    nn.Conv2d(in_channels=3, out_channels=64,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # conv + max pool -> /2
    nn.Conv2d(in_channels=64, out_channels=128,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # regular conv -> id
    nn.Conv2d(in_channels=128, out_channels=256,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=256, out_channels=256,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),
    nn.Conv2d(in_channels=256, out_channels=512,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 2)),

    # conv
    nn.Conv2d(in_channels=512, out_channels=512,  kernel_size=3, stride=1, padding=0),
    nn.ReLU(),
)
model(input).shape

(14, 14)


torch.Size([20, 512, 14, 14])

In [85]:
model2 = nn.Sequential(
    # conv + max pool -> /2
    # 64 个 3*3 filters, strike = (1, 1), output_img.shape = ceil(L/S) = ceil(input/strike) = (H, W)
    nn.Conv2d(in_channels=3, out_channels=64,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # conv + max pool -> /2
    nn.Conv2d(in_channels=64, out_channels=128,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # regular conv -> id
    nn.Conv2d(in_channels=128, out_channels=256,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=256, out_channels=256,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),

    nn.Conv2d(in_channels=256, out_channels=512,  kernel_size=3, stride=1, padding=1),
    nn.ReLU(),

    nn.Conv2d(in_channels=512, out_channels=512,  kernel_size=(2, 4), stride=2, padding=1),
    nn.ReLU(),

    # conv
    nn.Conv2d(in_channels=512, out_channels=512,  kernel_size=3, stride=1, padding=0),
    nn.ReLU(),
)
model2(input).shape

torch.Size([20, 512, 15, 14])

In [40]:
import math
from torch.nn.functional import pad
def add_timing_signal_nd_torch(x, min_timescale=1.0, max_timescale=1.0e4):
    static_shape = x.shape
    num_dims = len(static_shape) - 2
    channels = static_shape[1]
    num_timescales = channels // (num_dims * 2)
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        (float(num_timescales) - 1))
    inv_timescales = min_timescale * torch.exp(torch.linspace(0, num_timescales).float() * -log_timescale_increment)
    for dim in range(num_dims):
        length = static_shape[dim + 2]
        position = torch.linspace(0, length).float()
        scaled_time = position.unsqueeze(1) * inv_timescales.unsqueeze(0)
        signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)
        print(signal, signal.shape)
        prepad = dim * 2 * num_timescales
        postpad = channels - (dim + 2) * 2 * num_timescales
        signal = pad(signal, [[0, 0], [prepad, postpad]])
        for _ in range(2 + dim):
            signal = signal.unsqueeze(0)
        for _ in range(num_dims - 1 - dim):
            signal = signal.unsqueeze(-2)
        x += signal
    return x
add_timing_signal_nd_torch(input)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  1.0000,  1.0000,  1.0000],
        [ 0.9616,  0.9616,  0.9616,  ...,  0.2743,  0.2743,  0.2743],
        [ 0.5276,  0.5276,  0.5276,  ..., -0.8495, -0.8495, -0.8495],
        ...,
        [-0.2470, -0.2470, -0.2470,  ...,  0.9690,  0.9690,  0.9690],
        [ 0.8641,  0.8641,  0.8641,  ...,  0.5033,  0.5033,  0.5033],
        [ 0.7210,  0.7210,  0.7210,  ..., -0.6929, -0.6929, -0.6929]]) torch.Size([100, 200])


TypeError: constant_pad_nd(): argument 'pad' must be tuple of ints, but found element of type list at pos 1

NameError: name 'add_timing_signal_nd_torch' is not defined

In [27]:
a = torch.Tensor([1,2,3,4,5])
a.shape

torch.Size([5])

In [31]:
a.unsqueeze(1).shape

torch.Size([5, 1])

$$
\text{log_timescale_increment} = \frac{log(\frac{\text{max_timescale}}{\text{min_timescale}})}{\text{num_timescales} - 1}
$$
$$
\text{inv_timescales} = \text{min_timescale} * e^{range(\text{num_timescales}) * -\text{log_timescale_increment}}
$$

In [81]:
x = autograd.Variable(torch.randn(20, 512, 14, 14))
min_timescale=1.0
max_timescale=1.0e4
static_shape = x.shape # [20, 512, 14, 14]
num_dims = len(static_shape) - 2 # 2
channels = static_shape[1] # 512
num_timescales = channels // (num_dims * 2) # 128
log_timescale_increment = (
    math.log(float(max_timescale) / float(min_timescale)) /
    (float(num_timescales) - 1)
) # 0.1
inv_timescales = min_timescale * torch.exp(torch.arange(num_timescales).float() * (-log_timescale_increment)) # len == 128
for dim in range(num_dims): # dim == 0; 1
    length = static_shape[dim + 2] # 14
    position = torch.arange(length).float() # len == 14
    scaled_time = inv_timescales.unsqueeze(1) * position.unsqueeze(0) # inv = [128, 1]， pos = [1, 14], scaled_time = [128, 14]
    signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=0) # [256， 14]
    
    prepad = dim * 2 * num_timescales # 0; 256
    postpad = channels - (dim + 1) * 2 * num_timescales # 256; 0
    
    signal = pad(signal, (0, 0, prepad, postpad)) # [512, 14]
    
    signal = signal.unsqueeze(0)
    for _ in range(dim):
        signal = signal.unsqueeze(2) # [512, 14]
    for _ in range(num_dims - 1 - dim):
        signal = signal.unsqueeze(-1)
    x += signal # [1, 512, 14, 1]; [1, 512, 1, 14]

In [83]:
x.shape

torch.Size([20, 512, 14, 14])

In [77]:
for i in range(1):
    print(i)

0


In [None]:
class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 80):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False).cuda()
        return x