### PyTorch Building Models 
#### Link: https://pytorch.org/tutorials/beginner/introyt/modelsyt_tutorial.html

In [4]:
import torch


#### torch.nn.Module and torch.nn.Parameter

In [5]:
class TinyModel(torch.nn.Module):

    def __init__(self):
        super(TinyModel, self).__init__()

        self.linear1 = torch.nn.Linear(100, 200)
        self.activation = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(200, 10)
        self.softmax = torch.nn.Softmax()

    def forward(self, x):
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)
        x = self.softmax(x)
        return x

In [9]:
tinyModel = TinyModel()

print("The model: ")
print(tinyModel)

print("\n\nJust one layer: ")
print(tinyModel.linear2)

print("\n\nModel params: ")
for param in tinyModel.parameters():
    print(param)

print("\n\nLayer params: ")
for param in tinyModel.linear2.parameters():
    print(param)

The model: 
TinyModel(
  (linear1): Linear(in_features=100, out_features=200, bias=True)
  (activation): ReLU()
  (linear2): Linear(in_features=200, out_features=10, bias=True)
  (softmax): Softmax(dim=None)
)


Just one layer: 
Linear(in_features=200, out_features=10, bias=True)


Model params: 
Parameter containing:
tensor([[ 0.0951, -0.0967, -0.0756,  ..., -0.0835,  0.0216, -0.0002],
        [-0.0279,  0.0085, -0.0953,  ...,  0.0814, -0.0417,  0.0193],
        [-0.0302, -0.0915,  0.0849,  ...,  0.0020,  0.0604,  0.0769],
        ...,
        [ 0.0314,  0.0407,  0.0219,  ..., -0.0105,  0.0622,  0.0702],
        [ 0.0586, -0.0422,  0.0004,  ...,  0.0510,  0.0877,  0.0550],
        [ 0.0719,  0.0182,  0.0557,  ..., -0.0353,  0.0376, -0.0960]],
       requires_grad=True)
Parameter containing:
tensor([ 0.0033, -0.0116, -0.0965, -0.0786, -0.0259, -0.0741, -0.0149, -0.0001,
         0.0488,  0.0552,  0.0431,  0.0161,  0.0094, -0.0809,  0.0344, -0.0473,
         0.0090,  0.0908,  0.0121, -0

### Common layer types
#### Linear layers

In [10]:
lin = torch.nn.Linear(3, 2)
x = torch.rand(1, 3)
print("Input: ")
print(x)

print("\n\nWeight and Bias parameters:")
for param in lin.parameters():
    print(param)

y = lin(x)
print("\n\nOutput:")
print(y)

Input: 
tensor([[0.5004, 0.1327, 0.7218]])


Weight and Bias parameters:
Parameter containing:
tensor([[-0.2634, -0.3039,  0.2384],
        [ 0.1816,  0.0813, -0.4919]], requires_grad=True)
Parameter containing:
tensor([-0.4500,  0.5373], requires_grad=True)


Output:
tensor([[-0.4501,  0.2839]], grad_fn=<AddmmBackward0>)


#### Convolutional Layers

In [14]:
import torch.functional as F

class LeNet(torch.nn.Module):
    
    def __init__(self):
        super(LeNet, self).__init__()
        # 1 input image channel (black and white), 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = torch.nn.Conv2d(1, 6, 5)
        self.conv2 = torch.nn.Conv2D(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = torch.nn.Linear(16 * 6 * 6, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        x = self.num_flat_features(x)
        x = x.view(-1, x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

    def num_flat_features(self, x):
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


#### Recurrent Layers

In [15]:
class LSTMTagger(torch.nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)

        # The LSTM tages word embeddings as input and outputs hidden states with dimensionality 
        # hidden_dim
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)
        # The linear layer that maps from hidden state space to tag space

        self.hidden2tag = torch.nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

#### Transformers - for another time

### Other Layers and Functions
#### Data Manipulation Layers

In [18]:
my_tensor = torch.rand(1, 6, 6)
print(my_tensor)

maxpool_layer = torch.nn.MaxPool2d(3)
print(maxpool_layer(my_tensor))


tensor([[[0.8991, 0.5049, 0.9618, 0.4432, 0.9218, 0.7740],
         [0.8438, 0.9428, 0.0590, 0.3887, 0.3772, 0.8513],
         [0.8923, 0.2522, 0.3289, 0.3688, 0.6376, 0.7956],
         [0.1940, 0.5930, 0.7226, 0.6153, 0.8193, 0.3464],
         [0.2910, 0.6459, 0.6008, 0.5372, 0.1866, 0.1060],
         [0.4981, 0.6329, 0.7854, 0.3184, 0.0791, 0.1000]]])
tensor([[[0.9618, 0.9218],
         [0.7854, 0.8193]]])


In [19]:
my_tensor = torch.rand(1, 4, 4) * 20 + 5
print(my_tensor)

print(my_tensor.mean())

norm_layer = torch.nn.BatchNorm1d(4)
normed_tensor = norm_layer(my_tensor)
print(normed_tensor)

print(normed_tensor.mean())

tensor([[[22.8302, 11.3415, 14.9114,  5.8982],
         [14.0067, 20.4779,  6.2450, 22.4110],
         [19.0712, 19.6771, 24.6926, 19.7017],
         [14.9916, 22.9226, 23.0302, 12.0728]]])
tensor(17.1426)
tensor([[[ 1.4774, -0.3909,  0.1896, -1.2761],
         [-0.2811,  0.7417, -1.5080,  1.0473],
         [-0.7553, -0.4884,  1.7213, -0.4775],
         [-0.6750,  0.9658,  0.9880, -1.2788]]],
       grad_fn=<NativeBatchNormBackward0>)
tensor(-1.2293e-07, grad_fn=<MeanBackward0>)


In [21]:
my_tensor = torch.rand(1, 4, 4)

dropout = torch.nn.Dropout(p=0.4)
print(dropout(my_tensor))
print(dropout(my_tensor))

tensor([[[0.2278, 0.0000, 0.0103, 1.4919],
         [0.0000, 0.0000, 0.1444, 0.3125],
         [0.0000, 0.0000, 0.9807, 0.3606],
         [0.4373, 0.0628, 1.5629, 1.4654]]])
tensor([[[0.2278, 0.0000, 0.0000, 1.4919],
         [0.9628, 0.0000, 0.1444, 0.3125],
         [0.6341, 1.0745, 0.0000, 0.3606],
         [0.0000, 0.0628, 0.0000, 1.4654]]])
