# Tensors

Tensors are data structures used in PyTorch. Tensors are used as the input and ouput for models, model parameters or loss functions.Tensors are created nearly in the same way as numpy arrays. The advantage of tensors is that they can run on a GPU.

In [60]:
import torch
import numpy as np

In [66]:
#create tensor from data
A = [[1, 2], [3, 4]]
A_tensor = torch.tensor(A)
print(A_tensor)

#create tensor fom numpy array
B = np.array(A)
B_tensor = torch.from_numpy(B)
print(B_tensor)

#create tensor from another tensor
C_tensor = torch.ones_like(A_tensor) # retains the properties of A_tensor
print(f"Ones Tensor: \n {C_tensor} \n")

x_rand = torch.rand_like(A_tensor, dtype=torch.float) # overrides the datatype of A_tensor
print(f"Random Tensor: \n {x_rand} \n")

#numpy array to tensor
n = np.ones(5)
t = torch.from_numpy(n)
print(t)

tensor([[1, 2],
        [3, 4]])
tensor([[1, 2],
        [3, 4]])
Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.5548, 0.2044],
        [0.4441, 0.0072]]) 

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


## Tensor attributes

In [65]:
tensor = torch.rand(3,4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


## Tensor operations

In [68]:
# Compute the absolute value of a tensor
tens_abs = torch.abs(torch.tensor([-10, -22, 3]))
print(tens_abs)

# Add scalar value to tensor
a = torch.randn(3)
a = torch.add(a, 20)
print(a)

# Subtract scalar value to tensor
a = torch.randn(3)
a = torch.sub(a, 20)
print(a)

# Divide tensor by scalar value
a = torch.randn(5)
torch.div(a, 0.2)
print(a)

# Divide tensor with scalar value
a = torch.randn(3)
a = torch.mul(a, 5)
print(a)

# Power of tensor values
a = torch.arange(1., 6.)
a = torch.pow(a, 2)
print(a)

# Square of tensor values
a = torch.randn(4)
a = torch.square(a)
print(a)

tensor([10, 22,  3])
tensor([19.7161, 20.8993, 22.2515])
tensor([-19.4759, -19.4562, -19.9132])
tensor([-0.7880,  0.6196,  0.1363, -1.1374,  0.1128])
tensor([-1.5570,  1.4378, -5.2548])
tensor([ 1.,  4.,  9., 16., 25.])
tensor([0.0746, 1.0001, 7.9826, 5.4257])


# Dataset and DataLoaders

In [None]:
import torch

class Dataset(torch.utils.data.Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, list_IDs, labels, transforms = None):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs
        self.transforms = transforms

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]
        
        # Load data and get label
        X = torch.load('data/' + ID + '.pt')
        y = self.labels[ID]
        
        if self.transforms is not None:
            self.transforms(X,y)

        return X, y

In [None]:
import torch
from my_classes import Dataset
from torch import transforms

# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

# Parameters
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 6}
max_epochs = 100

# Datasets
partition = # IDs
labels = # Labels

# Create training transforms
transforms_training = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5), std=(0.5)),
])

# Generators
training_set = Dataset(partition['train'], labels, transforms = transforms_training)
# create a training dataloader
training_generator = torch.utils.data.DataLoader(training_set, **params)

# Create validation transforms
transforms_validation = transforms.Compose([
    transforms.ToTensor()
])

validation_set = Dataset(partition['validation'], labels, transforms = transforms_validation)
#create a validation dataloader
validation_generator = torch.utils.data.DataLoader(validation_set, **params)

# Loop over epochs
for epoch in range(max_epochs):
    # Training
    for local_batch, local_labels in training_generator:
        # Transfer to GPU
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)

        # Model computations (see next section)
        [...]

    # Validation
    with torch.set_grad_enabled(False):
        for local_batch, local_labels in validation_generator:
            # Transfer to GPU
            local_batch, local_labels = local_batch.to(device), local_labels.to(device)

            # Model computations
            [...]


# PyTorch training loop

A training loop in PyTorch consists of two steps:

- training
- validation / test

The basic idea of the loop is to make predictions with the model, compute the loss by comparing model predictions with the ground truth (GT) and then update the model weights (training only!).

In [54]:
def run_iteration(training = True):
    # Compute prediction and loss
    pred = model(X)
    loss = loss_fn(pred, y)
    
    if training:
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    return loss

In [56]:
#define the loss function
loss_fn = nn.CrossEntropyLoss()

#define the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

epochs = 10
for t in range(epochs):
    
    #set model to train (activates dropout, batchnorm...)
    model.train()
    
    training_losses_per_epoch = []
    
    for batch, (X, y) in enumerate(dataloader_train):
        
        #reset gradients
        #for every mini-batch during the training phase, we typically want to explicitly set the gradients to zero 
        #before starting to do backpropragation (i.e., updating the Weights and biases) 
        #because PyTorch accumulates the gradients on subsequent backward passes. 
        
        optimizer.zero_grad()
        
        loss = run_interation(training = True)
        training_losses_per_epoch.append(loss)
    
    # Tensorboard
    print(np.mean(training_losses_per_epoch))
    
    model.eval()
    
    for batch, (X, y) in enumerate(dataloader_validation):
        
        run_iteration(training = False)  
     
    
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

NameError: name 'dataloader_train' is not defined

# PyTorch Model building blocks

PyTorch has several different blocks that can be used to build models. All modules are imported from the 'torch.nn' module:

- Modules
- Sequential
- Module list and
- Module dictionaries

## Modules

Modules are the base class of all models you build and must be inherit by your model class.

In [2]:
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class MyModel(nn.Module):
    
    def __init__(self, input_channels, classes):
        
        super().__init__()
        
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)

        self.fc1 = nn.Linear(32 * 28 * 28, 1024)
        self.fc2 = nn.Linear(1024, classes)
        
    
    def forward(self, x):
        
        x = self.conv1(x)
        x = self.bn1(x)
        
        #Activation function is placed in the forward method
        x = F.relu(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        
        #flatten
        x = x.view(x.size(0), -1)
        
        x = self.fc1(x)
        x = F.sigmoid(x)
        x = self.fc2(x)
        
        return x

To build our model

In [4]:
model = MyModel(1, 10)
print(model)

MyModel(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=25088, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=10, bias=True)
)


## Sequential

Instead of having layers individually, we can also combine and stack them into smaller blocks. Layers in a sequential block are, as the name implies, excuted in the same order in which they are stacked. 
Stacking modules is useful for building block containing several layers that we want to reu-use.

In [5]:
class MyModel(nn.Module):
    def __init__(self, input_channels, classes):
        
        super().__init__()
        
        #Combine conv, batchnorm and relu activation into a block
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, classes)
        )

        
    def forward(self, x):
        
        #Now we just call the sequential block
        x = self.conv_block1(x)
        
        x = self.conv_block2(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.decoder(x)
        
        return x

In [6]:
model = MyModel(1, 10)
print(model)

MyModel(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


***Note!*** 

In the sequential example above, we placed the ReLU function in the __ init __ method. In the first example, activation functions were placed in the forward method.

Both ways are correct, but there is a difference:
nn.Relu() is a class, which needs to be initialized, therefore it is written in the __ init __ method.
Alternatively we can use the functional API and directly put F.relu() in the forward method.

We can further simplify the code by generating a single method for a convolutional block and merge the two convolutional blocks into a sequential layer.
With that, we create an encoder - decoder model-like architecture.

In [17]:
def conv_block(features_in, features_out, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(features_in, features_out, *args, **kwargs),
        nn.BatchNorm2d(features_out),
        nn.ReLU()
    )

In [18]:
class MyModel(nn.Module):
    def __init__(self, input_channels, classes):
        
        super().__init__()
        
        self.encoder = nn.Sequential(
                        conv_block(input_channels, 32, kernel_size = 3, padding = 1),
                        conv_block(32, 64, kernel_size = 3, padding = 1))
                                     
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, classes)
        )

        
    def forward(self, x):
        
        #Now we just call the sequential block
        x = self.encoder(x)
        
        x = x.view(x.size(0), -1) # flat
        
        x = self.decoder(x)
        
        return x

In [19]:
model = MyModel(1, 10)
print(model)

MyModel(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## Dynamic Sequential modules

In the previous example we added to convolutional layers to our sequential block. But what if we would like to add more layers? 
Most deep learning models are huge, with tons of layers. Manually writing the code for each layer can be a lot of work and makes your code hard to read.
A better way is to dynamically add layers into a sequential block.

In [28]:
def dec_block(in_f, out_f):
    return nn.Sequential(
        nn.Linear(in_f, out_f),
        nn.Sigmoid()
    )

In [69]:
class MyModel(nn.Module):
    def __init__(self, input_channels, classes, encoder_features, decoder_features):
        
        super().__init__()
        
        self.encoder_features = [input_channels, *encoder_features]
        
        self.decoder_features = [32 * 28 * 28, *decoder_features]
        
        conv_blocks = [conv_block(in_f, out_f, kernel_size=3, padding=1) 
                       for in_f, out_f in zip(self.encoder_features, self.encoder_features[1:])]
        
        #Sequential cannot take a list as the input. We need to decompose it.
        self.encoder = nn.Sequential(*conv_blocks)

        
        dec_blocks = [dec_block(in_f, out_f) 
                       for in_f, out_f in zip(self.decoder_features, self.decoder_features[1:])]
        
        self.decoder = nn.Sequential(*dec_blocks)
        
        self.last = nn.Linear(self.decoder_features[-1], classes)

        
    def forward(self, x):
        x = self.encoder(x)
        
        x = x.view(x.size(0), -1) # flat
        
        x = self.decoder(x)
        
        return x

In [70]:
model = MyModel(1, 10, [32,64], [1024, 512])
print(model)

TypeError: __init__() got an unexpected keyword argument 'kernel_size'

We could also split our encoder and decoder part

In [44]:
class Encoder(nn.Module):
    
    def __init__(self, encoder_features):
        
        super().__init__()
        self.conv_blocks = nn.Sequential(*[conv_block(in_f, out_f, kernel_size=3, padding=1) 
                       for in_f, out_f in zip(encoder_features, encoder_features[1:])])

        def forward(self, x):
            
            return self.conv_blocks(x)
        
class Decoder(nn.Module):
    def __init__(self, decoder_features, classes):
        
        super().__init__()
        self.dec_blocks = nn.Sequential(*[dec_block(in_f, out_f) 
                       for in_f, out_f in zip(decoder_features, decoder_features[1:])])
        self.last = nn.Linear(decoder_features[-1], classes)

    def forward(self, x):
        
        return self.dec_blocks()
    
    
class MyModel(nn.Module):
    
    def __init__(self, input_channels, classes, encoder_features, decoder_features):
        
        super().__init__()
        self.encoder_features = [input_channels, *encoder_features]
        self.decoder_features = [32 * 28 * 28, *decoder_features]

        self.encoder = Encoder(self.encoder_features)
        
        self.decoder = Decoder(decoder_features, classes)
        
    def forward(self, x):
        
        x = self.encoder(x)
        
        x = x.flatten(1) # flat
        
        x = self.decoder(x)
        
        return x

In [45]:
model = MyModel(1, 10, [32,64], [1024, 512])
print(model)

MyModel(
  (encoder): Encoder(
    (conv_blocks): Sequential(
      (0): Sequential(
        (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (1): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
  )
  (decoder): Decoder(
    (dec_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): Sigmoid()
      )
    )
    (last): Linear(in_features=512, out_features=10, bias=True)
  )
)


## ModuleList

We can store layers (modules) in a list, using the ModuleList building block. 
This can be useful if we want to store or use information while iterating over layers.
**The main difference between Sequential layers and ModuleLists is that a ModuleList does not have a forward method! The layers inside a ModuleList are not connected**

In [7]:
class ModuleListExample(nn.Module):
    
    def __init__(self, sizes):
        
        super().__init__()        
        self.layers = nn.ModuleList([nn.Linear(in_f, out_f) for in_f, out_f in zip(sizes, sizes[1:])])        
        self.trace = []
        
    def forward(self,x):
        
        for layer in self.layers:
            x = layer(x)
            self.trace.append(x)
        
        return x

## ModuleDict

Sometimes we want a flexible model, which architetecture can be varied, depending on a users input. That is where Module dictionaries can be useful. We can store layers, activation function or other variables that can be changed depensing on the input.

In [8]:
def conv_block(in_f, out_f, activation='relu', *args, **kwargs):
    
    activations = nn.ModuleDict([
                ['lrelu', nn.LeakyReLU()],
                ['relu', nn.ReLU()]
    ])
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )

In [None]:
def conv_block(in_f, out_f, activation=nn.ReLU(), *args, **kwargs):
    
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations
    )

In the above example we created a convolutional block. Inside the block we have a ModuleDict with two different activation functions (LeakyReLU, ReLU). By providing a key-string as the input to the conv-block method, we can decide which activation we want to use in this block.

# U-Net

In [46]:
class conv_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()

        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_c)

        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_c)

        self.relu = nn.ReLU()

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        return x

In [47]:
class encoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()

        self.conv = conv_block(in_c, out_c)
        self.pool = nn.MaxPool2d((2, 2))

    def forward(self, inputs):
        x = self.conv(inputs)
        p = self.pool(x)

        return x, p


In [48]:
class decoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()

        self.up = nn.ConvTranspose2d(in_c, out_c, kernel_size=2, stride=2, padding=0)
        self.conv = conv_block(out_c+out_c, out_c)

    def forward(self, inputs, skip):
        x = self.up(inputs)
        x = torch.cat([x, skip], axis=1)
        x = self.conv(x)

        return x

In [52]:
class UNet(nn.Module):
    def __init__(self):
        super().__init__()

        """ Encoder """
        self.e1 = encoder_block(1, 32)
        self.e2 = encoder_block(32, 64)
        self.e3 = encoder_block(64, 128)
        self.e4 = encoder_block(128, 256)

        """ Bottleneck """
        self.b = conv_block(256, 512)

        """ Decoder """
        self.d1 = decoder_block(512, 256)
        self.d2 = decoder_block(256, 128)
        self.d3 = decoder_block(128, 64)
        self.d4 = decoder_block(64, 32)

        """ Classifier """
        self.outputs = nn.Conv2d(32, 1, kernel_size=1, padding=0)

    def forward(self, inputs):
        """ Encoder """
        s1, p1 = self.e1(inputs)
        s2, p2 = self.e2(p1)
        s3, p3 = self.e3(p2)
        s4, p4 = self.e4(p3)

        """ Bottleneck """
        b = self.b(p4)

        """ Decoder """
        d1 = self.d1(b, s4)
        d2 = self.d2(d1, s3)
        d3 = self.d3(d2, s2)
        d4 = self.d4(d3, s1)

        """ Classifier """
        outputs = self.outputs(d4)

        return outputs

In [53]:
model = UNet()
print(model)

UNet(
  (e1): encoder_block(
    (conv): conv_block(
      (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (e2): encoder_block(
    (conv): conv_block(
      (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (pool): MaxPool2d(kernel_size=(2, 2), stride=(