# Using Deeplay Models

In Deeplay, models are typically complete neural networks that can be used without further customization, either as a full solution for a specific application or as a substantial part of one (for example, backbone models, which usually need a head to become useful).

In [1]:
import deeplay as dl
import torch

## Selection of Models Available in Deeplay

### ResNet18

The `ResNet18` is available as a backbone.

In [2]:
# Instantiate the ResNet18 backbone for 3-channel input with output pooling enabled.
resnet18 = dl.models.BackboneResnet18(
    in_channels=3,
    pool_output=True,
)

# Show the module hierarchy.
print(resnet18)

BackboneResnet18(
  (blocks): LayerList(
    (0): Conv2dBlock(
      (layer): Layer[Conv2d](in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
      (normalization): Layer[BatchNorm2d](num_features=64)
      (activation): Layer[ReLU](inplace=True)
      (pool): Layer[MaxPool2d](kernel_size=3, stride=2, padding=1, ceil_mode=False, dilation=1)
    )
    (1): Conv2dBlock(
      (blocks): Sequential(
        (0-1): 2 x Conv2dBlock(
          (shortcut_start): Conv2dBlock(
            (layer): Layer[Identity](in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0)
          )
          (blocks): Sequential(
            (0): Conv2dBlock(
              (layer): Layer[Conv2d](in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
              (normalization): Layer[BatchNorm2d](num_features=64)
              (activation): Layer[ReLU]()
            )
            (1): Conv2dBlock(
              (layer): Layer[Conv2d](in_channels=64, out_ch

### DCGAN Generator and Discriminator

In [3]:
# DCGAN pair: a generator with 3 output channels and a discriminator with
# 3 input channels.
generator = dl.models.DCGANGenerator(
    out_channels=3,
)
discriminator = dl.models.DCGANDiscriminator(
    in_channels=3,
)

# Print both architectures, generator first, one per line group.
print(generator, discriminator, sep="\n")

DCGANGenerator(
  (blocks): LayerList(
    (0): Conv2dBlock(
      (layer): Layer[ConvTranspose2d](in_channels=100, out_channels=1024, kernel_size=4, stride=1, padding=0)
      (activation): Layer[ReLU]()
      (normalization): Layer[BatchNorm2d](num_features=1024)
    )
    (1): Conv2dBlock(
      (layer): Layer[ConvTranspose2d](in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1)
      (activation): Layer[ReLU]()
      (normalization): Layer[BatchNorm2d](num_features=512)
    )
    (2): Conv2dBlock(
      (layer): Layer[ConvTranspose2d](in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1)
      (activation): Layer[ReLU]()
      (normalization): Layer[BatchNorm2d](num_features=256)
    )
    (3): Conv2dBlock(
      (layer): Layer[ConvTranspose2d](in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1)
      (activation): Layer[ReLU]()
      (normalization): Layer[BatchNorm2d](num_features=128)
    )
    (4): Conv2dBlock(
      (layer

### CycleGAN Generator and Discriminator

In [4]:
# CycleGAN pair: a ResNet-style generator mapping 3 channels to 3 channels and
# a discriminator taking 3 input channels.
generator = dl.models.CycleGANResnetGenerator(
    in_channels=3,
    out_channels=3,
)
discriminator = dl.models.CycleGANDiscriminator(
    in_channels=3,
)

# Print both architectures, generator first, one per line group.
print(generator, discriminator, sep="\n")

CycleGANResnetGenerator(
  (encoder): ConvolutionalEncoder2d(
    (blocks): LayerList(
      (0): Conv2dBlock(
        (layer): Layer[Conv2d](in_channels=3, out_channels=64, kernel_size=7, stride=1, padding=3, padding_mode=reflect)
        (normalization): Layer[InstanceNorm2d](num_features=64)
        (activation): Layer[ReLU]()
      )
      (1): Conv2dBlock(
        (layer): Layer[Conv2d](in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1)
        (normalization): Layer[InstanceNorm2d](num_features=128)
        (activation): Layer[ReLU]()
      )
      (2): Conv2dBlock(
        (layer): Layer[Conv2d](in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1)
        (normalization): Layer[InstanceNorm2d](num_features=256)
        (activation): Layer[ReLU]()
      )
    )
    (postprocess): Layer[Identity]()
  )
  (bottleneck): ConvolutionalNeuralNetwork(
    (blocks): LayerList(
      (0-8): 9 x Conv2dBlock(
        (shortcut_start): Conv2dBlock(
      

### Multi-Layer Perceptrons

Multi-layer perceptrons are available in various sizes.
They can be used both as stand-alone applications and as backbones.

In [5]:
# Smallest MLP preset: 10 input features, 1 output feature, built via `.create()`.
small = dl.models.SmallMLP(
    in_features=10,
    out_features=1,
).create()

# Show the resulting layer stack.
print(small)

SmallMLP(
  (blocks): LayerList(
    (0): LinearBlock(
      (layer): Linear(in_features=10, out_features=32, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): LinearBlock(
      (layer): Linear(in_features=32, out_features=32, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): LinearBlock(
      (layer): Linear(in_features=32, out_features=1, bias=True)
      (activation): Identity()
    )
  )
)


In [6]:
# Medium MLP preset: 10 input features, 1 output feature, built via `.create()`.
medium = dl.models.MediumMLP(
    in_features=10,
    out_features=1,
).create()

# Show the resulting layer stack.
print(medium)

MediumMLP(
  (blocks): LayerList(
    (0): LinearBlock(
      (layer): Linear(in_features=10, out_features=64, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): LinearBlock(
      (layer): Linear(in_features=64, out_features=128, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): LinearBlock(
      (layer): Linear(in_features=128, out_features=1, bias=True)
      (activation): Identity()
    )
  )
)


In [7]:
# Large MLP preset: 10 input features, 1 output feature, built via `.create()`.
large = dl.models.LargeMLP(
    in_features=10,
    out_features=1,
).create()

# Show the resulting layer stack.
print(large)

LargeMLP(
  (blocks): LayerList(
    (0): LinearBlock(
      (layer): Linear(in_features=10, out_features=128, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1-2): 2 x LinearBlock(
      (layer): Linear(in_features=128, out_features=128, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): LinearBlock(
      (layer): Linear(in_features=128, out_features=1, bias=True)
      (activation): Identity()
    )
  )
)


In [8]:
# Extra-large MLP preset: 10 input features, 1 output feature, built via `.create()`.
xlarge = dl.models.XLargeMLP(
    in_features=10,
    out_features=1,
).create()

# Show the resulting layer stack.
print(xlarge)

XLargeMLP(
  (blocks): LayerList(
    (0): LinearBlock(
      (layer): Linear(in_features=10, out_features=128, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): LinearBlock(
      (layer): Linear(in_features=128, out_features=256, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): LinearBlock(
      (layer): Linear(in_features=256, out_features=512, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): LinearBlock(
      (layer): Linear(in_features=512, out_features=512, bias=True)
      (activation): LeakyReLU(negative_slope=0.05)
      (normalization): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=T

### Recurrent Neural Network

The `RecurrentModel` provides a base for recurrent neural networks. Specific implementations will be provided in the future.

In [9]:
# Configure an LSTM-based recurrent model and build it with `.create()`.
# In the printed structure below, the two entries of `hidden_features` show up
# as two LSTM blocks (10 -> 256 and 256 -> 256), followed by a head with a
# single Linear(256 -> 1) layer and a Sigmoid activation.
rnn = dl.models.RecurrentModel(
    in_features=10, 
    hidden_features=[256, 256], 
    out_features=1,
    return_cell_state=True,
    rnn_type="LSTM",
    out_activation=torch.nn.Sigmoid,
    bidirectional=False,
    batch_first=True,
    # Embedding module placed in front of the recurrent blocks
    # (printed as `Embedding(10, 10)`).
    embedding=torch.nn.Embedding(
        num_embeddings=10, 
        embedding_dim=10,
    ),
).create()

print(rnn)

RecurrentModel(
  (embedding): Embedding(10, 10)
  (embedding_dropout): Dropout(p=0, inplace=False)
  (blocks): LayerList(
    (0): Sequence1dBlock(
      (layer): LSTM(10, 256, batch_first=True)
    )
    (1): Sequence1dBlock(
      (layer): LSTM(256, 256, batch_first=True)
    )
  )
  (head): MultiLayerPerceptron(
    (blocks): LayerList(
      (0): LinearBlock(
        (layer): Linear(in_features=256, out_features=1, bias=True)
        (activation): Sigmoid()
      )
    )
  )
)


### Vision Transformer

The `ViT` (vision transformer) model provides a base for vision transformers. Specific implementations will be provided in the future.

In [10]:
# Configure a vision transformer and build it with `.create()`.
# In the printed structure below, `patch_size=8` appears as the patch embedder's
# Conv2d kernel size and stride, and the two entries of `hidden_features` appear
# as two encoder blocks operating on 128 features.
transformer = dl.models.ViT(
    in_channels=3, 
    image_size=256, 
    patch_size=8, 
    hidden_features=[128, 128], 
    num_heads=8, 
    out_features=1,
).create()

print(transformer)

ViT(
  (patch_embedder): Patchify(
    (layer): Conv2d(3, 128, kernel_size=(8, 8), stride=(8, 8))
    (dropout): Dropout(p=0, inplace=False)
  )
  (positional_embedder): PositionalEmbedding(
    (dropout): Dropout(p=0, inplace=False)
  )
  (transformer_encoder): TransformerEncoderLayer(
    (blocks): LayerList(
      (0-1): 2 x SequentialBlock(
        (multihead): LayerDropoutSkipNormalization(
          (normalization): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (layer): MultiheadSelfAttention(
            (projection): Identity()
            (attention): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
            )
          )
          (dropout): Dropout(p=0, inplace=False)
          (skip): Add()
        )
        (feed_forward): LayerDropoutSkipNormalization(
          (normalization): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (layer): MultiLayerPerceptron(
     

## Making Your Own Model from Deeplay Components

Typically, you'll make your own model using the `Sequential` object.

For example, you might combine both models and other components ...

In [11]:
# Chain a ResNet18 backbone and a small MLP head into one sequential model.
# NOTE(review): in_features=512 presumably matches the width of the pooled
# backbone output — confirm against the backbone's last block.
model = dl.Sequential(
    dl.models.BackboneResnet18(in_channels=3, pool_output=True),
    dl.models.SmallMLP(in_features=512, out_features=1),
)

# The model is printed without calling `.create()` or `.build()`, so the
# layers are listed as `Layer[...]` entries in the output.
print(model)

Sequential(
  (0): BackboneResnet18(
    (blocks): LayerList(
      (0): Conv2dBlock(
        (layer): Layer[Conv2d](in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        (normalization): Layer[BatchNorm2d](num_features=64)
        (activation): Layer[ReLU](inplace=True)
        (pool): Layer[MaxPool2d](kernel_size=3, stride=2, padding=1, ceil_mode=False, dilation=1)
      )
      (1): Conv2dBlock(
        (blocks): Sequential(
          (0-1): 2 x Conv2dBlock(
            (shortcut_start): Conv2dBlock(
              (layer): Layer[Identity](in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0)
            )
            (blocks): Sequential(
              (0): Conv2dBlock(
                (layer): Layer[Conv2d](in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
                (normalization): Layer[BatchNorm2d](num_features=64)
                (activation): Layer[ReLU]()
              )
              (1): Conv2dBlock(


... you can also add PyTorch layers and customize the properties of the Deeplay components ...

In [12]:
# Chain a Deeplay convolutional encoder, a plain PyTorch pooling layer, and a
# Deeplay multi-layer perceptron.
model = dl.Sequential(
    dl.ConvolutionalEncoder2d(3, [16, 32, 64], 128),
    torch.nn.AdaptiveAvgPool2d((1, 1)),
    dl.MultiLayerPerceptron(128, [], 1),
)

# Select the "activation" entries that are ReLU and reconfigure them as
# LeakyReLU with slope 0.2; non-ReLU activations (e.g. Identity) are untouched.
(
    model[..., "activation"]
    .isinstance(torch.nn.ReLU)
    .configure(torch.nn.LeakyReLU, negative_slope=0.2)
)

print(model)

Sequential(
  (0): ConvolutionalEncoder2d(
    (blocks): LayerList(
      (0): Conv2dBlock(
        (layer): Layer[Conv2d](in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        (activation): Layer[LeakyReLU](negative_slope=0.2)
      )
      (1): Conv2dBlock(
        (pool): Layer[MaxPool2d](kernel_size=2, stride=2)
        (layer): Layer[Conv2d](in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        (activation): Layer[LeakyReLU](negative_slope=0.2)
      )
      (2): Conv2dBlock(
        (pool): Layer[MaxPool2d](kernel_size=2, stride=2)
        (layer): Layer[Conv2d](in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        (activation): Layer[LeakyReLU](negative_slope=0.2)
      )
      (3): Conv2dBlock(
        (pool): Layer[MaxPool2d](kernel_size=2, stride=2)
        (layer): Layer[Conv2d](in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        (activation): Layer[Identity]()
      )
    )
    (po

### Making a Model by Subclassing

A model is just a `DeeplayModule` subclass like any other. For some applications, it might be more convenient to subclass the model and implement the `.forward()` method.

In [13]:
class MyModel(dl.DeeplayModule):
    """Convolutional backbone feeding a regression head and a classification head."""

    def __init__(self):
        """Declare the shared backbone and the two output heads."""
        self.backbone = dl.ConvolutionalEncoder2d(
            in_channels=3,
            hidden_channels=[16, 32, 64],
            out_channels=128,
        )
        # Both heads take 128 input features, the backbone's `out_channels`.
        self.regression_head = dl.models.SmallMLP(in_features=128, out_features=1)
        self.classification_head = dl.models.SmallMLP(in_features=128, out_features=10)

    def forward(self, x):
        """Return the ``(regression, classification)`` outputs computed from ``x``."""
        features = self.backbone(x)
        return self.regression_head(features), self.classification_head(features)
    
# `.create()` is called on the instance before printing; in the output below the
# layers appear as plain PyTorch modules (e.g. `Conv2d(...)`) rather than as
# `Layer[...]` entries.
model = MyModel().create()

print(model)

MyModel(
  (backbone): ConvolutionalEncoder2d(
    (blocks): LayerList(
      (0): Conv2dBlock(
        (layer): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (activation): ReLU()
      )
      (1): Conv2dBlock(
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (layer): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (activation): ReLU()
      )
      (2): Conv2dBlock(
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (layer): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (activation): ReLU()
      )
      (3): Conv2dBlock(
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (layer): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (activation): Identity()
      )
    )
    (postprocess): Identity()
  )
  (regression_head): SmallMLP(
    

## Initializing the Weights

Deeplay provides some weight initialization methods, which you can use before the model is built ...

In [14]:
# Variant 1: register the initializer first, then build the model.
backbone = dl.ConvolutionalEncoder2d(
    in_channels=3, 
    hidden_channels=[16, 32, 64], 
    out_channels=128,
)

# Normal initializer configured with mean 1 and standard deviation 1.
initializer = dl.initializers.Normal(mean=1, std=1.0)
backbone.initialize(initializer)  # Before building the model.
backbone.build()

# Sanity check: the statistics of the first block's weights should be close to
# the requested mean and standard deviation. No random seed is set in this
# notebook, so the exact values are expected to differ from run to run.
print(f"Mean weights first layer: {backbone.blocks[0].layer.weight.mean()}") 
print(f"STD weights first layer: {backbone.blocks[0].layer.weight.std()}")

Mean weights first layer: 1.0270298719406128
STD weights first layer: 0.9969273209571838


... or after the model is built.

In [15]:
# Variant 2: build the model first, then apply the initializer.
backbone = dl.ConvolutionalEncoder2d(
    in_channels=3, 
    hidden_channels=[16, 32, 64], 
    out_channels=128,
)

# Normal initializer configured with mean 1 and standard deviation 1.
initializer = dl.initializers.Normal(mean=1, std=1.0)
backbone.build()
backbone.initialize(initializer)  # After building the model.

# Sanity check: the statistics of the first block's weights should again be
# close to the requested mean and standard deviation (no seed is set, so the
# exact values are expected to differ from run to run).
print(f"Mean weights first layer: {backbone.blocks[0].layer.weight.mean()}") 
print(f"STD weights first layer: {backbone.blocks[0].layer.weight.std()}")

Mean weights first layer: 0.9408934712409973
STD weights first layer: 1.0349736213684082


### Conditionally Initializing the Weights

You can also conditionally initialize the weights. For example, this can be useful if you want to use different initialization methods for different parts of the model.

There are two ways to do this. The first is to set the `targets` parameter of the initializer, which should be a tuple of module types that should be initialized ...

In [16]:
# Conditional initialization, approach 1: restrict each initializer to specific
# module types through its `targets` parameter.
backbone = dl.ConvolutionalEncoder2d(
    in_channels=3, 
    hidden_channels=[16, 32, 64], 
    out_channels=128,
)
backbone.normalized()  # Add normalization to be initialized differently.

# Normal(0, 1) initializer limited to Conv2d modules.
norm_initializer = dl.initializers.Normal(
    mean=0, 
    std=1.0, 
    targets=(torch.nn.Conv2d,),   # Target layers for normal initialization.
)
backbone.initialize(norm_initializer)

# Constant initializer (weight 1, bias 0) limited to BatchNorm2d modules.
const_initializer = dl.initializers.Constant(
    weight=1, 
    bias=0, 
    targets=(torch.nn.BatchNorm2d,),  # Target layers for constant initialization.
)
backbone.initialize(const_initializer)

# Both initializers are registered before the model is built.
backbone.build()

# Report weight statistics of the first block's convolution and normalization:
# the convolution should be close to mean 0 / std 1, the normalization weights
# exactly mean 1 / std 0.
print("Conv2d \n"
      f"Mean {backbone.blocks[0].layer.weight.mean()} \n" 
      f"STD {backbone.blocks[0].layer.weight.std()} \n")
print("BatchNorm2d \n" 
      f"Mean {backbone.blocks[0].normalization.weight.mean()} \n" 
      f"STD {backbone.blocks[0].normalization.weight.std()} \n")

Conv2d 
Mean 0.014395988546311855 
STD 1.0669336318969727 

BatchNorm2d 
Mean 1.0 
STD 0.0 



... the second way is to use the selector syntax.

In [17]:
# Conditional initialization, approach 2: pick the sub-modules with the selector
# syntax and initialize each selection with its own initializer.
backbone = dl.ConvolutionalEncoder2d(
    in_channels=3, 
    hidden_channels=[16, 32, 64], 
    out_channels=128,
)
backbone.normalized()

# No `targets` are given here; the selection below decides what gets initialized.
norm_initializer = dl.initializers.Normal(mean=0, std=1)
const_initializer = dl.initializers.Constant(weight=1, bias=0)

# Entries named "layer" get the normal initializer; entries named
# "normalization" get the constant initializer.
backbone[..., "layer"].all.initialize(norm_initializer)
backbone[..., "normalization"].all.initialize(const_initializer)

# Both initializers are registered before the model is built.
backbone.build()

# Report weight statistics of the first block's convolution and normalization:
# the convolution should be close to mean 0 / std 1, the normalization weights
# exactly mean 1 / std 0.
print("Conv2d \n"
      f"Mean {backbone.blocks[0].layer.weight.mean()} \n" 
      f"STD {backbone.blocks[0].layer.weight.std()} \n")
print("BatchNorm2d \n" 
      f"Mean {backbone.blocks[0].normalization.weight.mean()} \n" 
      f"STD {backbone.blocks[0].normalization.weight.std()} \n")

Conv2d 
Mean 0.030025742948055267 
STD 0.9641354084014893 

BatchNorm2d 
Mean 1.0 
STD 0.0 



**NOTE:** You can specify the tensors to be initialized during weight initialization. This is done with the `tensors` parameter. For example:

```python
initializer = Normal()
model.initialize(initializer, tensors=("weight", "bias"))  # Bias and weights (default).
model.initialize(initializer, tensors="weight")  # Only weight.
model.initialize(initializer, tensors="bias")  # Only bias.

const = Constant()
model.initialize(const, tensors="bias")
```