## Configure environment

In [3]:
# Basic
import sys
import numpy as np
from typing import List
from collections import namedtuple

# Torch utils
import torch
import torch.nn as nn
from torch.autograd import Variable

**Remember conversions**
* 8 Bits = 1 Byte
* 8192 Bits = 1024 Bytes = 1 Kb.
* 8388608 Bits = 1048576 Bytes = 1 Mb.

In [40]:
# Object to store info about parameters in network
Parameter = namedtuple('Parameter', ['size', 'bits'],
                       defaults=[np.asarray((0, 0)), 32])

In [20]:
input_tensor = torch.FloatTensor(64, 3, 128, 128)
input_tensor.requires_grad = True

print(input_tensor.size())
print(input_tensor.dtype)
print(np.prod(input_tensor.size()) * 32)

torch.Size([64, 3, 128, 128])
torch.float32
100663296


In [21]:
input_tensor.grad  # == None

In [23]:
input_tensor = Variable(input_tensor, requires_grad=True) 
out = input_tensor + input_tensor
out.backward(input_tensor)
print(input_tensor.grad)

tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
        

In [24]:
# Grad
print(input_tensor.grad.size())
print(input_tensor.grad.dtype)
print(np.prod(input_tensor.grad.size()) * 32)

torch.Size([64, 3, 128, 128])
torch.float32
100663296


In [51]:
class SizeEstimator:

    def __init__(self, model: nn.Module, input_size: List[int],
                 input_n_bits: int = 32):
        """
        Estimates the size of PyTorch models in memory
        for a given input size and data precision, measured in bits.
        So default input type of torch.float32 equals to 32 bits precision.
        """
        self._model = model
        self._input_size = input_size
        self._input_n_bits = input_n_bits

        # Calculate
        self._parameters_sizes = self._get_parameter_sizes()
        self._output_sizes = self._get_output_sizes()
        self._parameters_bits = self._calculate_parameters_weight()
        self._forward_backward_bits = self._calculate_forward_backward_weight()
        self._input_weight = self._calculate_input_weight()

    def _get_parameter_sizes(self) -> List[Parameter]:
        """
        Get sizes of all parameters in `model`
        """
        sizes = []
        modules = list(self._model.modules())[1:]
        for i, module in enumerate(modules):
            if isinstance(module, nn.ModuleList):
                # To not to estimate inner sub-modules twice!
                continue
            else:
                sizes.extend([Parameter(size=np.asarray(param.size(), dtype=np.float64),
                                        bits=self.__get_parameter_bits(param))
                              for param in module.parameters()])
        return sizes

    def _get_output_sizes(self) -> List[Parameter]:
        """
        Run sample input through each layer to get output sizes
        """
        input_ = torch.Tensor(torch.FloatTensor(*self._input_size))  #, volatile=True
        modules = list(self._model.modules())[1:]
        out_sizes = []
        for i, module in enumerate(modules):
            out = module(input_)
            out_sizes.append(Parameter(size=np.asarray(out.size(), dtype=np.float64),
                                       bits=self.__get_parameter_bits(out)))
            input_ = out
        return out_sizes

    def _calculate_parameters_weight(self) -> float:
        """
        Calculate total number of bits to store `model` parameters
        """
        total_bits = 0
        for param in self._parameters_sizes:
            total_bits += np.prod(param.size) * param.bits
        return total_bits

    @staticmethod
    def __get_parameter_bits(param: torch.Tensor) -> int:
        """
        Calculate total number of bits to store `model` parameters
        """
        # Choose dtype
        if param.dtype == torch.float16:
            return 16
        elif param.dtype == torch.bfloat16:
            return 16
        elif param.dtype == torch.float32:
            return 32
        elif param.dtype == torch.float64:
            return 64
        else:
            print(f"Current version estimated only sizes of floating points parameters!")
            return 32

    def _calculate_forward_backward_weight(self) -> float:
        """
        Calculate bits to store forward and backward pass
        """
        total_bits = 0
        for out in self._output_sizes:
            # forward pass
            f_bits = np.prod(out.size) * out.bits
            total_bits += f_bits

        # Multiply by 2 for both forward and backward
        return total_bits * 2

    def _calculate_input_weight(self) -> float:
        """
        Calculate bits to store single input sequence.
        """
        return np.prod(np.array(self._input_size, dtype=np.float64)) * self._input_n_bits

    def estimate_total_size(self) -> float:
        """
        Estimate model size in memory in megabytes and bits.
        """
        total = self._input_weight + self._parameters_bits + self._forward_backward_bits
        total_bytes = (total / 8)
        total_megabytes = total_bytes / (1024**2)
        print(f"Model size is: {total} bits, {total_bytes} bytes, {total_megabytes} Mb.")
        return total_megabytes

## Example #1: Simple convolutional model

In [37]:
class SimpleModel(nn.Module):
    
    def __init__(self):
        super(SimpleModel, self).__init__()
        
        self.conv0 = nn.Conv2d(1, 16, kernel_size=3, padding=5)
        self.conv1 = nn.Conv2d(16, 32, kernel_size=3)

    def forward(self, x):
        h = self.conv0(x)
        h = self.conv1(h)
        return h

In [52]:
simple_net = SimpleModel()
sample_input = torch.FloatTensor(64, 1, 128, 128)
print(simple_net)

SimpleModel(
  (conv0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(5, 5))
  (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
)


In [53]:
estimator = SizeEstimator(model=simple_net, 
                          input_size=sample_input.size(),
                          input_n_bits=32)  # as input type is float32
estimator.estimate_total_size()

Model size is: 3599390720.0 bits, 449923840.0 bytes, 429.080810546875 Mb.


429.080810546875

## Example #2: Model with nested nn.ModuleList

In [54]:
class NestedModel(nn.Module):
    
    def __init__(self, inputSize, numLayers, nodesPerLayer):
        super(NestedModel, self).__init__()
        self.activation = nn.Sigmoid()
        self.hidden = nn.ModuleList()
        self.hidden.append(nn.Linear(inputSize, nodesPerLayer))
        for i in range(numLayers-1):
            self.hidden.append(nn.Linear(nodesPerLayer, nodesPerLayer))
        self.finalFC = nn.Linear(nodesPerLayer, 1)

    def forward(self, x):
        for layer in self.hidden:
            x = self.activation(layer(x))
        x = self.finalFC(x)
        return x

In [60]:
nested_model = NestedModel(inputSize=200, numLayers=8, nodesPerLayer=128)
sample_input = torch.FloatTensor(64, 100, 200)
print(nested_model)

NestedModel(
  (activation): Sigmoid()
  (hidden): ModuleList(
    (0): Linear(in_features=200, out_features=128, bias=True)
    (1): Linear(in_features=128, out_features=128, bias=True)
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): Linear(in_features=128, out_features=128, bias=True)
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): Linear(in_features=128, out_features=128, bias=True)
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): Linear(in_features=128, out_features=128, bias=True)
  )
  (finalFC): Linear(in_features=128, out_features=1, bias=True)
)


In [62]:
estimator = SizeEstimator(model=nested_model, 
                          input_size=sample_input.size(),
                          input_n_bits=32)  # as input type is float32
estimator.estimate_total_size()

NotImplementedError: 