In [1]:
from batchflow import Pipeline, D, B, V, C, R, P
from batchflow.opensets import Imagenette160
from batchflow.models.torch import UNet
from batchflow import GPUMemoryMonitor
from fastai.vision.all import URLs
from batchflow.models.torch import EncoderDecoder
import torch
from train_module import training_functions
import numpy as np

In [2]:
# Batch size of the first memory measurement; the second measurement uses twice this value.
init_batch_size = 16
# Passed as `n_iters` to `train_pipeline.run` for each measurement run.
epochs_num = 25

# Imagenette160 dataset from batchflow.opensets.
# NOTE(review): `bar=True` presumably enables the loading progress bar seen in the output -- confirm.
dataset = Imagenette160(bar=True)

 50%|█████     | 1/2 [00:05<00:05,  5.57s/it]


In [3]:
# Index of the GPU that is used for training and watched by the memory monitor.
device_id = 4

# Model configuration: UNet on the chosen CUDA device, trained with MSE loss.
model_config = {
    'model': UNet,
    'device': f'cuda:{device_id}',
    'loss': 'mse',
}

In [4]:
# Training pipeline over the train part of the dataset. The model is trained with the same
# batch component (`B.images`) passed twice to `train_model`.
# NOTE(review): presumably these are inputs and targets, i.e. image reconstruction -- confirm.
train_pipeline = (dataset.train.p
                # Crop every image to 160x160 around its center.
                .crop(shape=(160, 160), origin='center')
                # Pipeline variable that accumulates the fetched loss values.
                .init_variable('loss_history', [])
                # Convert images to channels-first float32 arrays.
                .to_array(channels='first', dtype=np.float32)
                # Scale by 1/255 (assumes 8-bit input images, giving values in [0, 1] -- TODO confirm).
                .multiply(1./255)
                .init_model('dynamic', UNet, 'unet',
                            config=model_config)
                # Fetch the loss on every training step and append it to `loss_history`.
                .train_model('unet', B.images, B.images, 
                             fetches='loss', save_to=V('loss_history', mode='a'), use_lock=True)
)

In [5]:
def measure_peak_memory(batch_size):
    """Run `train_pipeline` with the given batch size and return the peak monitored GPU memory.

    Parameters
    ----------
    batch_size : int
        Batch size passed to `train_pipeline.run`.

    Returns
    -------
    The maximum of the values collected in `monitor.data` by `GPUMemoryMonitor`
    for the GPU `device_id` while the pipeline was running.
    """
    with GPUMemoryMonitor(gpu_list=[device_id]) as monitor:
        # Release memory cached by PyTorch so that leftovers of a previous run
        # do not inflate the reading.
        torch.cuda.empty_cache()
        # NOTE(review): `n_iters` presumably counts iterations (batches), not epochs,
        # despite the name `epochs_num` -- confirm against the batchflow docs.
        train_pipeline.run(batch_size, n_iters=epochs_num, bar='n')
    # The data is read only after the monitor context has exited.
    return np.max(monitor.data)


# Two measurements at batch sizes B and 2B are enough to separate the constant
# (model) part of the memory footprint from the per-item part.
first_run_memory = measure_peak_memory(init_batch_size)
second_run_memory = measure_peak_memory(2 * init_batch_size)

  0%|                                                                                                         …

  "In future, upsample filters can be made to match decoder block's filters by default.")


  0%|                                                                                                         …

What happened: we assume that the peak GPU memory of a run grows linearly with the batch size:

***run_memory = model_size + item_size * batch_size***

We set: ***init_batch_size = 16***
 

So, we have two equations:

***first_run_memory = model_size + init_batch_size * item_size***

***second_run_memory = model_size + 2 * init_batch_size * item_size***

We can get:

***item_size * init_batch_size = second_run_memory - first_run_memory***

***model_size = first_run_memory - item_size * init_batch_size = 2 * first_run_memory - second_run_memory***

We want to find max_batch_size, the largest batch size that fits into total_memory of GPU memory.

***max_batch_size = (total_memory - model_size)/item_size***

It is equal to:

***max_batch_size = (total_memory - model_size)/((second_run_memory - first_run_memory)/init_batch_size)*** 

where init_batch_size=16

or:

***max_batch_size = init_batch_size * (total_memory - model_size)/(second_run_memory - first_run_memory)*** 


Memory is measured as a percentage, so ***total_memory = 100*** %.

In [9]:
# Total GPU memory in the units of the monitor readings (percent).
total_memory = 100
# Memory taken by `init_batch_size` additional items: the difference between the two runs.
batch_memory = second_run_memory - first_run_memory
# total_memory - model_size, where model_size = 2 * first_run_memory - second_run_memory.
memory_for_items = total_memory - 2 * first_run_memory + second_run_memory
max_batch_size = init_batch_size * memory_for_items / batch_memory

In [10]:
# Show the estimate; it is a float, so round it down before using it as an actual batch size.
max_batch_size

32.1768149882904