# Memory Consumption. MNIST

In [1]:
import sys
sys.path.insert(1, '../../_tools/')
import torch as th
import torch.nn as nn
import numpy as np
import torchvision
import torchvision.transforms as trasf
from utils.tensordata import TDataset, TDataloader, PosNeg_Bootstrap_TDataloader
from _mlp import MLP
import matplotlib.pyplot as plt

# Seed the torch and numpy RNGs so shuffling/initialisation are repeatable.
seed = 42
th.manual_seed(seed)
np.random.seed(seed)

# Prefer the GPU when one is present, otherwise fall back to the CPU.
device = th.device('cuda' if th.cuda.is_available() else 'cpu')
# Only synchronize when we really are on CUDA: calling th.cuda.synchronize
# with a CPU device (or without a CUDA runtime) raises instead of being a no-op,
# which would defeat the CPU fallback chosen just above.
if device.type == 'cuda':
    th.cuda.synchronize(device)

### Data

In [2]:
# Single source of truth for the dataset location (previously repeated per split).
MNIST_ROOT = r"C:\Users\matte\LocalData\Master Thesis"

DS_torch_dataset = torchvision.datasets.MNIST(
    root=MNIST_ROOT,
    train=True,
    download=True,
    transform=trasf.ToTensor()
)
TS_torch_dataset = torchvision.datasets.MNIST(
    root=MNIST_ROOT,
    train=False,
    download=True,
    transform=trasf.ToTensor()
)


def _to_tdataset(torch_dataset):
    """Materialise a torchvision dataset as a TDataset living on `device`.

    The dataset is iterated exactly once (the previous code iterated it twice,
    once for the images and once for the labels, applying the transform to
    every image both times). Each image is flattened to a vector and the
    labels are stored as a column, i.e. with shape (n_samples, 1).
    """
    samples = [(x.flatten(), y) for x, y in torch_dataset]
    return TDataset(
        x=th.stack([x for x, _ in samples]).to(device),
        y=th.tensor([y for _, y in samples], device=device).reshape(-1, 1)
    )


# Load everything
DS = _to_tdataset(DS_torch_dataset)
TS = _to_tdataset(TS_torch_dataset)
# NOTE(review): TR/VL are not used in the cells visible here; the split is kept
# so the notebook namespace and RNG consumption stay as before.
# Presumably 5/6 is the fraction assigned to TR - confirm against TDataset.random_split.
TR, VL = DS.random_split(5/6)

# Dataloaders
batch_size = 128
DS_dl = DS.dataloader(batch_size=batch_size, method='shuffle')

### Model

In [3]:
# Model class under test; the measurement loop instantiates it as Model(hyp).
Model = MLP

***

Warm-up model. The first model to be tested is a throwaway one, so that all of PyTorch's one-time ('first-use') memory allocations have already been made by the time the models of interest are measured.

In [4]:
# Throwaway configuration, run first so that one-off allocations do not
# pollute the measurements of the later models.
# NOTE(review): 'archit' is presumably the sequence of layer sizes
# (784 inputs -> 10 outputs, no hidden layer) - confirm against _mlp.MLP.
hyp_warmup = dict(
    task='classification',
    archit=(784, 10),
    f_hid=nn.ReLU(),
    weight_decay=1e-3,
    lr=0.1,
    momentum=0.99,
    n_epochs=2,
)

# Registry of experiments: name -> hyper-parameter dict (insertion order is run order).
exps = {'warmup': hyp_warmup}

MLP models (the hyperparameters are not tuned: we are only interested in memory usage).

In [5]:
def _deep_mlp_hyp(width):
    """Build the hyper-parameter dict for one of the measured MLPs.

    The three measured configurations differ only in `width`, the size repeated
    six times between the 784 and 10 entries of 'archit'. Every call creates
    its own nn.ReLU() instance, so no activation object is shared between
    configurations.
    """
    return {
        'task': 'classification',
        'archit': (784,) + (width,) * 6 + (10,),
        'f_hid': nn.ReLU(),
        'weight_decay': 1e-3,
        'lr': 0.1,
        'momentum': 0.99,
        'n_epochs': 2
    }


hyp_20 = _deep_mlp_hyp(20)
hyp_200 = _deep_mlp_hyp(200)
hyp_2000 = _deep_mlp_hyp(2000)

# Append to the registry created with the warm-up entry; update() mutates the
# existing dict in place and keeps this insertion order.
exps.update(
    mlp20=hyp_20,
    mlp200=hyp_200,
    mlp2000=hyp_2000,
)

***

In [6]:
class MemoryAnalyzer:
    """Track how far the CUDA peak allocation rises above a baseline.

    Usage: call `reset()` right before the code to be measured, run it, then
    call `get_max_gap()` to read the extra peak memory in MB.

    Attributes:
        device: device whose CUDA allocator statistics are queried.
        _base_value: peak allocated bytes recorded by the last `reset()`.
    """

    def __init__ (self, device):
        """Bind the analyzer to `device` and take the first baseline.

        Args:
            device: device passed to the `th.cuda` statistics functions.
        """
        # Keep the device that was passed in: previously the argument was
        # ignored and the methods silently used a global named `device`.
        self.device = device
        self.reset()

    def reset (self):
        """Restart peak tracking and store the current peak as the baseline."""
        th.cuda.reset_peak_memory_stats(self.device)
        # Wait for pending work on the device before reading the statistic.
        th.cuda.synchronize(self.device)
        self._base_value = th.cuda.max_memory_allocated(self.device)

    def get_max_gap (self):
        """Return the peak allocation above the baseline since `reset()`.

        Returns:
            float: (peak bytes - baseline bytes) expressed in MB (1 MB = 1024*1024 bytes).
        """
        peak_bytes = th.cuda.max_memory_allocated(self.device)
        return (peak_bytes - self._base_value)/1024/1024 #MByte
  
# Fit every registered configuration once and report the peak CUDA memory it
# needed on top of what was already allocated before the model was created.
ma = MemoryAnalyzer(device)
for exp, hyp in exps.items():
    print(exp, end=': ')

    # New baseline right before the model is built, so the gap covers both
    # moving the model to the device and fitting it.
    ma.reset()
    baseline_mb = ma._base_value/1024/1024
    print(f"(baseline: {baseline_mb:.6g} MB)", end=' ')

    model = Model(hyp).to(device)
    model.fit(DS_dl)

    peak_gap_mb = ma.get_max_gap()
    print(f"{peak_gap_mb:.6g} MB")

    # Drop the references so the next experiment does not start with this
    # model still referenced from the notebook namespace.
    del model, peak_gap_mb, baseline_mb

warmup: (baseline: 210.993 MB) 1.31885 MB
mlp20: (baseline: 211.451 MB) 0.991211 MB
mlp200: (baseline: 211.451 MB) 5.19775 MB
mlp2000: (baseline: 211.451 MB) 274.701 MB
