# Memory Consumption. MNIST

In [1]:
import sys
sys.path.insert(1, '../../_tools/')
import torch as th
import torch.nn as nn
import numpy as np
import torchvision
import torchvision.transforms as trasf
from utils.tensordata import TDataset, TDataloader, PosNeg_Bootstrap_TDataloader
from ffa import FFA_withEntropy
import matplotlib.pyplot as plt

# Seed the torch and numpy random generators so that repeated runs are comparable.
seed = 42
th.manual_seed(seed)
np.random.seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = th.device('cuda' if th.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # torch.cuda.synchronize only accepts CUDA devices and raises on a CPU-only
    # machine, so it is called only when the GPU has actually been selected.
    th.cuda.synchronize(device)

### Data

In [2]:
# Folder where torchvision looks for (and, with download=True, downloads) MNIST.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
DATA_ROOT = r"C:\Users\matte\LocalData\Master Thesis"

DS_torch_dataset = torchvision.datasets.MNIST(
    root=DATA_ROOT,
    train=True,
    download=True,
    transform=trasf.ToTensor()
)
TS_torch_dataset = torchvision.datasets.MNIST(
    root=DATA_ROOT,
    train=False,
    download=True,
    transform=trasf.ToTensor()
)


def _to_tdataset(torch_dataset):
    """Load a whole torchvision dataset onto `device` as a TDataset.

    The dataset is traversed exactly once: every sample's image is flattened
    to a 1-D tensor and its label collected in the same pass, so the image
    transform runs once per sample rather than twice.

    Args:
        torch_dataset: iterable yielding (image_tensor, label) pairs.

    Returns:
        TDataset whose `x` stacks one flattened image per row and whose `y`
        is a single-column tensor holding the labels.
    """
    images, labels = zip(*((x.flatten(), y) for x, y in torch_dataset))
    return TDataset(
        x=th.stack(images).to(device),
        y=th.tensor(labels, device=device).reshape(-1, 1)
    )


# Load everything
DS = _to_tdataset(DS_torch_dataset)
TS = _to_tdataset(TS_torch_dataset)
# presumably 5/6 of DS goes to TR and the remainder to VL — confirm in TDataset.random_split
TR, VL = DS.random_split(5/6)

# Dataloaders
batch_size = 128
DS_dl = DS.dataloader(batch_size=batch_size)
TR_dl = TR.dataloader(batch_size=batch_size)
VL_dl = VL.dataloader(batch_size=batch_size)
TS_dl = TS.dataloader(batch_size=batch_size)
TR_pndl = PosNeg_Bootstrap_TDataloader(TR, batch_size=batch_size)
DS_pndl = PosNeg_Bootstrap_TDataloader(DS, batch_size=batch_size)

### Model

In [3]:
# Model class under test; the measurement loop below instantiates it as Model(hyp).
Model = FFA_withEntropy

***

Warm-up model. The first model to be tested is a throwaway one, so that all of PyTorch's one-time ("first-use") memory allocations have already been made by the time the models of interest are measured.

In [4]:
# Hyperparameters of the throwaway warm-up model (see the note above this cell).
hyp_warmup = dict(
    Nclasses=10,
    A=(784+10, 20),
    f_hid=nn.ReLU(),
    lr_hot=0.01,
    lr_cold=0.01,
    momentum=0.99,
    weight_decay=0.0001,
    temperature=0,
    kernel_scale=10.,
    Nepochs=2,
)

# Registry of experiments (name -> hyperparameters), kept in insertion order.
exps = {'warmup': hyp_warmup}

Final models

In [5]:
def _hyp(width, lr, momentum, weight_decay, temperature, kernel_scale):
    """Build one hyperparameter dict for the final experiments.

    All six final configurations share the same layout: 10 classes, an `A`
    tuple of 784+10 followed by three entries equal to `width`, a fresh
    nn.ReLU() as `f_hid`, identical `lr_hot` / `lr_cold`, and 2 epochs.
    Only the arguments below vary between them.
    """
    return {
        'Nclasses': 10,
        'A': (784+10, width, width, width),
        'f_hid': nn.ReLU(),
        'lr_hot': lr,
        'lr_cold': lr,
        'momentum': momentum,
        'weight_decay': weight_decay,
        'temperature': temperature,
        'kernel_scale': kernel_scale,
        'Nepochs': 2
    }


# 'T off' runs (temperature 0). kernel_scale is the value found in the
# 'T on' experiment, to compare the value of H.
hyp_Toff20 = _hyp(20, lr=0.01, momentum=0.99, weight_decay=0.0001,
                  temperature=0, kernel_scale=10.)
hyp_Toff200 = _hyp(200, lr=0.05, momentum=0.99, weight_decay=1e-8,
                   temperature=0, kernel_scale=90.)
hyp_Toff2000 = _hyp(2000, lr=0.1, momentum=0.995, weight_decay=1e-8,
                    temperature=0, kernel_scale=900.)

# 'T on' runs (non-zero temperature).
hyp_Ton20 = _hyp(20, lr=0.01, momentum=0.99, weight_decay=0.0001,
                 temperature=0.1/3, kernel_scale=10.)
hyp_Ton200 = _hyp(200, lr=0.05, momentum=0.99, weight_decay=1e-6,
                  temperature=3, kernel_scale=90.)
hyp_Ton2000 = _hyp(2000, lr=0.1, momentum=0.995, weight_decay=1e-8,
                   temperature=30., kernel_scale=900.)

# Append the final experiments to the registry, after any entry already present.
exps.update(
    Toff20=hyp_Toff20,
    Toff200=hyp_Toff200,
    Toff2000=hyp_Toff2000,
    Ton20=hyp_Ton20,
    Ton200=hyp_Ton200,
    Ton2000=hyp_Ton2000,
)

***

In [6]:
class MemoryAnalyzer:
    """Tracks the peak CUDA memory allocated on one device since the last reset.

    Args:
        device: CUDA device to monitor; it is forwarded unchanged to the
            torch.cuda memory-statistics functions.
    """
    
    def __init__ (self, device):
        
        # Keep the device that was passed in, so the measurements refer to it
        # and do not depend on a module-level `device` variable existing.
        self._device = device
        self.reset()

    def reset (self):
        """Restart peak tracking and record the current level as the baseline."""

        th.cuda.reset_peak_memory_stats(self._device)
        th.cuda.synchronize(self._device)
        # Read right after the reset: this value becomes the reference that
        # get_max_gap subtracts from later peaks.
        self._base_value = th.cuda.max_memory_allocated(self._device) 
    
    def get_max_gap (self):
        """Return the peak allocation above the baseline since reset(), in MByte."""
        
        return (th.cuda.max_memory_allocated(self._device) - self._base_value)/1024/1024 #MByte
  
# Train every registered configuration in turn and report, for each one, the
# peak memory allocated above the baseline recorded just before the model is built.
ma = MemoryAnalyzer(device)
for exp in exps:
    hyp = exps[exp]
    print(exp, end=': ')

    # Start a fresh measurement window for this experiment.
    ma.reset()
    print(f"(baseline: {ma._base_value/1024/1024:.6g} MB)", end=' ')

    model = Model(hyp).to(device)
    model.fit(DS_pndl)

    peak_mb = ma.get_max_gap()
    print(f"{peak_mb:.6g} MB")

    # Drop the references before the next experiment resets the baseline.
    del model, peak_mb

warmup: (baseline: 210.995 MB) 9.34326 MB
Toff20: (baseline: 215.573 MB) 4.771 MB
Toff200: (baseline: 215.573 MB) 6.70996 MB
Toff2000: (baseline: 215.573 MB) 109.155 MB
Ton20: (baseline: 215.573 MB) 4.771 MB
Ton200: (baseline: 215.573 MB) 14.5952 MB
Ton2000: (baseline: 215.573 MB) 202.446 MB
