In [16]:
from bonsai.data_loaders import load_data
from bonsai.net import Net
from bonsai.trainers import *
from bonsai.helpers import *

import pandas as pd
pd.set_option("display.max_rows", 150)

In [2]:
# Phase lengths / interval for the search schedule. Not read anywhere else
# in this cell.
nas_schedule = dict(learn_phase=16, prune_phase=16, prune_interval=4)

# Experiment settings. This cell itself reads batch_size, dataset, classes,
# scale, num_patterns, patterns, nodes, drop_prob and lr_schedule; the other
# keys are not consumed here.
hypers = {
    'gpu_space': 8.25,
    'dataset': 'CIFAR10',
    'classes': 10,
    'batch_size': 64,
    'scale': 5,
    'nodes': 4,
    'patterns': [['r', 'n', 'n', 'n', 'na']],
    'half': False,
    'multiplier': 1,
    'lr_schedule': {'lr_max': 0.01, 'T': 1},
    'drop_prob': 0.25,
    'prune_rate': {'edge': 0.5, 'input': 0.5},
    'num_patterns': 1,
}

# load_data returns the data object plus `dim`, which Net needs below.
data, dim = load_data(hypers['batch_size'], hypers['dataset'])

# Build the network with pruning switched off; the random_ops values are
# hard-coded here rather than taken from `hypers`.
model = Net(
    dim=dim,
    classes=hypers['classes'],
    scale=hypers['scale'],
    num_patterns=hypers['num_patterns'],
    patterns=hypers['patterns'],
    nodes=hypers['nodes'],
    random_ops={'e_c': 0.25, 'i_c': 1},
    drop_prob=hypers['drop_prob'],
    lr_schedule=hypers['lr_schedule'],
    prune=False,
)
# The data object is attached to the model as an attribute.
model.data = data

In [3]:
# Move the model to the GPU *before* building the optimizer: the torch.optim
# documentation asks for parameters to already live on their final device
# when the optimizer is constructed.
model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),
                      lr=model.lr_scheduler.lr,
                      momentum=.9,
                      weight_decay=3e-4)

# Run one (truncated) training epoch under the autograd profiler with CUDA
# timing enabled; the recorded events are available afterwards via `prof`.
# NOTE(review): kill_at=50 presumably stops the epoch after 50 batches --
# confirm against bonsai.trainers.train.
with torch.autograd.profiler.profile(use_cuda=True) as prof:
    train(model,
          torch.device("cuda"),
          criterion=criterion,
          optimizer=optimizer,
          epoch=0,
          kill_at=50)

12/10/2019 03:01 PM
Init: 22.00MiB
0: 826.00MiB
1: 1.44GiB
2: 2.08GiB
3: 2.71GiB
4: 3.36GiB
GP: 3.36GiB
Classifier: 3.36GiB
Train Corrects: Top-1: 1.19%, 15.50s


In [4]:
# Render the profiler's event table as one big string (row_limit=None is
# passed so the default row cap, if any, is not applied).
tbl = prof.table(row_limit=None)
# Keep split("\n") rather than splitlines(): it preserves the trailing empty
# string, and later cells slice from the end of `rows` with fixed offsets.
rows = tbl.split("\n")
# Number of text lines in the rendered table.
print(len(rows))

747333


In [5]:
# Inspect the last three lines of the table text to see what trails the
# event rows (summary/footer lines rather than data).
rows[-3:]

['Self CPU time total: 10.578s', 'CUDA time total: 21.086s', '']

In [6]:
def white_split(str):
    """Split one table line into its cell values.

    The line is cut at every occurrence of two consecutive spaces, each piece
    is stripped of surrounding whitespace, and empty pieces are discarded.
    Single spaces inside a cell (e.g. "CPU total %") are therefore preserved.

    Note: the parameter name shadows the builtin ``str`` inside this function;
    it is kept so existing keyword callers continue to work.
    """
    trimmed = (piece.strip() for piece in str.split("  "))
    return [cell for cell in trimmed if cell]

# Column titles are taken from the second line of the table text.
# NOTE(review): lines 0 and 2 are presumably separator rules -- confirm
# against the raw `tbl` string.
header = white_split(rows[1])

In [7]:
def _to_seconds(text):
    """Convert a profiler duration string to seconds.

    Accepts values suffixed with 'us', 'ms' or 's' (e.g. '18.000us',
    '1.250ms', '2.100s'). The table footer is printed in seconds, so the
    profiler does not always use microseconds; stripping a fixed two
    characters and scaling by 1e-6 would mis-scale 'ms' values and truncate
    's' values.

    Raises:
        ValueError: if the string has no recognised unit suffix or its
            numeric part cannot be parsed.
    """
    # Two-letter suffixes are tested first so 'us'/'ms' are not taken for 's'.
    for suffix, scale in (('us', 1e-6), ('ms', 1e-3), ('s', 1.0)):
        if text.endswith(suffix):
            return float(text[:-len(suffix)]) * scale
    raise ValueError("unrecognised time value: %r" % (text,))


# Skip the first three lines (the column titles are parsed separately from
# rows[1]) and the last four (the footer lines plus, presumably, a closing
# separator rule -- confirm against the raw table text).
table_rows = []
for row in rows[3:-4]:
    fields = white_split(row)
    # Pair each cell with the column title at the same position.
    table_rows.append({header[i]: value for i, value in enumerate(fields)})
df = pd.DataFrame(table_rows)

# Turn the textual duration columns into float seconds, honouring the unit.
for field in ['CPU time avg', 'CPU total', 'CUDA time avg', 'CUDA total', 'Self CPU total']:
    df[field] = df[field].apply(_to_seconds)
df['Number of Calls'] = df['Number of Calls'].apply(int)
df

Unnamed: 0,CPU time avg,CPU total,CPU total %,CUDA time avg,CUDA total,CUDA total %,Input Shapes,Name,Number of Calls,Self CPU total,Self CPU total %
0,0.000018,0.000018,0.00%,0.000015,0.000015,0.00%,[],random_,1,0.000018,0.00%
1,0.000004,0.000004,0.00%,0.000004,0.000004,0.00%,[],is_floating_point,1,0.000004,0.00%
2,0.000004,0.000004,0.00%,0.000004,0.000004,0.00%,[],is_complex,1,0.000004,0.00%
3,0.000015,0.000015,0.00%,0.000015,0.000015,0.00%,[],item,1,0.000010,0.00%
4,0.000005,0.000005,0.00%,0.000005,0.000005,0.00%,[],_local_scalar_dense,1,0.000005,0.00%
5,0.000032,0.000032,0.00%,0.000029,0.000029,0.00%,[],to,1,0.000024,0.00%
6,0.000007,0.000007,0.00%,0.000008,0.000008,0.00%,[],empty,1,0.000007,0.00%
7,0.000017,0.000017,0.00%,0.000017,0.000017,0.00%,[],div,1,0.000017,0.00%
8,0.000012,0.000012,0.00%,0.000012,0.000012,0.00%,[],clone,1,0.000012,0.00%
9,0.000015,0.000015,0.00%,0.000015,0.000015,0.00%,[],to,1,0.000015,0.00%


In [9]:
# Sanity check: total CUDA seconds summed over all parsed event rows, for
# comparison with the 'CUDA time total' line in the table footer.
df['CUDA total'].sum()

20.706645459

In [17]:
# Per-operator CUDA cost: total and mean seconds per table row, plus the
# number of calls, ordered from most to least total CUDA time.
(
    df.groupby('Name')[['CUDA total', 'Number of Calls']]
    .agg({'CUDA total': ['sum', 'mean'], 'Number of Calls': ['sum']})
    .sort_values(by=('CUDA total', 'sum'), ascending=False)
)

Unnamed: 0_level_0,CUDA total,CUDA total,Number of Calls
Unnamed: 0_level_1,sum,mean,sum
Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
mul,1.987779,2.2e-05,91729
add,1.394581,2.7e-05,51740
MulBackward0,1.29071,4.5e-05,28574
add_,1.202789,1e-05,114560
CudnnBatchNormBackward,1.099288,0.00014,7852
cudnn_batch_norm_backward,1.030292,0.000131,7852
ThnnConvDepthwise2DBackward,0.925898,0.000356,2600
batch_norm,0.914002,0.000116,7852
thnn_conv_depthwise2d_backward,0.89862,0.000346,2600
_batch_norm_impl_index,0.892727,0.000114,7852


In [18]:
# Per-operator CPU cost: total and mean seconds per table row, plus the
# number of calls, ordered from most to least total CPU time.
(
    df.groupby('Name')[['CPU total', 'Number of Calls']]
    .agg({'CPU total': ['sum', 'mean'], 'Number of Calls': ['sum']})
    .sort_values(by=('CPU total', 'sum'), ascending=False)
)

Unnamed: 0_level_0,CPU total,CPU total,Number of Calls
Unnamed: 0_level_1,sum,mean,sum
Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
mul,1.618507,1.8e-05,91729
add_,1.506604,1.3e-05,114560
DivBackward0,1.015001,6.9e-05,14794
MulBackward0,0.939106,3.3e-05,28574
add,0.879523,1.7e-05,51740
batch_norm,0.774938,9.9e-05,7852
_batch_norm_impl_index,0.702695,8.9e-05,7852
div,0.679304,1.7e-05,39833
torch::autograd::AccumulateGrad,0.602097,2.2e-05,26936
CudnnBatchNormBackward,0.459319,5.8e-05,7852
