In [1]:
import pickle as pkl
import time
import sys

# Master switch for the architecture search: both search cells below are wrapped in
# `if search:` and do nothing when this is False.
search=True

from bonsai.data_loaders import load_data
from bonsai.net import Net
from bonsai.trainers import *
from bonsai.helpers import cell_cnx, show_time, namer, TransitionDict, prev_output, wipe_output, mem_stats
from bonsai.ops import commons, Zero

%load_ext autoreload
%autoreload 2

In [2]:
# --- Experiment configuration -------------------------------------------------
# Dataset identifier handed to load_data, and the class count handed to Net.
dataset, classes = 'CIFAR10', 10

# Memory budget; the search loop compares size_test's estimate (printed in GiB)
# against half of this value.
gpu_space = 8.4

# Starting compression ratio passed to full_train during the search.
comp_ratio = 0.25

hypers = dict(
    # The search loop iterates over range(init, final); 'final' is also the scale
    # used for the final training and random-search networks.
    scale=dict(init=2, final=7),
    # The search starts at 'init' and halves the batch size down to 'final'.
    batch_size=dict(init=256, final=64),
    # Net constructor arguments.
    reductions=3,
    spacing=5,
    nodes=4,
    # Learning-rate schedule forwarded to full_train for the final training runs;
    # the search reuses only lr_min / lr_max from it.
    lr_schedule=dict(lr_min=0, lr_max=0.01, t_0=600, t_mult=1),
    # Epoch count and drop probability for the final training runs.
    epochs=600,
    drop_prob=0.25,
)

# Search

In [3]:
if search:
    # A re-run of this cell may find a network from a previous run still in the
    # kernel: drop it and call clean() before building a new one.
    if 'model' in globals():
        del model
        clean("Search init")

    # Load the data at the initial batch size.
    batch_size = hypers['batch_size']['init']
    data, data_shape = load_data(batch_size, dataset)
    print("Edges per cell:", cell_cnx(hypers['nodes']))

    # Build the search network at the initial scale and save its genotype.
    model = Net(
        dim=data_shape,
        classes=classes,
        scale=hypers['scale']['init'],
        reductions=hypers['reductions'],
        spacing=hypers['spacing'],
        nodes=hypers['nodes'],
        auxiliary=True,
    )
    model.save_genotype()

    # Estimate the memory footprint, then report the architecture and the estimate.
    size, overflow = size_test(model, data)
    print(model)
    if overflow:
        print("Est Size: {}{:.2f}GiB {}".format(">", size, "(overflow)"))
        # The network does not fit: discard it. The next cell needs `model`, so
        # the configuration has to be changed and this cell re-run first.
        del model
        clean('Search init')
    else:
        print("Est Size: {}{:.2f}GiB {}".format("", size, ""))

    # Marks the next search cell as a fresh start (it passes resume=not init_or_finish).
    init_or_finish = True

Edges per cell: 24.0
              Carlton McClure Banbury              
Initializer         :                     20 params
Cell 0  (Normal)    :  4  x 32 ,       2,017 params
Cell 1  (Normal)    :  4  x 32 ,       2,018 params
Cell 2  (Normal)    :  4  x 32 ,       2,019 params
Cell 3  (Normal)    :  4  x 32 ,       2,020 params
Cell 4  (Reduction) :  8  x 32 ,       4,957 params
 ↳ Aux Tower        :                 20,490 params
Cell 5  (Normal)    :  8  x 16 ,       5,078 params
Cell 6  (Normal)    :  8  x 16 ,       5,079 params
Cell 7  (Normal)    :  8  x 16 ,       5,080 params
Cell 8  (Normal)    :  8  x 16 ,       5,081 params
Cell 9  (Reduction) :  16 x 16 ,      13,770 params
 ↳ Aux Tower        :                 10,250 params
Cell 10 (Normal)    :  16 x 8  ,      14,403 params
Cell 11 (Normal)    :  16 x 8  ,      14,404 params
Cell 12 (Normal)    :  16 x 8  ,      14,405 params
Cell 13 (Normal)    :  16 x 8  ,      14,406 params
Cell 14 (Reduction) :  32 x 8  ,      43,04

In [None]:
# Architecture search: for every scale level, train with pruning, re-estimate the
# model's memory footprint, and then either retry with stronger compression pressure,
# halve the batch size, or accept the result and scale the model up.
if search:
    # NOTE(review): presumably restores the previous cell output when resuming and
    # clears it on a fresh start — confirm against bonsai.helpers.
    prev_output() if not init_or_finish else wipe_output()
    search_start = time.time()
    
    # Search schedule: epochs = cycle_len * n_cycles (16) per training call;
    # transition = cycle_len * transition_after (8) is the epoch key at which the
    # compression lambdas below are switched on.
    schedule = {'cycle_len':4,
                'transition_after':2,
                'n_cycles':4}
    epochs = schedule['cycle_len']*schedule['n_cycles']
    transition = schedule['cycle_len']*schedule['transition_after']
    # Same lr bounds as the final-training schedule, but with t_0 set to `transition`.
    lr_schedule = {'lr_min': hypers['lr_schedule']['lr_min'],
                   'lr_max': hypers['lr_schedule']['lr_max'],
                   't_0': transition,
                   't_mult': 1}    
    # One iteration per scale level; range() excludes hypers['scale']['final'].
    for scaling in range(hypers['scale']['init'],hypers['scale']['final']):
        # `tries` multiplies the compression lambdas on every retry at this scale.
        tries = 1
        # NOTE(review): this retry loop has no upper bound on `tries`; it only ends
        # once the size check below takes one of the `break` branches.
        while 1:
            # full_train's return value is stored as the resume flag: truthy is treated
            # as "finished" (see the `if init_or_finish:` branch after this loop).
            init_or_finish = full_train(
                model,
                data,
                resume=not init_or_finish,
                epochs=epochs,
                drop_prob=0,
                # NOTE(review): keys look like epoch thresholds (no compression terms
                # from epoch 0, edge/input lambdas from `transition` on) — confirm
                # against TransitionDict.
                comp_lambdas=TransitionDict({0: None,
                                             transition: {'edge': .15*tries, 'input': .1*tries}}),
                comp_ratio=comp_ratio,
                prune_interval=schedule['cycle_len'],
                lr_schedule=lr_schedule
            );
            clean(verbose=False)
            # Re-estimate the footprint of the (possibly pruned) model.
            size, overflow = size_test(model,data)
            print("Est size: {}, Batch: {}".format(size,batch_size))
            if size > gpu_space/2 and batch_size==hypers['batch_size']['final']:
                # Over half the memory budget and the batch size is already at its
                # final value: shrink comp_ratio by 25% and train again at this scale.
                # NOTE(review): this mutates the notebook-level `comp_ratio`, so
                # re-running the cell starts from the reduced value.
                comp_ratio*=.75 
                tries+=1
                print("Restarting pruning at scale level {}, new comp ratio: {}".format(scaling,comp_ratio))
            elif size>(gpu_space/2) and batch_size>hypers['batch_size']['final']:
                # Over half the budget but the batch size can still shrink: halve it,
                # reload the data at the new batch size and move on.
                batch_size=batch_size//2
                data, data_shape  = load_data(batch_size,dataset)
                print("Lowering batch size to {} for scaling".format(batch_size))
                break
            else:
                # Within budget (or batch size already below its final value): move on.
                break   

        if init_or_finish:
            # Training at this scale finished: save the genotype, then scale up.
            model.save_genotype()
            clean("Prescale")
            model.scale_up()
            # NOTE(review): model_id is not read again anywhere in the visible notebook.
            model_id = model.model_id
        else:
            # full_train returned a falsy value: stop searching. The flag stays falsy,
            # so re-running this cell calls full_train with resume=True.
            clean()
            break
    print("Search Time:",show_time(time.time()-search_start))

=== Training Carlton McClure Banbury ===
0: 22.00MiB
1: 862.00MiB
2: 1.60GiB
3: 2.37GiB
4: 3.13GiB
5: 3.54GiB
6: 3.99GiB
7: 4.44GiB
8: 4.88GiB
9: 5.33GiB
10: 5.62GiB
11: 5.93GiB
12: 6.24GiB
13: 6.56GiB
14: 6.88GiB
Train Corrects: Top-1: 35.39%, 4m,41s
Test  Corrects: Top-1: 43.48%, 14.57 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 44.17%, 4m,40s
Test  Corrects: Top-1: 44.88%, 14.52 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 47.45%, 4m,37s
Test  Corrects: Top-1: 49.06%, 14.23 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 49.80%, 4m,36s

Deadheaded 0 operations
Param Delta: 188,804 -> 188,804
Pre-prune Test  Corrects: Top-1: 51.74%, 14.12 s
Post-prune Test  Corrects: Top-1: 51.74%, 14.27 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 51.78%, 4m,40s
Test  Corrects: Top-1: 52.08%, 14.33 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 53.48%, 4m,38s
Test  Corrects: Top-1: 51.80%, 14.74 s

[3

Train Corrects: Top-1: 78.74%, Comp: 0.45, 1.73 11m,56s
Train Loss Components: C: 1.337, E: 0.126, I: 0.07
Test  Corrects: Top-1: 74.08%, 30.55 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 80.77%, Comp: 0.46, 1.93 11m,57s
Train Loss Components: C: 1.142, E: 0.127, I: 0.08
Test  Corrects: Top-1: 80.78%, 30.66 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 82.86%, Comp: 0.45, 1.73 11m,57s
Train Loss Components: C: 0.540, E: 0.125, I: 0.07
Test  Corrects: Top-1: 82.07%, 30.27 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 84.18%, Comp: 0.45, 1.80 11m,56s
Train Loss Components: C: 1.538, E: 0.124, I: 0.07

Deadheaded 46 operations
Param Delta: 1,407,519 -> 1,319,505
Pre-prune Test  Corrects: Top-1: 78.27%, 30.39 s
Post-prune Test  Corrects: Top-1: 78.27%, 28.40 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 3.9453125, Batch: 64
Cleaning at Prescale. Pre: 26.00MiB, Post: 26.00MiB
[31mScaling from 4 to 5[0m
=== Tr

Train Corrects: Top-1: 70.46%, 7m,42s
Test  Corrects: Top-1: 60.38%, 25.47 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 68.31%, 7m,42s
Test  Corrects: Top-1: 62.32%, 25.89 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 62.27%, 7m,41s

Deadheaded 3 operations
Param Delta: 3,940,501 -> 3,937,746
Pre-prune Test  Corrects: Top-1: 69.10%, 25.27 s
Post-prune Test  Corrects: Top-1: 69.10%, 25.38 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 71.39%, 7m,39s
Test  Corrects: Top-1: 74.84%, 25.11 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 76.43%, 7m,43s
Test  Corrects: Top-1: 77.24%, 26.64 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 79.16%, 7m,49s
Test  Corrects: Top-1: 82.42%, 26.00 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 81.80%, 7m,39s

Deadheaded 17 operations
Param Delta: 3,937,746 -> 3,885,921
Pre-prune Test  Corrects: Top-1: 83.45%, 25.06 s


Test  Corrects: Top-1: 87.17%, 22.95 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 81.24%, Comp: 0.34, 1.53 9m,27s
Train Loss Components: C: 4.062, E: 0.369, I: 0.19
Test  Corrects: Top-1: 76.51%, 23.29 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 78.60%, Comp: 0.33, 1.33 9m,27s
Train Loss Components: C: 1.293, E: 0.357, I: 0.17
Test  Corrects: Top-1: 78.85%, 22.97 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 76.64%, Comp: 0.33, 1.40 9m,26s
Train Loss Components: C: 2.340, E: 0.350, I: 0.17

Deadheaded 1 operations
Param Delta: 3,677,320 -> 3,641,735
Pre-prune Test  Corrects: Top-1: 74.82%, 23.34 s
Post-prune Test  Corrects: Top-1: 74.82%, 23.01 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 79.94%, Comp: 0.33, 1.40 9m,22s
Train Loss Components: C: 1.613, E: 0.348, I: 0.17
Test  Corrects: Top-1: 81.98%, 22.93 s

[31mAdjusting lr to 0

Post-prune Test  Corrects: Top-1: 83.21%, 19.65 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 83.34%, 5m,39s
Test  Corrects: Top-1: 85.07%, 19.45 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 85.98%, 5m,39s
Test  Corrects: Top-1: 86.23%, 19.68 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 87.80%, 5m,40s
Test  Corrects: Top-1: 88.03%, 19.58 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 89.01%, 5m,39s

Deadheaded 0 operations
Param Delta: 2,995,945 -> 2,995,945
Pre-prune Test  Corrects: Top-1: 86.32%, 19.58 s
Post-prune Test  Corrects: Top-1: 86.32%, 19.83 s

[31mAdjusting lr to 0.0003806023374435663[0m
Train Corrects: Top-1: 89.35%, Comp: 0.29, 1.27 8m,2s
Train Loss Components: C: 1.797, E: 0.834, I: 0.33
Test  Corrects: Top-1: 87.82%, 19.50 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 81.88%, Comp: 0.28, 1.27 8m,2s
Trai

Train Corrects: Top-1: 90.61%, Comp: 0.24, 1.20 7m,36s
Train Loss Components: C: 1.809, E: 1.027, I: 0.43

Deadheaded 9 operations
Param Delta: 2,634,036 -> 2,560,875
Pre-prune Test  Corrects: Top-1: 89.59%, 18.41 s
Post-prune Test  Corrects: Top-1: 89.58%, 17.80 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 4.791015625, Batch: 64
Restarting pruning at scale level 5, new comp ratio: 0.025028228759765625
=== Training Carlton McClure Banbury ===
0: 62.00MiB
1: 938.00MiB
2: 1.65GiB
3: 2.62GiB
4: 3.31GiB
5: 3.38GiB
6: 3.38GiB
7: 3.56GiB
8: 3.78GiB
9: 3.97GiB
10: 4.15GiB
11: 4.28GiB
12: 4.38GiB
13: 4.50GiB
14: 4.62GiB
Train Corrects: Top-1: 86.47%, 5m,9s
Test  Corrects: Top-1: 84.68%, 17.99 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 82.56%, 5m,8s
Test  Corrects: Top-1: 84.82%, 18.12 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 84.05%, 5m,11s
Test  Corrects: Top-1: 84.55%, 18.25 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corre

Train Corrects: Top-1: 84.56%, Comp: 0.22, 1.13 6m,51s
Train Loss Components: C: 3.720, E: 1.393, I: 0.55

Deadheaded 0 operations
Param Delta: 2,189,907 -> 2,189,907
Pre-prune Test  Corrects: Top-1: 85.97%, 16.53 s
Post-prune Test  Corrects: Top-1: 85.97%, 16.41 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 87.72%, Comp: 0.21, 1.13 6m,53s
Train Loss Components: C: 2.807, E: 1.358, I: 0.55
Test  Corrects: Top-1: 86.00%, 16.77 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 89.85%, Comp: 0.21, 1.13 6m,53s
Train Loss Components: C: 2.775, E: 1.352, I: 0.55
Test  Corrects: Top-1: 87.72%, 16.57 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 91.36%, Comp: 0.21, 1.13 6m,53s
Train Loss Components: C: 1.969, E: 1.349, I: 0.55
Test  Corrects: Top-1: 88.51%, 16.58 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 92.37%, Comp: 0.21, 1.13 6m,52s
Train Loss Components: C: 2.229, E: 1.335, I: 0.55

Deadheaded 9 opera

10: 6.98GiB
11: 6.98GiB
12: 7.08GiB
13: 7.29GiB
14: 7.51GiB
Train Corrects: Top-1: 59.03%, 5m,28s
Test  Corrects: Top-1: 58.48%, 18.99 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 60.32%, 5m,27s
Test  Corrects: Top-1: 65.70%, 18.95 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 62.43%, 5m,28s
Test  Corrects: Top-1: 61.27%, 19.00 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 65.34%, 5m,28s

Deadheaded 1 operations
Param Delta: 7,537,615 -> 7,537,486
Pre-prune Test  Corrects: Top-1: 66.08%, 18.96 s
Post-prune Test  Corrects: Top-1: 66.08%, 18.95 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 68.87%, 5m,27s
Test  Corrects: Top-1: 72.98%, 18.93 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 72.27%, 5m,27s
Test  Corrects: Top-1: 74.12%, 18.94 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 75.71%, 5m,27s
Test  Corrects: Top-1: 76.65%, 18.93 s

[31mAdjusting lr to 0.00146

Train Corrects: Top-1: 84.41%, Comp: 0.21, 1.07 7m,9s
Train Loss Components: C: 1.490, E: 0.361, I: 0.02
Test  Corrects: Top-1: 85.20%, 18.88 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 69.41%, Comp: 0.21, 1.07 7m,8s
Train Loss Components: C: 7.284, E: 0.363, I: 0.02
Test  Corrects: Top-1: 57.60%, 18.89 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 71.98%, Comp: 0.20, 1.07 7m,8s
Train Loss Components: C: 3.430, E: 0.355, I: 0.02
Test  Corrects: Top-1: 10.00%, 18.89 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 74.88%, Comp: 0.20, 1.07 7m,10s
Train Loss Components: C: 2.964, E: 0.357, I: 0.02

Deadheaded 0 operations
Param Delta: 7,537,486 -> 7,537,486
Pre-prune Test  Corrects: Top-1: 10.01%, 18.91 s
Post-prune Test  Corrects: Top-1: 10.01%, 18.91 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 77.27%, Comp: 0.20, 1.07 7m,8s
Train Loss 

Train Corrects: Top-1: 71.11%, 5m,13s
Test  Corrects: Top-1: 75.56%, 18.20 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 74.85%, 5m,13s

Deadheaded 0 operations
Param Delta: 7,481,159 -> 7,481,159
Pre-prune Test  Corrects: Top-1: 76.77%, 18.23 s
Post-prune Test  Corrects: Top-1: 76.77%, 18.23 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 76.11%, 5m,13s
Test  Corrects: Top-1: 79.76%, 18.19 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 80.03%, 5m,13s
Test  Corrects: Top-1: 82.45%, 18.21 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 83.25%, 5m,13s
Test  Corrects: Top-1: 85.00%, 18.21 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 84.50%, 5m,13s

Deadheaded 0 operations
Param Delta: 7,481,159 -> 7,481,159
Pre-prune Test  Corrects: Top-1: 86.27%, 18.20 s
Post-prune Test  Corrects: Top-1: 86.27%, 18.20 s

[31mAdjusting lr to 0.0003806023374435663[0m
Train Corrects: Top-1: 85.4

Train Corrects: Top-1: 84.24%, Comp: 0.19, 1.07 6m,37s
Train Loss Components: C: 3.201, E: 0.902, I: 0.05
Test  Corrects: Top-1: 85.00%, 17.42 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 85.55%, Comp: 0.19, 1.07 6m,37s
Train Loss Components: C: 1.642, E: 0.911, I: 0.05

Deadheaded 2 operations
Param Delta: 7,150,271 -> 7,013,053
Pre-prune Test  Corrects: Top-1: 86.61%, 17.42 s
Post-prune Test  Corrects: Top-1: 86.61%, 17.33 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 7.01171875, Batch: 64
Restarting pruning at scale level 6, new comp ratio: 0.001409427528415108
=== Training Carlton McClure Banbury ===
0: 96.00MiB
1: 1.11GiB
2: 2.03GiB
3: 3.08GiB
4: 3.98GiB
5: 4.80GiB
6: 5.18GiB
7: 5.50GiB
8: 5.80GiB
9: 6.22GiB
10: 6.30GiB
11: 6.30GiB
12: 6.38GiB
13: 6.52GiB
14: 6.71GiB
Train Corrects: Top-1: 78.19%, 4m,54s
Test  Corrects: Top-1: 79.52%, 17.33 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 76.02%, 4m,54s
Test  Corrects: Top-1: 78.50%

Post-prune Test  Corrects: Top-1: 78.27%, 17.08 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 71.86%, 4m,52s
Test  Corrects: Top-1: 77.38%, 17.12 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 76.49%, 4m,51s
Test  Corrects: Top-1: 83.31%, 17.06 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 81.99%, 4m,52s
Test  Corrects: Top-1: 83.75%, 17.08 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 83.98%, 4m,52s

Deadheaded 1 operations
Param Delta: 6,859,195 -> 6,859,194
Pre-prune Test  Corrects: Top-1: 85.69%, 17.09 s
Post-prune Test  Corrects: Top-1: 85.69%, 17.06 s

[31mAdjusting lr to 0.0003806023374435663[0m
Train Corrects: Top-1: 84.86%, Comp: 0.18, 1.00 6m,29s
Train Loss Components: C: 2.003, E: 1.132, I: 0.00
Test  Corrects: Top-1: 85.93%, 17.02 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 75.31%, Comp: 0.18, 1.00 6m,28s
Tr

Train Corrects: Top-1: 84.81%, Comp: 0.17, 1.00 6m,19s
Train Loss Components: C: 1.563, E: 1.257, I: 0.00

Deadheaded 4 operations
Param Delta: 6,147,506 -> 6,009,390
Pre-prune Test  Corrects: Top-1: 86.62%, 16.72 s
Post-prune Test  Corrects: Top-1: 86.62%, 16.48 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 6.69921875, Batch: 64
Restarting pruning at scale level 6, new comp ratio: 0.0004459516789125928
=== Training Carlton McClure Banbury ===
0: 92.00MiB
1: 1.11GiB
2: 2.03GiB
3: 3.00GiB
4: 3.98GiB
5: 4.79GiB
6: 5.18GiB
7: 5.47GiB
8: 5.80GiB
9: 6.12GiB
10: 6.21GiB
11: 6.21GiB
12: 6.21GiB
13: 6.36GiB
14: 6.50GiB
Train Corrects: Top-1: 74.64%, 4m,39s
Test  Corrects: Top-1: 79.08%, 16.48 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 76.05%, 4m,39s
Test  Corrects: Top-1: 82.34%, 16.49 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 78.18%, 4m,39s
Test  Corrects: Top-1: 81.25%, 16.49 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corre

Train Corrects: Top-1: 79.51%, Comp: 0.16, 1.00 6m,0s
Train Loss Components: C: 5.735, E: 1.543, I: 0.00

Deadheaded 0 operations
Param Delta: 5,662,248 -> 5,662,248
Pre-prune Test  Corrects: Top-1: 79.04%, 16.03 s
Post-prune Test  Corrects: Top-1: 79.04%, 16.07 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 82.89%, Comp: 0.16, 1.00 6m,0s
Train Loss Components: C: 2.872, E: 1.541, I: 0.00
Test  Corrects: Top-1: 86.02%, 16.06 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 84.25%, Comp: 0.16, 1.00 6m,2s
Train Loss Components: C: 3.091, E: 1.494, I: 0.00
Test  Corrects: Top-1: 82.45%, 16.04 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 85.22%, Comp: 0.16, 1.00 6m,1s
Train Loss Components: C: 3.849, E: 1.504, I: 0.00
Test  Corrects: Top-1: 85.54%, 16.05 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 87.29%, Comp: 0.16, 1.00 6m,0s
Train Loss Components: C: 2.567, E: 1.518, I: 0.00

Deadheaded 2 operations

Train Corrects: Top-1: 70.46%, 4m,13s
Test  Corrects: Top-1: 73.17%, 15.48 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 75.47%, 4m,13s
Test  Corrects: Top-1: 67.64%, 15.48 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 74.01%, 4m,13s
Test  Corrects: Top-1: 74.94%, 15.50 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 76.26%, 4m,14s

Deadheaded 1 operations
Param Delta: 4,942,111 -> 4,906,270
Pre-prune Test  Corrects: Top-1: 80.28%, 15.50 s
Post-prune Test  Corrects: Top-1: 80.28%, 15.39 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 81.71%, 4m,13s
Test  Corrects: Top-1: 80.66%, 15.35 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 83.49%, 4m,14s
Test  Corrects: Top-1: 86.28%, 15.37 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 86.28%, 4m,13s
Test  Corrects: Top-1: 87.25%, 15.37 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 87.41%, 4m,14s

De

Train Corrects: Top-1: 86.28%, Comp: 0.15, 1.00 5m,33s
Train Loss Components: C: 4.210, E: 1.723, I: 0.00
Test  Corrects: Top-1: 87.08%, 14.85 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 81.06%, Comp: 0.14, 1.00 5m,33s
Train Loss Components: C: 9.438, E: 1.655, I: 0.00
Test  Corrects: Top-1: 34.23%, 14.84 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 78.21%, Comp: 0.15, 1.00 5m,34s
Train Loss Components: C: 3.336, E: 1.704, I: 0.00
Test  Corrects: Top-1: 80.14%, 14.83 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 80.48%, Comp: 0.14, 1.00 5m,34s
Train Loss Components: C: 3.711, E: 1.695, I: 0.00

Deadheaded 0 operations
Param Delta: 4,883,738 -> 4,883,738
Pre-prune Test  Corrects: Top-1: 84.59%, 14.87 s
Post-prune Test  Corrects: Top-1: 84.59%, 14.84 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 82.56%, Comp: 0.15, 1.00 5m,33s
Train L

Train Corrects: Top-1: 70.16%, 3m,53s
Test  Corrects: Top-1: 77.65%, 14.49 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 75.17%, 3m,53s

Deadheaded 1 operations
Param Delta: 4,275,220 -> 4,274,963
Pre-prune Test  Corrects: Top-1: 78.92%, 14.48 s
Post-prune Test  Corrects: Top-1: 78.92%, 14.47 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 77.34%, 3m,54s
Test  Corrects: Top-1: 83.53%, 14.46 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 81.57%, 3m,54s
Test  Corrects: Top-1: 84.47%, 14.45 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 82.72%, 3m,54s
Test  Corrects: Top-1: 85.73%, 14.46 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 84.42%, 3m,54s

Deadheaded 0 operations
Param Delta: 4,274,963 -> 4,274,963
Pre-prune Test  Corrects: Top-1: 86.29%, 14.46 s
Post-prune Test  Corrects: Top-1: 86.29%, 14.51 s

[31mAdjusting lr to 0.0003806023374435663[0m
Train Corrects: Top-1: 85.1

Train Corrects: Top-1: 52.81%, Comp: 0.13, 0.93 5m,14s
Train Loss Components: C: 4.878, E: 1.979, I: 0.21
Test  Corrects: Top-1: 42.28%, 14.04 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 56.40%, Comp: 0.13, 0.93 5m,13s
Train Loss Components: C: 8.443, E: 1.979, I: 0.21

Deadheaded 4 operations
Param Delta: 4,229,136 -> 3,137,548
Pre-prune Test  Corrects: Top-1: 57.48%, 14.04 s
Post-prune Test  Corrects: Top-1: 57.48%, 13.45 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 5.095703125, Batch: 64
Restarting pruning at scale level 6, new comp ratio: 1.883482982261874e-05
=== Training Carlton McClure Banbury ===
0: 78.00MiB
1: 862.00MiB
2: 1.64GiB
3: 2.36GiB
4: 3.22GiB
5: 3.88GiB
6: 3.97GiB
7: 4.09GiB
8: 4.36GiB
9: 4.59GiB
10: 4.66GiB
11: 4.66GiB
12: 4.68GiB
13: 4.81GiB
14: 4.95GiB
Train Corrects: Top-1: 44.53%, 3m,32s
Test  Corrects: Top-1: 13.68%, 13.46 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 46.16%, 3m,32s
Test  Corrects: Top-1: 30

Post-prune Test  Corrects: Top-1: 30.52%, 13.44 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 63.39%, 3m,31s
Test  Corrects: Top-1: 27.57%, 13.42 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 66.54%, 3m,31s
Test  Corrects: Top-1: 31.37%, 13.43 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 69.65%, 3m,32s
Test  Corrects: Top-1: 35.87%, 13.41 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 72.57%, 3m,31s

Deadheaded 0 operations
Param Delta: 3,136,010 -> 3,136,010
Pre-prune Test  Corrects: Top-1: 59.91%, 13.42 s
Post-prune Test  Corrects: Top-1: 59.91%, 13.43 s

[31mAdjusting lr to 0.0003806023374435663[0m
Train Corrects: Top-1: 74.72%, Comp: 0.13, 1.00 5m,0s
Train Loss Components: C: 3.585, E: 2.126, I: 0.00
Test  Corrects: Top-1: 76.17%, 13.43 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 61.57%, Comp: 0.13, 0.93 4m,59s
Tra

Train Corrects: Top-1: 82.54%, Comp: 0.13, 1.00 4m,59s
Train Loss Components: C: 3.348, E: 2.296, I: 0.00

Deadheaded 0 operations
Param Delta: 3,136,010 -> 3,136,010
Pre-prune Test  Corrects: Top-1: 80.45%, 13.40 s
Post-prune Test  Corrects: Top-1: 80.45%, 13.41 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 5.099609375, Batch: 64
Restarting pruning at scale level 6, new comp ratio: 5.95945787356296e-06
=== Training Carlton McClure Banbury ===
0: 78.00MiB
1: 862.00MiB
2: 1.64GiB
3: 2.36GiB
4: 3.22GiB
5: 3.88GiB
6: 3.97GiB
7: 4.09GiB
8: 4.36GiB
9: 4.59GiB
10: 4.66GiB
11: 4.66GiB
12: 4.66GiB
13: 4.81GiB
14: 4.95GiB
Train Corrects: Top-1: 69.35%, 3m,31s
Test  Corrects: Top-1: 25.54%, 13.44 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 70.26%, 3m,32s
Test  Corrects: Top-1: 15.30%, 13.43 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 72.02%, 3m,31s
Test  Corrects: Top-1: 30.30%, 13.42 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Cor

Train Corrects: Top-1: 77.75%, Comp: 0.13, 0.93 4m,58s
Train Loss Components: C: 6.521, E: 2.537, I: 0.27

Deadheaded 0 operations
Param Delta: 2,998,793 -> 2,998,793
Pre-prune Test  Corrects: Top-1: 70.58%, 13.33 s
Post-prune Test  Corrects: Top-1: 70.58%, 13.34 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 80.23%, Comp: 0.13, 1.40 4m,58s
Train Loss Components: C: 8.364, E: 2.508, I: 1.64
Test  Corrects: Top-1: 74.24%, 13.37 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 83.00%, Comp: 0.13, 0.93 4m,57s
Train Loss Components: C: 4.336, E: 2.537, I: 0.27
Test  Corrects: Top-1: 78.39%, 13.33 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 85.52%, Comp: 0.13, 1.20 4m,58s
Train Loss Components: C: 4.392, E: 2.537, I: 0.82
Test  Corrects: Top-1: 85.50%, 13.35 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 86.88%, Comp: 0.13, 1.00 4m,59s
Train Loss Components: C: 3.462, E: 2.537, I: 0.00

Deadheaded 0 opera

Train Corrects: Top-1: 74.86%, 3m,29s
Test  Corrects: Top-1: 48.82%, 13.39 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 69.88%, 3m,29s
Test  Corrects: Top-1: 67.06%, 13.35 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 75.54%, 3m,29s
Test  Corrects: Top-1: 47.71%, 13.35 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 76.98%, 3m,30s

Deadheaded 0 operations
Param Delta: 2,998,793 -> 2,998,793
Pre-prune Test  Corrects: Top-1: 64.02%, 13.37 s
Post-prune Test  Corrects: Top-1: 64.02%, 13.36 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 79.83%, 3m,29s
Test  Corrects: Top-1: 74.05%, 13.37 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 83.62%, 3m,31s
Test  Corrects: Top-1: 79.21%, 13.36 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 85.82%, 3m,30s
Test  Corrects: Top-1: 84.45%, 13.34 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 86.85%, 3m,30s

De

Train Corrects: Top-1: 87.92%, Comp: 0.13, 1.00 4m,57s
Train Loss Components: C: 3.665, E: 2.791, I: 0.00
Test  Corrects: Top-1: 88.51%, 13.31 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 76.33%, Comp: 0.13, 1.60 4m,59s
Train Loss Components: C: 7.660, E: 2.791, I: 2.70
Test  Corrects: Top-1: 82.09%, 13.31 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 73.14%, Comp: 0.13, 1.60 4m,57s
Train Loss Components: C: 11.097, E: 2.764, I: 2.70
Test  Corrects: Top-1: 70.50%, 13.34 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 73.77%, Comp: 0.13, 0.93 4m,56s
Train Loss Components: C: 5.159, E: 2.791, I: 0.30

Deadheaded 0 operations
Param Delta: 2,998,793 -> 2,998,793
Pre-prune Test  Corrects: Top-1: 78.27%, 13.32 s
Post-prune Test  Corrects: Top-1: 78.27%, 13.32 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 79.11%, Comp: 0.13, 0.93 4m,56s
Train 

Train Corrects: Top-1: 78.92%, 3m,29s
Test  Corrects: Top-1: 76.63%, 13.32 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corrects: Top-1: 81.29%, 3m,29s

Deadheaded 0 operations
Param Delta: 2,998,791 -> 2,998,791
Pre-prune Test  Corrects: Top-1: 81.37%, 13.34 s
Post-prune Test  Corrects: Top-1: 81.37%, 13.34 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 83.54%, 3m,29s
Test  Corrects: Top-1: 82.82%, 13.32 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 85.85%, 3m,30s
Test  Corrects: Top-1: 86.90%, 13.31 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 87.76%, 3m,28s
Test  Corrects: Top-1: 88.37%, 13.31 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 89.06%, 3m,29s

Deadheaded 0 operations
Param Delta: 2,998,791 -> 2,998,791
Pre-prune Test  Corrects: Top-1: 88.54%, 13.33 s
Post-prune Test  Corrects: Top-1: 88.54%, 13.34 s

[31mAdjusting lr to 0.0003806023374435663[0m
Train Corrects: Top-1: 89.6

Train Corrects: Top-1: 88.83%, Comp: 0.12, 1.00 4m,56s
Train Loss Components: C: 5.682, E: 3.117, I: 0.47
Test  Corrects: Top-1: 88.82%, 13.24 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 89.58%, Comp: 0.12, 1.40 4m,57s
Train Loss Components: C: 6.343, E: 3.117, I: 1.62

Deadheaded 1 operations
Param Delta: 2,996,230 -> 2,960,389
Pre-prune Test  Corrects: Top-1: 89.26%, 13.27 s
Post-prune Test  Corrects: Top-1: 89.26%, 13.17 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 4.974609375, Batch: 64
Restarting pruning at scale level 6, new comp ratio: 2.5169851396752785e-07
=== Training Carlton McClure Banbury ===
0: 78.00MiB
1: 862.00MiB
2: 1.64GiB
3: 2.36GiB
4: 3.22GiB
5: 3.80GiB
6: 3.89GiB
7: 4.01GiB
8: 4.28GiB
9: 4.51GiB
10: 4.58GiB
11: 4.58GiB
12: 4.60GiB
13: 4.76GiB
14: 4.85GiB
Train Corrects: Top-1: 81.48%, 3m,26s
Test  Corrects: Top-1: 80.14%, 13.16 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 77.18%, 3m,27s
Test  Corrects: Top-1: 7

Post-prune Test  Corrects: Top-1: 69.16%, 12.95 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 73.70%, 3m,22s
Test  Corrects: Top-1: 68.51%, 12.95 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 78.06%, 3m,22s
Test  Corrects: Top-1: 78.27%, 12.95 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 81.16%, 3m,22s
Test  Corrects: Top-1: 80.26%, 12.95 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 82.63%, 3m,21s

Deadheaded 0 operations
Param Delta: 2,950,660 -> 2,950,660
Pre-prune Test  Corrects: Top-1: 84.69%, 12.95 s
Post-prune Test  Corrects: Top-1: 84.69%, 12.96 s

[31mAdjusting lr to 0.0003806023374435663[0m
Train Corrects: Top-1: 83.79%, Comp: 0.12, 0.93 4m,49s
Train Loss Components: C: 5.159, E: 3.241, I: 0.33
Test  Corrects: Top-1: 85.51%, 12.92 s

[31mRestarting Learning Rate, setting new cycle length to 8[0m

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 72.93%, Comp: 0.12, 0.93 4m,48s
Tr

Train Corrects: Top-1: 82.28%, Comp: 0.12, 1.33 4m,48s
Train Loss Components: C: 5.584, E: 3.368, I: 1.75

Deadheaded 1 operations
Param Delta: 2,950,660 -> 2,813,443
Pre-prune Test  Corrects: Top-1: 84.16%, 12.93 s
Post-prune Test  Corrects: Top-1: 84.16%, 12.83 s

[31mAdjusting lr to 0.0003806023374435663[0m
Est size: 4.8203125, Batch: 64
Restarting pruning at scale level 6, new comp ratio: 7.96389829350381e-08
=== Training Carlton McClure Banbury ===
0: 76.00MiB
1: 860.00MiB
2: 1.51GiB
3: 2.23GiB
4: 3.09GiB
5: 3.67GiB
6: 3.77GiB
7: 3.88GiB
8: 4.15GiB
9: 4.38GiB
10: 4.45GiB
11: 4.45GiB
12: 4.45GiB
13: 4.59GiB
14: 4.71GiB
Train Corrects: Top-1: 69.12%, 3m,20s
Test  Corrects: Top-1: 58.66%, 12.86 s

[31mAdjusting lr to 0.01[0m
Train Corrects: Top-1: 69.14%, 3m,20s
Test  Corrects: Top-1: 69.31%, 12.89 s

[31mAdjusting lr to 0.009619397662556433[0m
Train Corrects: Top-1: 68.99%, 3m,20s
Test  Corrects: Top-1: 71.29%, 12.87 s

[31mAdjusting lr to 0.008535533905932738[0m
Train Corre

Train Corrects: Top-1: 75.39%, Comp: 0.12, 0.87 4m,46s
Train Loss Components: C: 6.979, E: 3.582, I: 0.55

Deadheaded 0 operations
Param Delta: 2,812,930 -> 2,812,930
Pre-prune Test  Corrects: Top-1: 80.45%, 12.82 s
Post-prune Test  Corrects: Top-1: 80.45%, 12.82 s

[31mAdjusting lr to 0.00691341716182545[0m
Train Corrects: Top-1: 77.50%, Comp: 0.12, 1.73 4m,51s
Train Loss Components: C: 8.375, E: 3.582, I: 3.09
Test  Corrects: Top-1: 80.83%, 13.59 s

[31mAdjusting lr to 0.005[0m
Train Corrects: Top-1: 80.57%, Comp: 0.12, 1.27 5m,3s
Train Loss Components: C: 9.527, E: 3.531, I: 1.92
Test  Corrects: Top-1: 82.46%, 13.27 s

[31mAdjusting lr to 0.0030865828381745515[0m
Train Corrects: Top-1: 82.40%, Comp: 0.12, 1.40 5m,1s
Train Loss Components: C: 6.302, E: 3.555, I: 1.92
Test  Corrects: Top-1: 83.58%, 13.53 s

[31mAdjusting lr to 0.0014644660940672626[0m
Train Corrects: Top-1: 83.54%, Comp: 0.12, 1.00 5m,2s
Train Loss Components: C: 5.219, E: 3.531, I: 0.55

Deadheaded 0 operatio

# Train

In [None]:
# Rebuild the searched architecture from its saved genotype, at the final scale and
# final batch size, ready for full training.
data, data_shape = load_data(hypers['batch_size']['final'], dataset)

# NOTE(review): `name` is not assigned anywhere in the visible notebook, so on a fresh
# kernel this line raises NameError unless the star import from bonsai.trainers
# provides it — confirm which model name the genotype file was saved under.
# pickle can execute arbitrary code while loading: only open genotype files that this
# project produced.
genotype_path = 'genotypes/genotype_{}.pkl'.format(name.replace(" ","_"))
# Context manager so the file handle is closed even if unpickling fails.
with open(genotype_path, 'rb') as genotype_file:
    params, genotype = pkl.load(genotype_file)

# Override the stored constructor arguments for the final training run.
params['genotype']=genotype
params['scale']=hypers['scale']['final']
params['prune']=False
params['auxiliary']=True
params['dim']=data_shape
model = Net(**params)
print(model)

# Marks the training cell below as a fresh start (it passes resume=not init_or_finish).
init_or_finish = True
size_test(model,data)
# Kept for the random-search baseline, which passes it to Net as random_ops.
actual_comp_ratio = model.genotype_compression()

In [None]:
# Output handling before training: wipe_output() on a fresh start, prev_output() when
# resuming. NOTE(review): exact effect of both helpers to be confirmed in bonsai.helpers.
if init_or_finish:
    wipe_output()
else:
    prev_output()

# Full training of the rebuilt network with the final-training hyperparameters.
# The return value becomes the resume flag for the next run of this cell.
init_or_finish = full_train(
    model,
    data,
    lr_schedule=hypers['lr_schedule'],
    resume=not init_or_finish,
    epochs=hypers['epochs'],
    drop_prob=hypers['drop_prob'],
)
clean()

# Random Search

In [None]:
# Random-search baseline: a network at the final scale with pruning disabled.
# `random_ops` receives actual_comp_ratio, which is computed in the Train section
# above, so that cell has to run first.
data, data_shape = load_data(hypers['batch_size']['final'], dataset)
model = Net(
    dim=data_shape,
    classes=classes,
    scale=hypers['scale']['final'],
    reductions=hypers['reductions'],
    spacing=hypers['spacing'],
    nodes=hypers['nodes'],
    random_ops=actual_comp_ratio,
    prune=False,
    auxiliary=True,
)
model.save_genotype()

# Report the architecture and the size_test result.
print(model)
print(size_test(model, data))

# Left as the last expression so the notebook displays the returned value.
model.genotype_compression()

In [None]:
# Train the random baseline with the same final-training hyperparameters as the
# searched model. The trailing ';' keeps the return value out of the cell output.
full_train(model, data,
           lr_schedule=hypers['lr_schedule'],
           epochs=hypers['epochs'],
           drop_prob=hypers['drop_prob']);