# Imports

In [1]:
import pickle as pkl
import time
import sys
import numpy as np

# Master switch: the "Model Setup" and "Model Search" cells wrap their bodies in `if search:`.
search=True

from bonsai.data_loaders import load_data
from bonsai.net_vert import Net
from bonsai.trainers import *
from bonsai.helpers import *
from bonsai.ops import commons, Zero

%load_ext autoreload
%autoreload 2

In [2]:
# Baseline memory reading taken before any model or data is created.
# mem_stats is not defined in this notebook; it arrives through the bonsai wildcard imports.
mem_stats()

'0.00B'

In [3]:
# Budget that estimated model sizes (plus a 0.5 margin) are compared against in the
# size-ratio cell. Sizes are printed as GiB elsewhere, so presumably GiB -- confirm.
gpu_space = 8
# Batch size handed to load_data in the setup cell.
batch_size = 64
# One entry per pattern; 'num_patterns' below runs from 1 up to len(patterns).
# NOTE(review): the meaning of the 'r' / 'n' / 'na' codes is defined inside bonsai, not here.
patterns = [['r','n','n','na'],['r','n','na'],['r','n','na'],['r','n','na'],['r','na'],['r','na']]
# Hyperparameters: unpacked wholesale into sp_size_test(**hypers) and read key-by-key
# when constructing Net and calling full_train.
hypers = {
    'dataset':'CIFAR10',
    'classes':10,
    'scale':5,
    'half':False,
    'batch_size':batch_size,
    'multiplier':1024//batch_size,  # 16 for batch_size=64; only consumed via **hypers in this notebook
    'patterns':patterns,
    'num_patterns':{'init':1,'final':len(patterns)},  # pattern-count range walked by the search
    'nodes':4,
    # Passed to schedule_generator in the setup cell.
    'lr_schedule':
        {'lr_min': 0,
         'lr_max': .01},
    'epochs':600,  # epoch budget of the final / random-search training cells
    'drop_prob':.25,
}
# Search schedule. The first try for a pattern count trains cycle_len * n_cycles epochs,
# the transition point is cycle_len * transition_after epochs, and cycle_len is also
# passed to full_train as prune_interval.
schedule = {'cycle_len':4,
            'transition_after':2,
            'n_cycles':4}

In [6]:
# Probe the initial pattern count at e_c=1 with raw=True, printing the model layout.
print(sp_size_test(hypers['num_patterns']['init'],e_c=1,raw=True,print_model=True,**hypers))
#print(sp_size_test(6,e_c=.25,raw=True,print_model=True,verbose=True,**hypers))
# Probe one below the final pattern count at e_c=.25 with raw=False and verbose output.
print(sp_size_test(hypers['num_patterns']['final']-1,e_c=.25,raw=False,print_model=True,verbose=True,**hypers))

                Nair Budd Greenfield                
Initializer         :                    160 params
Cell 0  (Normal)    :  32 x 32 ,      41,209 params
Cell 1  (Normal)    :  32 x 32 ,      41,210 params
Cell 2  (Normal)    :  32 x 32 ,      41,211 params
Cell 3  (Normal)    :  32 x 32 ,      41,212 params
 ↳ Classifier       :                327,690 params
Total               :                492,692 params


(7.6015625, (1.0, 1.0), False)
                Johnson Norse Hausa                
Initializer         :                    160 params
Cell 0  (Normal)    :  32 x 32 ,      28,848 params
Cell 1  (Normal)    :  32 x 32 ,      28,656 params
Cell 2  (Normal)    :  32 x 32 ,      31,472 params
Cell 3  (Normal)    :  32 x 32 ,      34,680 params
 ↳ Aux Tower        :                327,690 params
Cell 4  (Reduction) :  64 x 16 ,      68,877 params
Cell 5  (Normal)    :  64 x 16 ,      88,529 params
Cell 6  (Normal)    :  64 x 16 ,      79,380 params
 ↳ Aux Tower        :         

# Determine Height/Size Ratios
Check how a test model scales under the search parameters to ensure we don't overfill the GPU.

In [8]:
# For every pattern count n in [init, final), drive a BST over e_c in (.2, 1.) and
# record max(bst.passes) as the ratio used by the search loop for the n -> n+1 step.
# A probe counts as a failure when sp_size_test flags it (size[2]) or when its size
# plus a 0.5 margin does not fit below gpu_space.
num_pat = hypers['num_patterns']
sizes = {}
for n in range(num_pat['init'], num_pat['final']):
    sizes[n] = []
    # NOTE(review): the range above excludes 'final', so raw is always False here.
    raw = (num_pat['final'] == n)
    bst = BST(.2, 1.)
    while bst.answer is None:
        print("{}: {:.3f}\r".format(n, bst.pos), end="")
        size = sp_size_test(n, e_c=bst.pos, raw=raw, **hypers)
        print(size)
        # De Morgan form of: not (not size[2] and (size[0]+.5) < gpu_space)
        query = bool(size[2]) or not ((size[0] + .5) < gpu_space)
        print(not query)
        bst.query(query, size[1])
    print(bst.pass_dict)
    sizes[n] = max(bst.passes)

# Start the search just past the last pattern count whose ratio is exactly 1;
# when no count has ratio 1, start from the initial pattern count.
unit_ratio_counts = [k for k, v in sizes.items() if v == 1]
start_size = unit_ratio_counts[-1] + 1 if unit_ratio_counts else num_pat['init']
print("Comp Ratios:", *("\n{}->{}: {:.3f}".format(k, k + 1, v) for k, v in sizes.items()))

(7.884765625, 1.0, False)
False
(6.357421875, 1.0, False)
True
(7.013671875, 1.0, False)
True
(7.916015625, None, True)
False
(7.564453125, 1.0, False)
False
(7.568359375, 1.0, False)
False
(7.240234375, 0.9999999403953552, False)
True
{0.4000000000000001: 1.0, 0.5000000000000001: 1.0, 0.4750000000000001: 0.9999999403953552}
(7.923828125, None, True)
False
(6.947265625, 1.0, False)
True
(7.9140625, None, True)
False
2: 0.450

KeyboardInterrupt: 

# Search

## Model Setup

In [8]:
def jn_print(x,end="\n"):
    """Print ``x`` to the cell output and append the same text to ``logs/jn_out.log``.

    Args:
        x: Message to emit. It is converted with ``str`` before being written to
            the log, so any value ``print`` accepts can also be logged.
        end: Terminator passed to ``print`` and appended after the message in
            the log file.
    """
    import os  # local import keeps this helper self-contained within its cell

    print(x,end=end)
    # open(..., "a") creates the file but not a missing parent directory.
    os.makedirs("logs", exist_ok=True)
    with open("logs/jn_out.log","a") as f:
        f.write(str(x)+end)
        
if search:
    # Load the dataset once; `data` is reused by the search and training cells below.
    data, data_shape = load_data(hypers['batch_size'], hypers['dataset'])
    # lr_scheduler is later called with an epoch count to produce each run's lr_schedule.
    lr_scheduler = schedule_generator(hypers['lr_schedule'])
    
    # Build the initial search model with `start_size` patterns.
    # NOTE(review): start_size is only assigned at the very end of the size-ratio cell,
    # so this cell depends on that cell having run to completion.
    model = Net(dim=data_shape, 
                classes=hypers['classes'], 
                scale=hypers['scale'],
                patterns=hypers['patterns'], 
                num_patterns=start_size,
                nodes=hypers['nodes'],
                auxiliary=True,
                half=hypers['half'])
    model.save_genotype()
    # size_test yields a size (printed below as GiB) and an overflow flag.
    size, overflow = size_test(model, data, )
    print(model)
    print("Est Size: {}{:.2f}GiB {}".format(">" if overflow else "", size, "(overflow)" if overflow else "")) 
    if overflow:
        # NOTE(review): `model` is unbound after this, yet the search cell below still
        # uses it; an overflow here surfaces later as a NameError unless this cell is re-run.
        del model
        clean('Search init')
    
    # Consumed by the search cell: selects wipe_output() over prev_output() and is
    # passed to full_train negated, as resume=not init_or_finish.
    init_or_finish = True

[39872]
[39872, 39872]
[39872, 39872, 39872]
[39872, 39872, 39872, 39872]
             Boylston Senora Belshazzar             
Initializer         :                    160 params
Cell 0  (Normal)    :  32 x 32 ,      41,209 params
Cell 1  (Normal)    :  32 x 32 ,      41,210 params
Cell 2  (Normal)    :  32 x 32 ,      41,211 params
Cell 3  (Normal)    :  32 x 32 ,      41,212 params
 ↳ Classifier       :                327,690 params
Total               :                492,692 params

Est Size: 7.67GiB 


## Model Search

In [9]:
if search:
    # init_or_finish is set by the setup cell and by each full_train call below.
    prev_output() if not init_or_finish else wipe_output()
    search_start = time.time()
    
    # search parameters
    # Epoch count used both as the TransitionDict key on the first try and as the
    # shortened epoch budget of every retry.
    transition = schedule['cycle_len']*schedule['transition_after']
    # search loop
    # One iteration per pattern count n; sizes[n] comes from the size-ratio cell.
    for n in range(start_size,hypers['num_patterns']['final']):
        comp_ratio = sizes[n]
        # Aim below the target: 90% of it when the target is above .3, otherwise 66%.
        aim = comp_ratio*.9 if comp_ratio>.3 else comp_ratio*.66
        jn_print("=== {} Patterns. Target Comp: {:.2f}, Aim: {:.2f}".format(n, comp_ratio,aim))

        # Redundant: the for statement below rebinds `tries` immediately.
        tries = 1
        # range(1,10) gives at most 9 tries.
        for tries in range(1,10):
            if tries == 1:
                # First try: full budget; the lambdas entry is None at key 0 and the
                # edge/input values appear at key `transition`.
                # NOTE(review): keys are presumably epoch indices -- confirm against TransitionDict.
                epochs = schedule['cycle_len']*schedule['n_cycles']
                comp_lambdas = TransitionDict({0: None, transition: {'edge': tries, 'input': tries}})
            else:
                # Retry: shorter run with the edge/input values (equal to the try
                # number) present from key 0.
                epochs = transition
                comp_lambdas = TransitionDict({0: {'edge': tries, 'input': tries}})
            init_or_finish = full_train(
                model,
                data,
                resume=not init_or_finish,
                epochs=epochs,
                drop_prob=hypers['drop_prob'],
                comp_lambdas=comp_lambdas,
                comp_ratio=aim,
                prune_interval=schedule['cycle_len'],
                lr_schedule=lr_scheduler(epochs),
                half=hypers['half']
            );
            clean(verbose=False)
            edge_comp, input_comp = model.genotype_compression()
            # sizes[n] is the same value as comp_ratio; retry while the edge
            # compression is still above it.
            if edge_comp > sizes[n]:
                jn_print("Try {}. Restarting pruning at pattern {}. Target comp: {:.2f}/{:.2f}, Actual: {:.3f}".format(tries,n,comp_ratio,aim,edge_comp))
            else:
                break
        # NOTE(review): when all 9 tries miss the target the loop ends without `break`
        # and execution still reaches the branch below; only full_train's return value
        # decides between adding a pattern and aborting.

        if init_or_finish:
            model.save_genotype()
            clean("Pre-add")
            print("Adding next pattern:",n+1)
            # NOTE(review): always true -- n comes from a range that excludes 'final'.
            if n != hypers['num_patterns']['final']:
                model.add_pattern()
            model_id = model.model_id
        else: 
            # NOTE(review): reached when the last full_train returned a falsy value;
            # the "10 tries" wording does not match the 9-try loop above.
            print("No progress after 10 tries, aborting.")
            clean()
            break
    #model.remove_prune
    rs()
    clean("Search End")
    print(size_test(model,data))
    print("Search Time:",show_time(time.time()-search_start))
    print(model)
    # Not read again in this notebook; the Random Search section reassigns e_c and i_c.
    e_c,i_c = model.genotype_compression()

=== 1 Patterns. Target Comp: 0.55, Aim: 0.50
=== Training Boylston Senora Belshazzar ===
Starting at 2019-10-25 14:05:41.519734
10/25/2019 02:05 PM
Init: 50.00MiB
0: 2.07GiB
1: 3.94GiB
2: 5.80GiB
3: 7.67GiB
GP: 7.67GiB
Classifier: 7.67GiB
Train Corrects: Top-1: 38.37%, 4m,55s
MGC: tensor(1., device='cuda:0')
All Towers Test  Corrects: Top-1: 51.34%, 15.73s
Last Tower Test  Corrects: Top-1: 51.34%, 15.73s

[31mAdjusting lr to 0.01[0m
10/25/2019 02:10 PM
Train Corrects: Top-1: 49.31%, 4m,55s
MGC: tensor(1., device='cuda:0')
All Towers Test  Corrects: Top-1: 59.23%, 15.65s
Last Tower Test  Corrects: Top-1: 59.23%, 15.65s

[31mAdjusting lr to 0.009903926402016152[0m
10/25/2019 02:16 PM
Train Corrects: Top-1: 54.22%, 4m,55s
MGC: tensor(1., device='cuda:0')
All Towers Test  Corrects: Top-1: 65.18%, 15.71s
Last Tower Test  Corrects: Top-1: 65.18%, 15.71s

[31mAdjusting lr to 0.009619397662556433[0m
10/25/2019 02:21 PM
Train Corrects: Top-1: 57.61%, 4m,54s

Deadheaded 5 operations
Param 

KeyboardInterrupt: 

In [None]:
# Manual checkpoint: show memory stats, then run the bonsai cleanup helper.
# "here" is presumably just a label for clean's report -- confirm against bonsai.
mem_stats()
clean("here")

# Train

In [None]:
# Train the searched model for the full hypers['epochs'] budget.
# init_or_finish (left by the previous full_train call or by the setup cell) picks
# wipe_output() vs prev_output() and is passed on negated as `resume`.
prev_output() if not init_or_finish else wipe_output()
print(model)
init_or_finish = full_train(
    model, 
    data,
    lr_schedule = lr_scheduler(hypers['epochs']),
    resume=not init_or_finish,
    epochs=hypers['epochs'],
    drop_prob=hypers['drop_prob'],
    # Forward the precision flag like the other full_train calls in this notebook,
    # so training uses the same setting the model was built with.
    half=hypers['half'],
);
clean()

# Random Search

In [None]:
# Values handed to Net below as random_ops={'e_c': e_c, 'i_c': i_c}.
e_c,i_c=.25,1

In [None]:
# Random-search baseline: build a model at the final pattern count with random_ops
# taken from e_c / i_c and prune=False. This rebinds `data` and `model`, replacing
# whatever the search section left behind.
data,data_shape = load_data(hypers['batch_size'], hypers['dataset'])
model = Net(dim=data_shape, 
            classes=hypers['classes'], 
            scale=hypers['scale'],
            num_patterns=hypers['num_patterns']['final'],
            patterns=hypers['patterns'],
            nodes=hypers['nodes'],
            random_ops={'e_c':e_c,'i_c':i_c}, 
            prune=False,
            auxiliary=True,
            half=hypers['half'])
model.save_genotype()
print(model)
print(size_test(model,data))
# Last expression of the cell, so its value is displayed as the cell output.
model.genotype_compression()

In [None]:
# Train the random baseline for the full epoch budget.
# NOTE(review): data is reloaded with twice the configured batch size, whereas the
# size_test in the previous cell ran on the un-doubled batch -- confirm this is intended.
data,data_shape = load_data(hypers['batch_size']*2, hypers['dataset'])
# NOTE(review): lr_scheduler is only created inside the `if search:` setup cell, so
# this cell fails with a NameError on a fresh kernel when search is False.
full_train(
    model, data,
    lr_schedule = lr_scheduler(hypers['epochs']),
    epochs=hypers['epochs'],
    drop_prob=hypers['drop_prob'],
    half=hypers['half'],
);