In [2]:
import numpy as np
import torch
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint
from src.model import SuperNetMNIST

from pprint import pprint

import matplotlib.pyplot as plt
%matplotlib inline

# 1 when at least one CUDA device is visible, otherwise 0; passed to Trainer(gpus=...) below.
AVAIL_GPUS = int(torch.cuda.device_count() > 0)


# Fixed global seed so repeated runs start from the same random state.
# Kept as the cell's last expression: the seed value is echoed as the cell output.
SEED = 42
seed_everything(SEED, workers=True)



Global seed set to 42


42

# Training of SuperNet

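We train a single network (the supernet) with uniform sampling over the possible subnetwork architectures. There are 8 possible architectures; the code below evaluates 4 of them (the other 4 are commented out).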

Each architecture is written as a string of 1's and 0's: a 1 switches the corresponding node of the architecture on, and a 0 switches it off.

In [None]:
# Subnetwork configurations evaluated in this notebook; the four ids in the
# trailing comment are currently left out.
configurations = ['1010', '1001', '0110', '0101'] # '1101', '0111', '1110', '1011'

# One shared model, fitted once for 20 epochs.
# NOTE(review): `is_train_mult=True` presumably enables training over several
# sampled subnetworks -- confirm against src.model.SuperNetMNIST.
model = SuperNetMNIST(is_train_mult=True)
checkpoint_cb = ModelCheckpoint('logs/together')
trainer = Trainer(gpus=AVAIL_GPUS, max_epochs=20, callbacks=[checkpoint_cb])
trainer.fit(model)

# Test each configuration on the already-fitted model (no refitting in between).
results_together = {}
for conf in configurations:
    model.sample_subnetwork(conf)
    test_output = trainer.test(model)
    results_together[conf] = test_output

In [7]:
print('Training gradually together with random uniform choice')
print('Checking their accuracy on validation set')
# Build (configuration, accuracy) pairs from the first entry of each test
# result and order them from lowest to highest accuracy.
results_together_cleaned = sorted(
    (
        (conf_key, test_output[0][f'{conf_key}_val_acc'])
        for conf_key, test_output in results_together.items()
    ),
    key=lambda pair: pair[1],
)
pprint(results_together_cleaned)

Training gradually together with random uniform choice
Checking their accuracy on validation set
[('1010', 0.9556999802589417),
 ('0110', 0.9596999883651733),
 ('0101', 0.9646999835968018),
 ('1001', 0.9657999873161316)]


# Training together result 
The best subnetwork is the one with 5x5 convolution layers.

# Training subnetworks separately

In [None]:
# Fit a fresh model for every configuration and record its test result.
# Each run gets its own checkpoint location, logs/separately_<configuration>.
results_sep = {}
for conf in configurations:
    # NOTE(review): `flow_solo=conf` presumably restricts the model to this
    # single subnetwork -- confirm against src.model.SuperNetMNIST.
    model = SuperNetMNIST(is_train_mult=False, flow_solo=conf)
    checkpoint_cb = ModelCheckpoint(f'logs/separately_{conf}')
    trainer = Trainer(gpus=AVAIL_GPUS, max_epochs=20, callbacks=[checkpoint_cb])
    trainer.fit(model)
    test_output = trainer.test(model)
    results_sep[conf] = test_output

In [8]:
print('Checking accuracy on validation set of separate subnetworks')
# Build (configuration, accuracy) pairs from the first entry of each test
# result and order them from lowest to highest accuracy.
results_sep_cleaned = sorted(
    (
        (conf_key, test_output[0][f'{conf_key}_val_acc'])
        for conf_key, test_output in results_sep.items()
    ),
    key=lambda pair: pair[1],
)
pprint(results_sep_cleaned)

Checking accuracy on validation set of separate subnetworks
[('1010', 0.954200029373169),
 ('0110', 0.9634000062942505),
 ('0101', 0.9641000032424927),
 ('1001', 0.9690999984741211)]


# Training network from full path '1111'

In [None]:
# Configurations tested here: the full path '1111' plus the four subnetworks.
# NOTE(review): this rebinds `configurations`; cells that iterate over it will
# see five entries if they are re-run after this cell.
configurations = ['1111', '1010', '1001', '0110', '0101']

# Fit once with the full path only, for 20 epochs.
model = SuperNetMNIST(is_train_mult=False, flow_solo='1111')
checkpoint_cb = ModelCheckpoint('logs/overall')
trainer = Trainer(gpus=AVAIL_GPUS, max_epochs=20, callbacks=[checkpoint_cb])
trainer.fit(model)

# Test each configuration on the already-fitted model (no refitting in between).
results_full = {}
for conf in configurations:
    model.sample_subnetwork(conf)
    test_output = trainer.test(model)
    results_full[conf] = test_output


In [10]:
print('Checking accuracy on validation set of full network')
# Build (configuration, accuracy) pairs from the first entry of each test
# result and order them from lowest to highest accuracy.
results_full_cleaned = sorted(
    (
        (conf_key, test_output[0][f'{conf_key}_val_acc'])
        for conf_key, test_output in results_full.items()
    ),
    key=lambda pair: pair[1],
)
pprint(results_full_cleaned)

Checking accuracy on validation set of full network
[('1010', 0.45660001039505005),
 ('0110', 0.5903000235557556),
 ('0101', 0.8687999844551086),
 ('1001', 0.8898000121116638),
 ('1111', 0.972000002861023)]


# Results:
1. The top 2 architectures from "together" training (`1001` and `0101`) remain the top 2 after separate training.
2. If we train only the full path `1111`, the network still trains some paths more "efficiently" than others: the best-performing subnetworks (`1001` and `0101`) are the same as in 1.

# Further steps:
1. Change dataset to something more challenging for architecture search
2. Add more paths/subnetworks for a next iteration
3. Replace uniform random sampling with some non-uniform distribution of "dropoutness" during training