## 1. Importing necessary packages

In [1]:
# Basic python packages
import numpy as np
import pandas as pd
import os, math, time, pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
from operator import itemgetter
from IPython.display import clear_output

In [2]:
# Pytorch packages
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from sklearn.model_selection import train_test_split
from torchvision import models, datasets, transforms
from components import ToRGB, BetaNet2, BetaNet3, EffNetV1, EffNetV2, MobileNetV2, ResNet50
# Device handle used for tensors and models in this notebook.
device = torch.device("cuda")
# Trailing expression: the cell displays the number of GPUs visible to torch.
torch.cuda.device_count()

  from .autonotebook import tqdm as notebook_tqdm


6

## 2. Training, validation, and test datasets

In [3]:
# Dataset configuration.
# norm_stats holds (per-channel mean, per-channel std) — CIFAR100 training set
# normalization constants; R is the size handed to the crop/resize transforms.
norm_stats = ((0.5071, 0.4866, 0.4409),(0.2009, 0.1984, 0.2023))
R = 384

# Training pipeline: augmentation first, then tensor conversion + normalization.
train_transform = transforms.Compose([
    transforms.AutoAugment(policy=transforms.autoaugment.AutoAugmentPolicy.CIFAR10),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(size=R),
    transforms.ToTensor(),  # Also standardizes to range [0,1]
    transforms.Normalize(mean=norm_stats[0], std=norm_stats[1]),
])

# Validation pipeline: deterministic resize only, no augmentation.
valid_transform = transforms.Compose([
    transforms.Resize(size=R),
    transforms.ToTensor(),  # Also standardizes to range [0,1]
    transforms.Normalize(mean=norm_stats[0], std=norm_stats[1]),
])

# Both splits live under the same root directory and are downloaded on demand.
ROOT = '/data/cifar100'
train_dataset = datasets.CIFAR100(
    root=ROOT,
    train=True,
    transform=train_transform,
    download=True,
)

# Hold-out this data for final evaluation
valid_dataset = datasets.CIFAR100(
    root=ROOT,
    train=False,
    transform=valid_transform,
    download=True,
)

# Report the size of each split.
print(f'Train: {len(train_dataset):,.0f}, Valid: {len(valid_dataset):,.0f}')

Files already downloaded and verified
Files already downloaded and verified
Train: 50,000, Valid: 10,000


## 3. Run training and evaluation routine.

In [4]:
# DDP training routine inputs
model_type = 'beta3'
world_size = 6
time_budget_mins = np.inf # minutes per trial
nepochs = 250
batch_size = 100
accumulate = 1
evaluate = True
saving = 'final'

# Import or define model parameters
with open('nas_results/beta3/R224/trial_results/best_parameters.pkl', 'rb') as handle:
    model_parameters = pickle.load(handle)
model_parameters["R"] = R
model_parameters["output_size"] = 100

%run -i bte_ddp.py

with open(f'experiment_results/{model_type}_result.pkl', 'rb') as handle:
    results = pickle.load(handle)
    
# Unpack results object
avg_epoch_train_time = [results[e]["time"] for e in results]
train_loss_epoch = np.array([results[e]["tloss"] for e in results])
valid_loss_epoch = np.array([results[e]["vloss"] for e in results])
valid_top1accu_epoch = np.array([results[e]["top1"] for e in results])
valid_top5accu_epoch = np.array([results[e]["top5"] for e in results])

target = valid_top1accu_epoch.max()

# Save trial result measures to disk for inspection later.
payload = {
    'R': R,
    'time_budget_mins': time_budget_mins,
    'params': model_parameters,
    'tloss':train_loss_epoch,
    'vloss':valid_loss_epoch,
    'top1': valid_top1accu_epoch,
    'top5': valid_top5accu_epoch,
    'time': avg_epoch_train_time
}

with open(f'experiment_results/BetaNet3_384_results.pkl', 'wb') as handle:
    pickle.dump(payload, handle)

# Need to delete this because otherwise failed training is skipped and the file from last run is picked up instead.
os.remove(f'experiment_results/{model_type}_result.pkl')

Generated beta2 model with 30,011,085 params.
EPOCH 0, TLOSS 4.398, VLOSS 3.976, TOP1 7.73, TOP5 26.86, TIME 54.955
EPOCH 1, TLOSS 4.061, VLOSS 3.586, TOP1 13.86, TOP5 38.67, TIME 52.807
EPOCH 2, TLOSS 3.771, VLOSS 3.170, TOP1 20.97, TOP5 50.94, TIME 52.994
EPOCH 3, TLOSS 3.498, VLOSS 2.769, TOP1 28.86, TOP5 61.04, TIME 53.062
EPOCH 4, TLOSS 3.271, VLOSS 2.482, TOP1 34.86, TOP5 67.28, TIME 53.082
EPOCH 5, TLOSS 3.069, VLOSS 2.314, TOP1 38.38, TOP5 71.11, TIME 53.099
EPOCH 6, TLOSS 2.904, VLOSS 2.141, TOP1 42.40, TOP5 74.54, TIME 53.275
EPOCH 7, TLOSS 2.761, VLOSS 2.016, TOP1 45.31, TOP5 76.91, TIME 53.222
EPOCH 8, TLOSS 2.662, VLOSS 1.912, TOP1 48.50, TOP5 79.11, TIME 53.202
EPOCH 9, TLOSS 2.580, VLOSS 1.865, TOP1 49.08, TOP5 79.44, TIME 53.164
EPOCH 10, TLOSS 2.473, VLOSS 1.663, TOP1 53.82, TOP5 82.90, TIME 53.077
EPOCH 11, TLOSS 2.403, VLOSS 1.602, TOP1 55.22, TOP5 84.24, TIME 53.121
EPOCH 12, TLOSS 2.318, VLOSS 1.575, TOP1 56.04, TOP5 84.61, TIME 53.236
EPOCH 13, TLOSS 2.281, VLOSS 