In [1]:
import os, sys, time, torch, torchvision

sys.path.append(os.path.abspath(os.path.join('../..')))  # Allow repository modules to be imported

from utils.optimization import initialize

# Experiment configuration. The training cell unpacks this dict as keyword arguments into the
# persistence, model and plotting helpers, so the key names and their order are left untouched.
experiment = dict(
    dataset='mnist',
    classes=10,
    seed=1234,
    input_dimension=28 * 28,
    batch_size=64,
    epochs=10000,
    learning_rate=0.001,
    initial_hidden_units=2,
    initial_depth=1,
    bias=True,
    convergence_epsilon=0.00001,
    target_accuracy=.95,
)
# When False, the training cell loads a previously saved experiment if one exists.
override_saved_experiment = False
device, generator = initialize(experiment['seed'])

# Image -> tensor -> normalised -> flattened vector.
# (0.1307, 0.3081) are presumably the MNIST mean / std -- TODO confirm.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
    torchvision.transforms.Lambda(torch.flatten),
])

# Both splits share the same storage location, download policy and preprocessing.
mnist_options = dict(root='./data/', download=True, transform=transforms)
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_options)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_options)

# Both loaders share the batch size and the seeded generator; only the training loader
# shuffles and drops the trailing incomplete batch.
loader_options = dict(batch_size=experiment['batch_size'], generator=generator)
train_data = torch.utils.data.DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_options)
test_data = torch.utils.data.DataLoader(test_dataset, shuffle=False, **loader_options)

# Training samples per epoch: number of (full) batches times the batch size.
experiment['sample_size'] = experiment['batch_size'] * len(train_data)

In [2]:
import ipycanvas

# Two canvases that the training cell hands to the plotting helpers.
training_canvas = ipycanvas.Canvas()
growth_metrics_canvas = ipycanvas.Canvas()

training_canvas.width = 1200
training_canvas.height = 600
growth_metrics_canvas.width = 1200
growth_metrics_canvas.height = 400

# Placeholder message shown until results are drawn. The text position is derived from the
# training canvas' dimensions and reused as-is for the growth-metrics canvas.
args = ('Results will appear as processed', training_canvas.width / 4, training_canvas.height / 3)
for canvas in (training_canvas, growth_metrics_canvas):
    canvas.font = "30px arial"
    canvas.fill_text(*args)

In [3]:
# Display the training canvas as this cell's output. The training cell passes this same object
# to plot_train_loss_and_accuracy, presumably to redraw it in place -- TODO confirm.
training_canvas

Canvas(height=600, width=1200)

In [4]:
# Display the growth-metrics canvas as this cell's output. The training cell passes this same
# object to plot_growth_metrics, presumably to redraw it in place -- TODO confirm.
growth_metrics_canvas

Canvas(height=400, width=1200)

In [5]:
from utils.optimization import train, test, Accuracy, get_random_states, set_random_states
from utils.persistance import experiment_exists, load_experiment, save_experiment
from plots import plot_train_loss_and_accuracy, plot_growth_metrics
from growing_neural_network import GrowingNeuralNetwork

# Train the growing network, resuming from a saved experiment when one exists.
# Every `growing_epochs_interval` epochs (and on the final epoch) the loop checks for
# convergence, possibly prunes/grows the model, and checkpoints model + experiment + plots.
growing_epochs_interval = 20
device, generator = initialize(experiment['seed'])
if not override_saved_experiment and experiment_exists(**experiment):
    # Resume: reload the recorded histories, the model and the random states, then redraw.
    experiment = load_experiment(**experiment)
    model = GrowingNeuralNetwork.load(**experiment).to(device)
    generator = set_random_states(experiment['random_states'])
    plot_train_loss_and_accuracy(**experiment, canvas=training_canvas)
    plot_growth_metrics(**experiment, canvas=growth_metrics_canvas)
else:
    model = GrowingNeuralNetwork(**experiment).to(device)

# Hand the (re-seeded or restored) generator to the loaders. The DataLoader attribute is only
# consulted when an iterator draws its base seed; the shuffle order comes from the sampler
# created in the DataLoader constructor, which keeps its own reference to the generator it was
# given. Without re-pointing the sampler too, shuffling would keep using the generator from
# the cell that built the loaders and a resumed run would not reproduce the original order.
train_data.generator = test_data.generator = generator
if hasattr(train_data.sampler, 'generator'):
    train_data.sampler.generator = generator

train_loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=experiment['learning_rate'])
if 'train' not in experiment:
    # Fresh run: record the metrics of the untrained model as "epoch 0".
    experiment.update({
        'train': [test(train_data, model, train_loss, device)],
        'train_time': [0],
        'test': [test(test_data, model, train_loss, device, verbose=False)],
        'train_accuracy': [test(train_data, model, Accuracy, device, verbose=False)],
        'test_accuracy': [test(test_data, model, Accuracy, device, verbose=False)],
        'architecture': model.architecture,
        'growth_metrics': []
    })

# One history entry exists per finished epoch (plus epoch 0), so its length is the next epoch.
epochs_already_executed = len(experiment['train'])
for epoch in range(epochs_already_executed, experiment['epochs'] + 1):
    is_checkpoint_epoch = epoch % growing_epochs_interval == 0 or epoch == experiment['epochs']

    # perf_counter is monotonic, so the accumulated duration is immune to wall-clock jumps.
    start_time = time.perf_counter()
    train(train_data, model, train_loss, optimizer, device, verbose=False)
    end_time = time.perf_counter()
    train_time = experiment['train_time'][-1] + end_time - start_time

    # Presumably leaves gradients in place for calculate_growth_metrics -- TODO confirm.
    train_loss_value = test(train_data, model, train_loss, device,
                            calculate_gradients=True, retain_graph=True, verbose=False)
    model.calculate_growth_metrics()

    # NOTE(review): convergence compares the two previously *recorded* losses; the value just
    # computed above is not part of the comparison -- confirm this lag is intended.
    # The length guard avoids an IndexError when only the epoch-0 loss has been recorded
    # (e.g. experiment['epochs'] == 1).
    loss_history = experiment['train']
    if (is_checkpoint_epoch and len(loss_history) >= 2 and
            abs(loss_history[-1] - loss_history[-2]) < experiment['convergence_epsilon']):
        print(f'Convergence achieve according to convergence_epsilon = {experiment["convergence_epsilon"]}')
        # Growing is only attempted when no dead units were removed (short-circuit `or`).
        if model.remove_dead_units() or model.grow():
            # The parameter set changed: rebuild the optimizer over the new parameters.
            optimizer.zero_grad()
            optimizer = torch.optim.SGD(model.parameters(), lr=experiment['learning_rate'])
            experiment['architecture'] = model.architecture

    # Evaluate after any architecture change so the recorded metrics describe the current model.
    test_loss_value = test(test_data, model, train_loss, device, verbose=False)
    train_accuracy = test(train_data, model, Accuracy, device, verbose=False)
    test_accuracy = test(test_data, model, Accuracy, device, verbose=False)
    experiment['train'].append(train_loss_value)
    experiment['train_time'].append(train_time)
    experiment['test'].append(test_loss_value)
    experiment['train_accuracy'].append(train_accuracy)
    experiment['test_accuracy'].append(test_accuracy)
    experiment['growth_metrics'] += [{**growth_metrics, 'epoch': epoch} for growth_metrics in model.growth_metrics]

    if is_checkpoint_epoch:
        # Checkpoint: persist model, random states and histories, then refresh both plots.
        model.save()
        experiment['random_states'] = get_random_states()
        save_experiment(experiment)
        plot_train_loss_and_accuracy(**experiment, canvas=training_canvas)
        plot_growth_metrics(**experiment, canvas=growth_metrics_canvas)

Convergence achieve according to convergence_epsilon = 1e-05
Width growth: Two unit with opposing signs were added to layer 0 which now has 4 units
