In [1]:
%matplotlib inline
import mpld3
mpld3.enable_notebook()

%load_ext autoreload

In [2]:
%autoreload
import sys
sys.path.insert(0, "../")

import warnings
import os
import time
import pickle

import numpy as np

from optimizers_llc import get_optimizer
from experiment_utils import import_cifar,history_todict, lr_schedule
from rotation_rate_utils import get_kernel_layer_names, LayerwiseParameterDistanceMemory
from models import VGG_pytorchBlog

from keras.callbacks import LearningRateScheduler
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
def load_results():
    """Read the pickled results dictionary from 'impact_of_weight_decay.p'.

    Returns:
        The unpickled object stored in the results file, or an empty dict
        when the file does not exist in the current working directory.

    NOTE(review): pickle.load executes arbitrary code embedded in the file;
    only load result files produced by this notebook.
    """
    results_file = 'impact_of_weight_decay.p'
    if os.path.isfile(results_file):
        with open(results_file, 'rb') as handle:
            return pickle.load(handle)
    # Nothing has been saved yet: start from an empty results tree.
    return {}

def dump_results(results):
    """Pickle `results` (converted to a plain dict) to 'impact_of_weight_decay.p'.

    The data is first written to a scratch file next to the target and then
    moved over it with os.replace, so an error or interruption while pickling
    never truncates a previously saved results file.

    Args:
        results: mapping (or anything accepted by dict()) to store.

    Raises:
        Whatever pickle.dump or the file operations raise; in that case the
        existing results file is left untouched and the scratch file removed.
    """
    target = 'impact_of_weight_decay.p'
    scratch = target + '.tmp'
    try:
        with open(scratch, 'wb') as f:
            pickle.dump(dict(results), f)
        # Only swap the new file in once it has been written completely.
        os.replace(scratch, target)
    finally:
        # After a successful replace the scratch file no longer exists;
        # it is only still present here when something went wrong.
        if os.path.exists(scratch):
            os.remove(scratch)

def update_results(path, key, value):
    """Set `key` to `value` inside the stored results and write them back.

    Args:
        path: sequence of keys followed from the top-level results dict down
            to the nested dict that should receive the entry; an empty
            sequence targets the top level. Every key in `path` must already
            exist, otherwise the lookup raises KeyError.
        key: key to insert or overwrite in the targeted dict.
        value: value stored under `key`.
    """
    stored = load_results()
    node = stored
    # Walk down the nested dictionaries, one key of `path` at a time.
    for step in path:
        node = node[step]
    node.update({key: value})
    # `node` is a reference into `stored`, so saving `stored` persists the change.
    dump_results(stored)

In [4]:
# Text file to which one line is appended after every finished training run.
monitor_file = 'monitor_weight_decay.txt'

# True: results are read from / written to the pickle file after each run.
# False: results are only accumulated in the in-memory `results` dict.
save_results = True
if not save_results:
    results = {}

In [5]:
# Train VGG_pytorchBlog once with weight_decay = 0. and once with its default
# arguments, for every (optimizer, training_mode) pair, and store the training
# history together with the memory of the LayerwiseParameterDistanceMemory
# callback under results[optimizer][training_mode][weight_decay].
x_train, y_train, x_test, y_test = import_cifar()

for optimizer in ['SGD']:

    # Make sure an entry for this optimizer exists (on disk or in memory).
    if save_results:
        results = load_results()
        if optimizer not in results.keys():
            update_results([],optimizer,{})
    elif optimizer not in results.keys():
        results.update({optimizer:{}})

    for training_mode in ['normal']:
        # Same for the training mode, one level deeper.
        if save_results:
            results = load_results()
            if training_mode not in results[optimizer].keys():
                update_results([optimizer],training_mode,{})
        elif training_mode not in results[optimizer].keys():
            results[optimizer].update({training_mode:{}})

        for weight_decay in [False, True]:

            start = time.time()
            if weight_decay:
                model = VGG_pytorchBlog()
            else:
                model = VGG_pytorchBlog(weight_decay = 0.)

            # Snapshot the kernels of the model that is actually trained in
            # this iteration. Every run builds a fresh model, so a snapshot
            # taken from another model instance would not be this run's
            # starting point.
            layer_names = get_kernel_layer_names(model)
            initial_kernels = list(zip(layer_names,
                                       [model.get_layer(l).get_weights()[0] for l in layer_names]))

            batch_size = 128
            verbose = 0
            if training_mode == 'normal':
                epochs = 250
                lr_bib = {'SGD':0.5, 'RMSprop':0.0003,'SGD_AMom':0.5,'Adam':0.0003,'Adagrad':0.01 }
                lr = lr_bib[optimizer]
                lr_scheduler = LearningRateScheduler(lr_schedule(lr,0.5,[i*25 for i in range(1,100)]))
            elif training_mode == 'llc':
                epochs = 250
                lr_bib = {'SGD':3**-3, 'RMSprop':3**-3,'SGD_AMom':3**-5,'Adam':3**-5,'Adagrad':3**-3 }
                lr = lr_bib[optimizer]
                lr_scheduler = LearningRateScheduler(lr_schedule(lr,0.2,[100,170,220]))
            else:
                # Fail loudly instead of silently reusing the settings of a
                # previous iteration (or hitting a NameError on the first one).
                raise ValueError('Unknown training_mode: '+str(training_mode))

            # NOTE(review): this is 5 more than steps_per_epoch below; presumably
            # chosen so the callback records only once per epoch -- confirm
            # against LayerwiseParameterDistanceMemory.
            batch_frequency = int((x_train.shape[0]/batch_size))+5 # once per epoch
            lpdm = LayerwiseParameterDistanceMemory(initial_kernels, batch_frequency = batch_frequency)

            model.compile(loss='categorical_crossentropy',
                          optimizer=get_optimizer(optimizer, training_mode, model, lr),
                          metrics=['accuracy', 'categorical_crossentropy'])

            with warnings.catch_warnings():
                warnings.simplefilter("ignore") # removes warning from keras for slow callback
                datagen = ImageDataGenerator(horizontal_flip=True)

                history = model.fit_generator(datagen.flow(x_train, y_train,batch_size=batch_size),
                                              steps_per_epoch=int(x_train.shape[0]/batch_size),
                                              epochs=epochs,
                                              validation_data=(x_test,y_test),
                                              verbose = verbose,
                                              callbacks = [lr_scheduler, lpdm])

            # Store this run's outcome, keyed by the weight_decay flag.
            run_record = {'history':history_todict(history),'lpdm':np.array(lpdm.memory)}
            if save_results:
                update_results([optimizer,training_mode],weight_decay,run_record)
            else:
                results[optimizer][training_mode].update({weight_decay:run_record})

            # Append a progress line so long runs can be followed from outside the notebook.
            with open(monitor_file,'a') as log:
                log.write(optimizer+', '+training_mode+', '+str(weight_decay)+': done in '+str(time.time()-start)+' seconds.\n')