# Notebook initialization

## Selecting best GPU

In [1]:
import os
# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so setting it
# here presumably only reaches child processes, not this kernel's own hashing.
# Export it before launching Jupyter if hash determinism matters — confirm.
os.environ['PYTHONHASHSEED'] = '0'
import gpustat

# GPU index this notebook is pinned to. Set it to None to select, at run time,
# the GPU with the lowest used/total memory ratio reported by gpustat.
pinned_gpu = 3

if pinned_gpu is None:
    # Only query the GPUs when the result is actually needed.
    stats = gpustat.GPUStatCollection.new_query()
    ids = [int(gpu.entry['index']) for gpu in stats]
    ratios = [float(gpu.entry['memory.used']) / float(gpu.entry['memory.total']) for gpu in stats]
    # Pair every GPU index with its memory usage ratio and keep the least loaded one.
    bestGPU = min(zip(ids, ratios), key=lambda pair: pair[1])[0]
else:
    bestGPU = pinned_gpu

print("setGPU: Setting GPU to: {}".format(bestGPU))
# Number devices by PCI bus id, then expose only the selected device to CUDA.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = str(bestGPU)

setGPU: Setting GPU to: 3


##  Global variables

In [2]:
# IPython extension to reload modules before executing user code.
# 'autoreload' reloads modules automatically before entering the execution of code typed at the IPython prompt.
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, "../")

# Identifier of this analysis run; it namespaces the `dw/` and `figures/` outputs.
simulation_id = 4

# Folder that receives every figure written for this simulation.
figures_path = 'figures/{}/'.format(simulation_id)


def weights_path(wid, name):
    """Return the weights folder (with trailing slash) for id `wid` and model `name`."""
    return '../weights/{}/{}/'.format(wid, name)


def dw_path(name):
    """Return the path of entry `name` inside this simulation's `dw/` folder."""
    return 'dw/{}/{}'.format(simulation_id, name)

## Seed initialization (for reproducible results)

In [3]:
# Seed NumPy, Python's `random` module and TensorFlow with the same fixed value (0).
# Each generator is seeded immediately after its library is imported.

# Setting the seed for NumPy generated random numbers (global NumPy random state)
import numpy as np
np.random.seed(0)

# Setting the seed for Python random numbers (standard library `random` module)
import random as rn
rn.seed(0)

# Setting the seed for TensorFlow random numbers
# NOTE(review): `tf.set_random_seed` looks like the TensorFlow 1.x API — confirm the
# installed TensorFlow version before upgrading.
import tensorflow as tf
tf.set_random_seed(0)

#from keras.backend.tensorflow_backend import set_session
#config = tf.ConfigProto()
#config.gpu_options.per_process_gpu_memory_fraction = 0.333
#set_session(tf.Session(config=config))

# Importing weights

In [4]:
import sys
# Put the parent directory first on the module search path so the project
# modules imported below (`models`, `keras_utils`) can be resolved.
sys.path.insert(0, "../")

# The matplotlib backend has to be chosen BEFORE pyplot is imported for the first
# time; on older matplotlib releases a later `matplotlib.use()` call is ignored.
import matplotlib
matplotlib.use('nbagg')
import matplotlib.pyplot as plt

# Standard library
import itertools
import pathlib
import pickle
from collections import OrderedDict, defaultdict

# Third-party
import numpy as np
from numpy.linalg import norm
import keras

# Project-local modules (found through the `sys.path` entry added above)
from models import VGG16_Vanilla, VGG16, VGG16_beta, VGG16_Vanilla_beta
from keras_utils import get_kernel_layer_names, get_kernel_layer, get_kernel_weights

Using TensorFlow backend.


In [5]:
# Build one network per experiment. No weights are loaded in this cell.
# Every entry is a tuple (weights id, experiment name, model instance); the first
# two items are the arguments later passed to `weights_path`.
models = [
    (3, 'vgg16_vanilla_layca_bad', VGG16_Vanilla_beta(num_classes=10, input_shape=(32, 32, 3))),
    (1, 'vgg16_vanilla_beta', VGG16_Vanilla_beta(num_classes=10, input_shape=(32, 32, 3))),
    (1, 'vgg16_beta', VGG16_beta(num_classes=10, input_shape=(32, 32, 3), weight_decay=0.005)),
    (1, 'vgg16_vanilla_layca', VGG16_Vanilla_beta(num_classes=10, input_shape=(32, 32, 3))),
]

# Short layer labels: the part of each kernel layer name before the first '_'.
# The first model serves as the reference for names and layer count.
layer_names = [
    full_name.split('_', 1)[0]
    for full_name in get_kernel_layer_names(models[0][2])
]
n_layers = len(get_kernel_layer_names(models[0][2]))

In [6]:
# Create the output folders for the figures comparing against the initial and the
# final weights. The folders depend only on `figures_path`, not on any particular
# model, so each one is created exactly once (the previous loop over `models`
# repeated the same four calls for every model).
for reference in ('initial', 'final'):
    for plot_kind in ('distribution', 'histogram'):
        # parents=True builds missing intermediate folders; exist_ok=True makes
        # re-running the cell harmless.
        pathlib.Path(figures_path + 'byepoch/{}/{}'.format(reference, plot_kind)).mkdir(parents=True, exist_ok=True)

In [7]:
def calculate_weight_difference(w_ref, w):
    """Return the element-wise absolute difference between `w` and `w_ref`.

    Args:
        w_ref: reference weights.
        w: weights to compare against the reference; `w - w_ref` must be defined.

    Returns:
        `|w - w_ref|`, computed element by element.
    """
    return np.abs(w - w_ref)

In [8]:
# Kernel weights of every model as stored in its initial checkpoint.
# `W_i[k]` corresponds to `models[k]`.
W_i = []
for wid, name, model in models:
    # The model object is reused: the checkpoint is loaded into it, then its
    # kernel weights are extracted.
    model.load_weights(weights_path(wid, name) + 'initial/weights-initial.hdf5')
    W_i += [get_kernel_weights(model)]

In [19]:
# Kernel weights of every model as stored in its epoch-50 checkpoint, used as the
# "final" reference. `W_f[k]` corresponds to `models[k]`.
W_f = []
for wid, name, model in models:
    # The model object is reused: the checkpoint is loaded into it, then its
    # kernel weights are extracted.
    model.load_weights(weights_path(wid, name) + 'byepoch/weights-50.hdf5')
    W_f += [get_kernel_weights(model)]

In [9]:
import progressbar

In [10]:
import time

In [20]:
# For every selected epoch, plot the sorted absolute difference between the
# weights at that epoch and the initial weights (`W_i`), one subplot per
# (kernel layer, model) pair, and save the figure under
# `figures_path + 'byepoch/initial/distribution/'`.
for epoch in range(49, 50):
    start_time = time.time()

    # squeeze=False keeps `axes1` two-dimensional even with a single layer or a
    # single model, so `axes1[i, j]` is always valid.
    fig1, axes1 = plt.subplots(nrows=n_layers, ncols=len(models), figsize=(15, 30), squeeze=False)
    bar = progressbar.ProgressBar(maxval=len(models)*n_layers, widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])

    print("epoch {}".format(epoch))

    # Kernel weights of every model at this epoch; `W[k]` corresponds to `models[k]`.
    W = list()
    for wid, name, model in models:
        # Epoch 0 and epoch 100 map to dedicated checkpoint files; every other
        # epoch uses the per-epoch checkpoint.
        if epoch == 0:
            checkpoint = 'initial/weights-initial.hdf5'
        elif epoch == 100:
            checkpoint = 'final/weights-final.hdf5'
        else:
            checkpoint = 'byepoch/weights-{:02d}.hdf5'.format(epoch)
        model.load_weights(weights_path(wid, name) + checkpoint)
        W.append(get_kernel_weights(model))

    bar.start()
    for i in range(n_layers):
        for j in range(len(models)):
            w = W[j][i].flatten()
            w_i = W_i[j][i].flatten()
            delta = w - w_i

            # Absolute change of every weight, sorted from largest to smallest.
            dw = -np.sort(-np.absolute(delta))
            mean = np.mean(dw)

            # An index array avoids building a Python list with one int per weight.
            x = np.arange(len(dw))
            axes1[i, j].plot(x, dw)
            axes1[i, j].set(xlabel='weight index', ylabel='difference in weight ' + r'$\Delta W$', title=layer_names[i])
            # Dashed horizontal line marking the mean absolute change.
            axes1[i, j].plot(x, np.full(len(dw), mean), linestyle='--')

            # Report the number of completed subplots so the bar ends at 100%.
            bar.update(len(models)*i + j + 1)

    # Only the distribution figure is produced; the histogram variant is disabled.
    fig1.tight_layout()
    fig1.savefig(figures_path + 'byepoch/initial/distribution/dw_distribution-{:02d}.png'.format(epoch), dpi=100)
    plt.close(fig1)

    bar.finish()

    # Release the figure and the large per-epoch arrays before the next epoch.
    del fig1, axes1, bar
    del W, w, w_i, delta, dw, x

    elapsed_time = time.time() - start_time
    print("finished in {} seconds".format(elapsed_time))

epoch 49




finished in 50.8206672668457 seconds





In [21]:
# For every selected epoch, plot the sorted absolute difference between the
# weights at that epoch and the reference weights in `W_f`, one subplot per
# (kernel layer, model) pair, and save the figure under
# `figures_path + 'byepoch/final/distribution/'`.
for epoch in range(49, 50):
    start_time = time.time()

    # squeeze=False keeps `axes1` two-dimensional even with a single layer or a
    # single model, so `axes1[i, j]` is always valid.
    fig1, axes1 = plt.subplots(nrows=n_layers, ncols=len(models), figsize=(15, 30), squeeze=False)
    bar = progressbar.ProgressBar(maxval=len(models)*n_layers, widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])

    print("epoch {}".format(epoch))

    # Kernel weights of every model at this epoch; `W[k]` corresponds to `models[k]`.
    W = list()
    for wid, name, model in models:
        # Epoch 0 and epoch 100 map to dedicated checkpoint files; every other
        # epoch uses the per-epoch checkpoint.
        if epoch == 0:
            checkpoint = 'initial/weights-initial.hdf5'
        elif epoch == 100:
            checkpoint = 'final/weights-final.hdf5'
        else:
            checkpoint = 'byepoch/weights-{:02d}.hdf5'.format(epoch)
        model.load_weights(weights_path(wid, name) + checkpoint)
        W.append(get_kernel_weights(model))

    bar.start()
    for i in range(n_layers):
        for j in range(len(models)):
            w = W[j][i].flatten()
            w_f = W_f[j][i].flatten()
            delta = w - w_f

            # Absolute distance of every weight to its reference value, sorted
            # from largest to smallest.
            dw = -np.sort(-np.absolute(delta))
            mean = np.mean(dw)

            # An index array avoids building a Python list with one int per weight.
            x = np.arange(len(dw))
            axes1[i, j].plot(x, dw)
            axes1[i, j].set(xlabel='weight index', ylabel='difference in weight ' + r'$\Delta W$', title=layer_names[i])
            # Dashed horizontal line marking the mean absolute difference.
            axes1[i, j].plot(x, np.full(len(dw), mean), linestyle='--')

            # Report the number of completed subplots so the bar ends at 100%.
            bar.update(len(models)*i + j + 1)

    # Only the distribution figure is produced; the histogram variant is disabled.
    fig1.tight_layout()
    fig1.savefig(figures_path + 'byepoch/final/distribution/dw_distribution-{:02d}.png'.format(epoch), dpi=100)
    plt.close(fig1)

    bar.finish()

    # Release the figure and the large per-epoch arrays before the next epoch.
    del fig1, axes1, bar
    del W, w, w_f, delta, dw, x

    elapsed_time = time.time() - start_time
    print("finished in {} seconds".format(elapsed_time))

epoch 49




finished in 49.533509254455566 seconds





In [22]:
# Create the output folders for the normalized figures. The folders depend only on
# `figures_path`, not on any particular model, so each one is created exactly once
# (the previous loop over `models` repeated the same calls for every model).
for plot_kind in ('distribution', 'histogram'):
    # parents=True builds missing intermediate folders; exist_ok=True makes
    # re-running the cell harmless.
    pathlib.Path(figures_path + 'byepoch/initial_normalized/{}'.format(plot_kind)).mkdir(parents=True, exist_ok=True)

In [None]:
# For every selected epoch, plot the sorted absolute difference between the
# weights at that epoch and the initial weights (`W_i`), divided by the norm of
# the matching reference weights in `W_f`. One subplot per (kernel layer, model)
# pair; all subplots of a layer share the same y range. The figure is saved under
# `figures_path + 'byepoch/initial_normalized/distribution/'`.
for epoch in range(49, 50):
    start_time = time.time()

    # squeeze=False keeps `axes1` two-dimensional even with a single layer or a
    # single model, so `axes1[i, j]` is always valid.
    fig1, axes1 = plt.subplots(nrows=n_layers, ncols=len(models), figsize=(15, 30), squeeze=False)
    bar = progressbar.ProgressBar(maxval=len(models)*n_layers, widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])

    print("epoch {}".format(epoch))

    # Kernel weights of every model at this epoch; `W[k]` corresponds to `models[k]`.
    W = list()
    for wid, name, model in models:
        # Epoch 0 and epoch 100 map to dedicated checkpoint files; every other
        # epoch uses the per-epoch checkpoint.
        if epoch == 0:
            checkpoint = 'initial/weights-initial.hdf5'
        elif epoch == 100:
            checkpoint = 'final/weights-final.hdf5'
        else:
            checkpoint = 'byepoch/weights-{:02d}.hdf5'.format(epoch)
        model.load_weights(weights_path(wid, name) + checkpoint)
        W.append(get_kernel_weights(model))

    bar.start()
    for i in range(n_layers):
        # Largest normalized change seen for this layer across all models.
        dw_max = 0

        for j in range(len(models)):
            w = W[j][i].flatten()
            w_i = W_i[j][i].flatten()
            w_f = W_f[j][i].flatten()
            # Change since initialization, scaled by the norm of the reference weights.
            delta = (w - w_i)/np.linalg.norm(w_f)

            dw = np.absolute(delta)
            dw_max = max(np.max(dw), dw_max)

            # Sort from largest to smallest.
            dw = -np.sort(-dw)
            mean = np.mean(dw)

            # An index array avoids building a Python list with one int per weight.
            x = np.arange(len(dw))
            axes1[i, j].plot(x, dw)
            # The plotted values are normalized, so the axis label says so.
            axes1[i, j].set(xlabel='weight index', ylabel='normalized difference in weight ' + r'$\Delta W$', title=layer_names[i])
            # Dashed horizontal line marking the mean normalized change.
            axes1[i, j].plot(x, np.full(len(dw), mean), linestyle='--')

            # Report the number of completed subplots so the bar ends at 100%.
            bar.update(len(models)*i + j + 1)

        # Same y range for every model of this layer, so columns are comparable.
        for ax in axes1[i]:
            ax.set_ylim((0, dw_max))

    # Only the distribution figure is produced; the histogram variant is disabled.
    fig1.tight_layout()
    fig1.savefig(figures_path + 'byepoch/initial_normalized/distribution/dw_distribution-{:02d}.png'.format(epoch), dpi=100)
    plt.close(fig1)

    bar.finish()

    # Release the figure and the large per-epoch arrays before the next epoch.
    del fig1, axes1, bar
    del W, w, w_i, w_f, delta, dw, x

    elapsed_time = time.time() - start_time
    print("finished in {} seconds".format(elapsed_time))

epoch 49




In [None]:
import sys

# Names that IPython itself places in the namespace, plus this list; they are
# left out of the report below.
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']

# List (name, size) for every remaining global, largest first. Names starting with
# '_' and names that match a loaded module are skipped. `sys.getsizeof` measures
# the object itself, not the objects it refers to.
sorted(
    (
        (var_name, sys.getsizeof(globals().get(var_name)))
        for var_name in dir()
        if not (
            var_name.startswith('_')
            or var_name in sys.modules
            or var_name in ipython_vars
        )
    ),
    key=lambda entry: entry[1],
    reverse=True,
)