In [1]:
# Seed every random number generator this notebook draws from, before any
# data or model code runs, so that Restart Kernel -> Run All is repeatable.
import random
random.seed(0)  # stdlib RNG: random.shuffle() picks the training subset later on

from numpy.random import seed
seed(1)  # NumPy's global RNG

import tensorflow
tensorflow.random.set_seed(2)  # TensorFlow's global seed

In [2]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import scipy as sp
import sklearn
from sklearn.decomposition import TruncatedSVD
from matplotlib import pyplot as plt
from IPython.display import clear_output
import powerlaw
import tensorflow_addons as tfa
import copy
import weightwatcher as ww
import imageio
from datetime import datetime
import io
import cv2
# Suppress the powerlaw package warnings
# "powerlaw.py:700: RuntimeWarning: divide by zero encountered in true_divide"
# "powerlaw.py:700: RuntimeWarning: invalid value encountered in true_divide"
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)
import random

In [3]:
# Dataset configuration: number of label classes and per-image input shape.
num_classes = 10
inputShape = (28, 28, 1)

# Fetch MNIST, which arrives already divided into train and test partitions.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Cast pixels to float32 and rescale them into the [0, 1] range.
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Append a trailing channel axis so every image has shape (28, 28, 1).
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Shuffle the training images and labels with one shared permutation so that
# each image stays paired with its label.
trainingIndexes = list(range(y_train.shape[0]))
random.shuffle(trainingIndexes)
x_train = x_train[trainingIndexes]
y_train = y_train[trainingIndexes]

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [4]:
# LeNet-style CNN: two conv + max-pool stages followed by three dense layers.
# The input shape and the width of the output layer come from the data cell
# (`inputShape`, `num_classes`) so the model cannot drift out of sync with
# the one-hot labels.
model = keras.Sequential(
    [
        layers.InputLayer(input_shape=inputShape),
        # 'same' padding keeps the spatial size; the pool halves it.
        layers.Conv2D(filters=32, kernel_size=(5, 5), padding='same', activation='relu'),
        layers.MaxPool2D(strides=2),
        # 'valid' padding trims the border before the second pool.
        layers.Conv2D(filters=48, kernel_size=(5, 5), padding='valid', activation='relu'),
        layers.MaxPool2D(strides=2),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dense(84, activation='relu'),
        # One softmax unit per class, matching the one-hot encoded labels.
        layers.Dense(num_classes, activation='softmax'),
    ]
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 10, 10, 48)        38448     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 48)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1200)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               307456    
_________________________________________________________________
dense_1 (Dense)              (None, 84)                2

In [5]:
# Experiment hyperparameters, read by the training and evaluation cells.
trainingSize = 2_000   # number of leading (shuffled) training examples used
testSize = 10_000      # number of leading test examples used for evaluation
batch_size = 128       # mini-batch size passed to model.fit
epochs = 15            # passes over the training subset
learningRate = 1e-3    # Adam learning rate

In [6]:
# Adam optimizer using the learning rate from the hyperparameter cell.
opt = keras.optimizers.Adam(learning_rate=learningRate)

# Categorical cross-entropy pairs with the one-hot labels and softmax output.
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train only on the first `trainingSize` shuffled examples, with no
# validation hold-out.
model.fit(
    x=x_train[:trainingSize],
    y=y_train[:trainingSize],
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x165a6865bb0>

In [7]:
# Work on a separate copy so the trained `model` itself is left untouched:
# clone the architecture, compile it with the same loss/optimizer settings,
# then copy the trained weights across explicitly.
# NOTE(review): the optimizer *instance* is shared with `model`; that looks
# harmless while `modelToSmooth` is only evaluated, but confirm before
# training it.
modelToSmooth = keras.models.clone_model(model)
modelToSmooth.build(model.input_shape)
modelToSmooth.compile(
    optimizer=model.optimizer,
    loss=model.loss,
    metrics=["accuracy"],
)
modelToSmooth.set_weights(model.get_weights())

# Run the WeightWatcher analysis on the copy; the returned table is the
# cell's displayed output.
watcher = ww.WeightWatcher(model=modelToSmooth)
watcher.analyze()

INFO:weightwatcher:

python      version 3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) [MSC v.1928 64 bit (AMD64)]
numpy       version 1.19.5
tensforflow version 2.5.0
keras       version 2.5.0
INFO:weightwatcher:params {'glorot_fix': False, 'normalize': False, 'conv2d_norm': True, 'randomize': False, 'savefig': False, 'rescale': True, 'deltaEs': False, 'intra': False, 'channels': None, 'conv2d_fft': False, 'ww2x': False, 'min_evals': 0, 'max_evals': None, 'plot': False, 'mp_fit': False, 'layers': []}
INFO:weightwatcher:params {'glorot_fix': False, 'normalize': False, 'conv2d_norm': True, 'randomize': False, 'savefig': False, 'rescale': True, 'deltaEs': False, 'intra': False, 'channels': None, 'conv2d_fft': False, 'ww2x': False, 'min_evals': 0, 'max_evals': None, 'plot': False, 'mp_fit': False, 'layers': []}
INFO:weightwatcher:LAYER: 0 16  : <class 'tensorflow.python.keras.layers.convolutional.Conv2D'>
Assuming nested distributions
INFO:weightwatcher:compare dist=lognormal R=0.8

Unnamed: 0,layer_id,name,D,M,N,alpha,alpha_weighted,best_fit,entropy,has_esd,...,num_evals,num_pl_spikes,rank_loss,rf,sigma,spectral_norm,stable_rank,sv_max,xmax,xmin
0,0,,0.226329,1.0,32.0,5.223627,-5.102602,lognormal,0.995743,1.0,...,25.0,25.0,0.0,25.0,0.844725,0.10548,18.541426,0.324776,0.10548,0.060908
1,2,,0.083095,32.0,48.0,2.552763,-1.43294,truncated_power_law,0.920285,1.0,...,800.0,265.0,0.0,25.0,0.095385,0.274581,108.036262,0.524005,0.274581,0.037378
2,5,,0.072511,256.0,1200.0,3.779945,3.976407,truncated_power_law,0.966197,1.0,...,256.0,98.0,0.0,1.0,0.280817,11.271324,42.571344,3.357279,11.271324,1.920683
3,6,,0.170475,84.0,256.0,6.895635,4.237893,lognormal,0.961917,1.0,...,84.0,15.0,0.0,1.0,1.522246,4.116956,32.505039,2.029028,4.116956,2.629933
4,7,,0.25,10.0,84.0,13.822056,6.559663,truncated_power_law,0.979552,1.0,...,10.0,4.0,0.0,1.0,6.411028,2.982493,6.597403,1.72699,2.982493,2.391302


In [8]:
# Accuracy of the copied model on the training subset, before smoothing.
# evaluate() yields [loss, accuracy]; index 1 is the accuracy metric.
print(
    f"Training set accuracy "
    f"{modelToSmooth.evaluate(x_train[:trainingSize], y_train[:trainingSize], verbose=0)[1]}"
)

Training set accuracy 0.9990000128746033


In [9]:
# Accuracy of the copied model on the held-out test set, before smoothing.
# evaluate() yields [loss, accuracy]; index 1 is the accuracy metric.
print(
    f"Test set accuracy "
    f"{modelToSmooth.evaluate(x_test[:testSize], y_test[:testSize], verbose=0)[1]}"
)

Test set accuracy 0.9575999975204468


In [10]:
# Run the watcher's SVD smoothing, selecting components with "mp_spikes".
# Other values for methodSelectComponents:
#   "powerlaw_xmin"   ==> default
#   "powerlaw_spikes"
#   "percentage", together with e.g. percent = 0.49
# NOTE(review): the accuracy reported after this cell differs from the one
# before it, which suggests `modelToSmooth` is modified in place -- confirm
# against the weightwatcher version in use.
_, nComponents = watcher.unifiedSVDSmoothing(methodSelectComponents="mp_spikes")
print(f"Components selected {nComponents}")

INFO:weightwatcher:params {'glorot_fix': False, 'normalize': False, 'conv2d_norm': True, 'randomize': False, 'savefig': False, 'rescale': True, 'deltaEs': False, 'intra': False, 'channels': None, 'conv2d_fft': False, 'ww2x': False, 'min_evals': 0, 'max_evals': None, 'plot': False, 'mp_fit': False, 'layers': [], 'percent': 0.2}
INFO:weightwatcher:params {'glorot_fix': False, 'normalize': False, 'conv2d_norm': True, 'randomize': False, 'savefig': False, 'rescale': True, 'deltaEs': False, 'intra': False, 'channels': None, 'conv2d_fft': False, 'ww2x': False, 'min_evals': 0, 'max_evals': None, 'plot': False, 'mp_fit': False, 'layers': [], 'percent': 0.2}
INFO:weightwatcher:LAYER: 0 16  : <class 'tensorflow.python.keras.layers.convolutional.Conv2D'>
INFO:weightwatcher:LAYER: 2 16  : <class 'tensorflow.python.keras.layers.convolutional.Conv2D'>
INFO:weightwatcher:LAYER: 5 4  : <class 'tensorflow.python.keras.layers.core.Dense'>
INFO:weightwatcher:LAYER: 6 4  : <class 'tensorflow.python.keras.

Components selected 465


In [11]:
# NOTE(review): this evaluates on the *training* subset although the label
# says "test" -- presumably the smoothed model's training accuracy is meant
# to serve as an estimate of test accuracy; confirm this is intentional.
print(
    f"Estimated test accuracy "
    f"{modelToSmooth.evaluate(x_train[:trainingSize], y_train[:trainingSize], verbose=0)[1]}"
)

Estimated test accuracy 0.9904999732971191
