
# WWLayerIterator

This Notebook explains how to use the internal WeightWatcher Layer Iterators

Includes the WWStackerLayerIterator

- Stacks all weight matrices into a single, large, rectangular matrix

- Probably should normalize each layer in some consistent way *(not done yet)








In [1]:
# Suppress the powerlaw package warnings
# "powerlaw.py:700: RuntimeWarning: divide by zero encountered in true_divide"
# "powerlaw.py:700: RuntimeWarning: invalid value encountered in true_divide"
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [2]:
import numpy as np
import pandas as pd

from tqdm import tqdm

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext watermark
%watermark

2021-09-10T21:37:36-07:00

CPython 3.8.5
IPython 7.18.1

compiler   : Clang 10.0.0 
system     : Darwin
release    : 17.7.0
machine    : x86_64
processor  : i386
CPU cores  : 12
interpreter: 64bit


### Import WeightWatcher

set custom Logging at WARN Level

In [3]:
import logging

import weightwatcher as ww
import torchvision.models as models

logger = logging.getLogger(ww.__name__)
logger.setLevel(logging.WARNING)

ww.__version__

'0.5.3'

###  How to create a WWLayerIterator

In [4]:
model = models.vgg11(pretrained=True)
watcher = ww.WeightWatcher(model=model)
ww_layer_iterator = watcher.make_layer_iterator(model=model)

### The Iterator lets you loop over WWLayer instances

- The WWLayer instance (object) is a wrapper to the underlying framework layers

- The intent is to only access the WWLayer instance and not the underlying framework methods

- This lets weightwatcher apply different functions / transformations on each layer individually



In [5]:
for ww_layer in ww_layer_iterator:
    print(ww_layer)

WWLayer 2  None PYTORCH CONV2D  skipped False
WWLayer 5  None PYTORCH CONV2D  skipped False
WWLayer 8  None PYTORCH CONV2D  skipped False
WWLayer 10  None PYTORCH CONV2D  skipped False
WWLayer 13  None PYTORCH CONV2D  skipped False
WWLayer 15  None PYTORCH CONV2D  skipped False
WWLayer 18  None PYTORCH CONV2D  skipped False
WWLayer 20  None PYTORCH CONV2D  skipped False
WWLayer 25  None PYTORCH DENSE  skipped False
WWLayer 28  None PYTORCH DENSE  skipped False
WWLayer 31  None PYTORCH DENSE  skipped False


In [6]:
type(ww_layer_iterator)

weightwatcher.weightwatcher.WWLayerIterator

In [7]:
from weightwatcher.weightwatcher import WWLayerIterator

In [8]:
from weightwatcher.constants import LAYER_TYPE

In [57]:

class WWStackedLayerIteratorB(WWLayerIterator):
    """Iterator variant that sticaks all weight matrics into a single WWLayer"""
    from copy import deepcopy

    def ww_stacked_iter_(self):
        from copy import deepcopy
        
        # find the maximum dimensions so we can pad the matrices
        ww_stacked_layer = None
        Wmats = []
        for ww_layer in self.ww_layer_iter_():
            
            # Here, Ijust lazizy copy an older layer
            # really, we should creat the WWLayer using the init() constructor
            if ww_stacked_layer is None:
                ww_stacked_layer =  deepcopy(ww_layer)
                ww_stacked_layer.the_type =  LAYER_TYPE.STACKED
                ww_stacked_layer.layer_id = 0  
                ww_stacked_layer.name = "Example Stacked Layer"
            
            if len(ww_layer.Wmats)>1:
                W = np.hstack(ww_layer.Wmats)
                print("hstacked: ",W.shape)
            else:
                W = ww_layer.Wmats[0]                
            Wmats.extend(W)
            
        print(len(Wmats))
        for W in Wmats:
            print(W.shape) 
       
        yield ww_stacked_layer
                
    def make_layer_iter_(self):
        return self.ww_stacked_iter_()

In [58]:
stacked_iter = WWStackedLayerIteratorB(model=model)

In [59]:
ww_layer = None
for ww_layer in stacked_iter:
    print(ww_layer)

hstacked:  (64, 27)
hstacked:  (128, 576)
hstacked:  (256, 1152)
hstacked:  (256, 2304)
hstacked:  (512, 2304)
hstacked:  (512, 4608)
hstacked:  (512, 4608)
hstacked:  (512, 4608)
11944
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(27,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(576,)
(57

(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(

(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(25088,)
(

(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)


(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)
(4096,)


In [18]:
ww_layer.N, ww_layer.M

AttributeError: 'NoneType' object has no attribute 'N'

### Notice: The final matrix is quite large so the SVD will take some time

In [None]:
W = layer.Wmats[0]

In [None]:
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=5000)
svd.fit(W)
svals = svd.singular_values_

In [None]:
plt.hist(svals, bins=100);
plt.title("VGG11 SVs, all layer W's naively padded and stacked")
plt.xlabel(r"Singular Values,  $\sigma_{i}$")
plt.ylabel(r"Density, $\rho(\sigma)$")

In [None]:
evals = svals*svals
plt.hist(np.log10(evals), bins=100);
plt.title("VGG11 ESD, all layer W's naively padded and stacked")
plt.xlabel(r"Log EigenValues,  $\log\;\lambda_{i}$")

In [None]:
evals_nz = evals[evals>0.01]
plt.hist(np.log10(evals_nz), bins=100);
plt.title("VGG11 non-zero ESD , all layer W's naively padded and stacked")
plt.xlabel(r"Log EigenValues,  $\log\;\lambda,\;\;\lambda>0.01$")

In [None]:
plt.loglog(evals);
plt.title("VGG11 Stacked ESD, log log plot")


In [None]:
plt.loglog(evals_nz);
plt.title("VGG11 Stacked non-zero ESD, log log plot")

In [None]:
import powerlaw

In [None]:
results = powerlaw.Fit(evals_nz)
results.alpha

In [None]:
layer.evals = evals_nz

#### The savefig option is not working correctly...

In [None]:
params = {'glorot_fix': False, 'normalize':False, 'conv2d_norm':False, 'randomize': True, 
                  'savedir':'ww-img', 'savefig':True, 'rescale':True, 'plot':True,
                  'deltaEs':False, 'intra':False, 'channels':None, 'conv2d_fft':False, 
                  'ww2x':False, 'vectors':False, 'smooth':None}

In [None]:
watcher.apply_fit_powerlaw(layer, params=params)

In [None]:
W = layer.Wmats[0]
Wrand = W.flatten()
np.random.shuffle(Wrand)
np.random.shuffle(Wrand)
np.random.shuffle(Wrand)
np.random.shuffle(Wrand)
np.random.shuffle(Wrand)

W = Wrand.reshape(W.shape)
W = W.astype(float)

svd = TruncatedSVD(n_components=5000)
svd.fit(W)
svals = svd.singular_values_
rand_evals =  svals*svals
layer.rand_evals = rand_evals

In [None]:
plt.hist(np.log10(layer.rand_evals), bins=100, color='red', alpha=0.5, density=True, label='random');
plt.hist(np.log10(evals), bins=100, color='green', alpha=0.5, density=True, label='stacked');
plt.legend()
plt.xlabel("Log10 Eigenvalues")
plt.title("log10 Eigenvalues: n\n Stacked Layers X  vs Randomized(X) ")

In [None]:
layer.evals = layer.rand_evals
watcher.apply_fit_powerlaw(layer, params=params)

In [None]:
details =  watcher.describe(model)
details


In [None]:
Ntot = int(np.sum(details.N.to_numpy()))
Mtot = int(np.sum(details.M.to_numpy()*np.sqrt(details.rf.to_numpy())))
Ntot, Mtot

In [None]:
Ntot/ Mtot, Ntot/3

In [None]:
layer.N = Ntot
layer.M = Ntot/20
watcher.apply_mp_fit(layer, params=params)