
# WWLayerIterator

This Notebook explains how to use the internal WeightWatcher Layer Iterators







In [1]:
# Suppress the powerlaw package warnings
# "powerlaw.py:700: RuntimeWarning: divide by zero encountered in true_divide"
# "powerlaw.py:700: RuntimeWarning: invalid value encountered in true_divide"
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [2]:
import numpy as np
import pandas as pd

from tqdm import tqdm

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext watermark
%watermark

2021-07-31T15:29:26-07:00

CPython 3.7.4
IPython 7.17.0

compiler   : Clang 4.0.1 (tags/RELEASE_401/final)
system     : Darwin
release    : 17.7.0
machine    : x86_64
processor  : i386
CPU cores  : 12
interpreter: 64bit


### Import WeightWatcher

Configure logging: the root logger is set to DEBUG and this notebook's own logger to WARN

In [3]:
import logging
# Configure the root logger at DEBUG, then fetch this notebook's own logger (named by __name__)
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
# NOTE(review): WARN is applied to the notebook's logger only, not to the 'weightwatcher'
# logger -- INFO:weightwatcher records still show up in the output of later cells.
logger.setLevel(logging.WARN)

import weightwatcher as ww
import torchvision.models as models


# Last expression of the cell: display the installed weightwatcher version
ww.__version__

'0.4.9'

###  How to create a WWLayerIterator

In [4]:
# Load torchvision's vgg19_bn model with pretrained weights
model = models.vgg19_bn(pretrained=True)
# Wrap the model in a WeightWatcher, then ask the watcher for an iterator over the model's layers
watcher = ww.WeightWatcher(model=model)
ww_layer_iterator = watcher.make_layer_iterator(model=model)

INFO:weightwatcher:

python      version 3.7.4 (default, Aug 13 2019, 15:17:50) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
numpy       version 1.19.1
tensforflow version 2.1.0
keras       version 2.2.4-tf
INFO:weightwatcher:params {'glorot_fix': False, 'normalize': False, 'conv2d_norm': True, 'randomize': True, 'savefig': False, 'rescale': True, 'deltaEs': False, 'intra': False, 'channels': None, 'conv2d_fft': False, 'ww2x': False}


### The Iterator lets you loop over WWLayer instances

- The WWLayer instance (object) is a wrapper to the underlying framework layers

- The intent is to only access the WWLayer instance and not the underlying framework methods

- This lets weightwatcher apply different functions / transformations on each layer individually



In [5]:
# Walk the iterator and print the one-line summary of every WWLayer it yields
for ww_layer in ww_layer_iterator:
    print(ww_layer)



WWLayer 2  None PYTORCH CONV2D  skipped False
WWLayer 5  None PYTORCH CONV2D  skipped False
WWLayer 9  None PYTORCH CONV2D  skipped False
WWLayer 12  None PYTORCH CONV2D  skipped False
WWLayer 16  None PYTORCH CONV2D  skipped False
WWLayer 19  None PYTORCH CONV2D  skipped False
WWLayer 22  None PYTORCH CONV2D  skipped False
WWLayer 25  None PYTORCH CONV2D  skipped False
WWLayer 29  None PYTORCH CONV2D  skipped False
WWLayer 32  None PYTORCH CONV2D  skipped False
WWLayer 35  None PYTORCH CONV2D  skipped False
WWLayer 38  None PYTORCH CONV2D  skipped False




WWLayer 42  None PYTORCH CONV2D  skipped False
WWLayer 45  None PYTORCH CONV2D  skipped False
WWLayer 48  None PYTORCH CONV2D  skipped False
WWLayer 51  None PYTORCH CONV2D  skipped False
WWLayer 57  None PYTORCH DENSE  skipped False
WWLayer 60  None PYTORCH DENSE  skipped False
WWLayer 63  None PYTORCH DENSE  skipped False


### The .__dict__ attribute lets you inspect what is actually in the iterator

In [6]:
# Inspect the instance attributes of the layer iterator created above.
# (The original cell referenced `li`, a name that is never defined in this notebook.)
ww_layer_iterator.__dict__

{'params': {'glorot_fix': False,
  'normalize': False,
  'conv2d_norm': True,
  'randomize': True,
  'savefig': False,
  'rescale': True,
  'deltaEs': False,
  'intra': False,
  'channels': None,
  'conv2d_fft': False,
  'ww2x': False},
 'k': 0,
 'model': VGG(
   (features): Sequential(
     (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (2): ReLU(inplace=True)
     (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (5): ReLU(inplace=True)
     (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (9): ReLU(inplace=True)
     (10): Conv2d(128, 128, kernel_size=(

### The WWLayerIterator constructor method takes

- layers=[LAYER_ID, ...] to specify filters, as in the watcher.analyze(...) and watcher.describe(...) methods

- other parameters, like ww2x and channels, are specified in the parameters dict



In [21]:
# Switch the weightwatcher package logger to DEBUG so the layer filtering shows up in the output
logger = logging.getLogger('weightwatcher')
logger.setLevel(logging.DEBUG)

# Only this layer id should be processed by the iterator
layers = [60]

# Parameter dict handed to the iterator, one option per line
DEFAULT_PARAMS = dict(
    glorot_fix=False,
    normalize=False,
    conv2d_norm=True,
    randomize=True,
    savefig=False,
    rescale=True,
    deltaEs=False,
    intra=False,
    channels=None,
    conv2d_fft=False,
    ww2x=False,
)

params = DEFAULT_PARAMS
ww_layer_iterator = watcher.make_layer_iterator(
    model=model,
    layers=layers,
    params=params,
)



INFO:weightwatcher:params {'glorot_fix': False, 'normalize': False, 'conv2d_norm': True, 'randomize': True, 'savefig': False, 'rescale': True, 'deltaEs': False, 'intra': False, 'channels': None, 'conv2d_fft': False, 'ww2x': False}
DEBUG:weightwatcher:FRAMEWORKS: KERAS = 4  PYTORCH = 2 ONNX = 8 UNKNOWN = 1 
DEBUG:weightwatcher:FIRST = 2  LAST = 4 UNKNOWN = 1 
DEBUG:weightwatcher:MODEL ITERATOR, framework = 2, channels = 4 
INFO:weightwatcher:Filtering layer by id 60


#### Now only 1 layer is processed

In [22]:
# Iterate again with the layer filter in place; layers not selected by id are logged as skipped
for ww_layer in ww_layer_iterator:
    print(ww_layer)

INFO:weightwatcher:skipping layer 0 None by id
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 0 None is skipped
INFO:weightwatcher:skipping layer 1 None by id
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 1 None is skipped
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 64x3 (NxM), 3x3 (i,j)
DEBUG:weightwatcher:get_conv2D_Wmats N=64 M=3 rf= 9 channels = 4
INFO:weightwatcher:skipping layer 2 None by id
DEBUG:weightwatcher:layer_supported  N 64 max evals None
DEBUG:weightwatcher:Layer 2 None is skipped
INFO:weightwatcher:skipping layer 3 None by id
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 3 None is skipped
INFO:weightwatcher:skipping layer 4 None by id
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 4 None is skipped
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 64x64 (NxM), 3x

DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 33 None is skipped
INFO:weightwatcher:skipping layer 34 None by id
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 34 None is skipped
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 512x512 (NxM), 3x3 (i,j)
DEBUG:weightwatcher:get_conv2D_Wmats N=512 M=512 rf= 9 channels = 4
INFO:weightwatcher:skipping layer 35 None by id
DEBUG:weightwatcher:layer_supported  N 512 max evals None
DEBUG:weightwatcher:Layer 35 None is skipped
INFO:weightwatcher:skipping layer 36 None by id
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 36 None is skipped
INFO:weightwatcher:skipping layer 37 None by id
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:Layer 37 None is skipped
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 512x512 (NxM), 3x3 (i,j)
DEBUG:weightwatcher:g

WWLayer 60  None PYTORCH DENSE  skipped False


### WWLayer  Instances

When a WWLayer instance is created, the weight matrices for the layer are extracted from the underlying framework tensor (i.e. layer.weights and layer.biases) and placed into Wmats

- Wmats = [W,W,W,...] contains 1 or more W matrices, all of the same shape NxM, N >= M
- evals: the *combined* evals for each layer
- rf = 1 or (k)x(k)  the size of the 'receptive field'



- layer_type =  an internal enum:    known layer types so far include:

<pre>
LAYER_TYPE.DENSE | LAYER_TYPE.CONV1D | LAYER_TYPE.CONV2D | LAYER_TYPE.FLATTENED |  LAYER_TYPE.EMBEDDING | LAYER_TYPE.NORM
</pre>

- channel_str:  string for channel type :  "FIRST" | "LAST" | "UNKNOWN"



### WeightWatcher Apply Methods

#### The various apply_xxx() methods use basic metaprogramming to set additional instance variables


- apply_filters()
- apply_normalize_Wmats()
- apply_esd()
- apply_random_esd()
- apply_plot_esd()
- apply_fit_powerlaw()
- apply_norm_metrics()
- apply_plot_deltaEs()
- apply_mp_fit()
- apply_svdsmoothing()


e.g. the apply_esd() method runs SVD on all the Wmats, then combines the resulting eigenvalues into a single ESD

<code>
#
    def apply_esd(self, ww_layer, params=DEFAULT_PARAMS):
            """run full SVD on layer weight matrices, compute ESD on combined eigenvalues, combine all..."""
#
...
#
    ww_layer.evals = evals
    ww_layer.add_column("has_esd", True)
    ww_layer.add_column("num_evals", len(evals))
    ww_layer.add_column("sv_max", sv_max)
    ww_layer.add_column("rank_loss", rank_loss)
    ww_layer.add_column("lambda_max", np.max(evals))    
#      
    return ww_layer
</code>       
        


### There are different Iterators for different ways of walking the Model Layers

In [24]:
# Show the concrete class of the object returned by watcher.make_layer_iterator()
type(ww_layer_iterator)

weightwatcher.weightwatcher.WWLayerIterator

In [26]:
from weightwatcher.weightwatcher import WWLayerIterator

In [37]:
from weightwatcher.constants import LAYER_TYPE

In [142]:

class WWStackedLayerIterator(WWLayerIterator):
    """Iterator variant that stacks all weight matrices into a single WWLayer.

    Every layer produced by the parent ``ww_layer_iter_()`` is visited and its
    ``Wmats`` are collected into one combined layer; that combined layer is the
    only item this iterator yields.
    """

    def ww_stacked_iter_(self):
        """Yield one WWLayer whose ``Wmats`` is the concatenation of all layers' ``Wmats``.

        Yields nothing when the underlying layer iterator produces no layers.
        """
        from copy import deepcopy

        # Local accumulator for the weight matrices of every layer. It must be
        # created here: relying on a global ``Wmats`` fails on a fresh kernel and
        # would otherwise keep growing across repeated iterations.
        Wmats = []
        ww_stacked_layer = None
        for ww_layer in self.ww_layer_iter_():

            # Here, we just lazily copy the first layer as a template.
            # Really, we should create the WWLayer using the init() constructor.
            if ww_stacked_layer is None:
                ww_stacked_layer = deepcopy(ww_layer)
                ww_stacked_layer.the_type = LAYER_TYPE.COMBINED
                ww_stacked_layer.layer_id = 0
                ww_stacked_layer.name = "Example Stacked Layer"
            Wmats.extend(ww_layer.Wmats)

        # No layers at all: there is nothing to stack, so end the generator
        # instead of failing on the attribute assignments below.
        if ww_stacked_layer is None:
            return

        # Note: Here the matrices should be padded so that they are all the same width
        #  ww_stacked_layer.Wmats = pad(Wmats)
        #
        ww_stacked_layer.Wmats = Wmats

        # Then, the layer shape has to be set.
        # Just setting dummy values here.
        ww_stacked_layer.N = 1000
        ww_stacked_layer.M = 100
        ww_stacked_layer.rf = 1
        #...

        yield ww_stacked_layer

    def make_layer_iter_(self):
        """Return the stacked-layer generator in place of the per-layer one."""
        return self.ww_stacked_iter_()

In [143]:
# Build the custom stacked iterator directly from the model (no watcher involved)
layer_iter = WWStackedLayerIterator(model=model)

DEBUG:weightwatcher:FRAMEWORKS: KERAS = 4  PYTORCH = 2 ONNX = 8 UNKNOWN = 1 
DEBUG:weightwatcher:FIRST = 2  LAST = 4 UNKNOWN = 1 
DEBUG:weightwatcher:MODEL ITERATOR, framework = 2, channels = 4 


In [144]:
# Print every layer the stacked iterator yields.
# `layer` stays bound after the loop and is inspected by the next cell.
for layer in layer_iter:
    print(layer)

DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not supported: Layer 0 None has no weights
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not supported: Layer 1 None has no weights
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 64x3 (NxM), 3x3 (i,j)
DEBUG:weightwatcher:get_conv2D_Wmats N=64 M=3 rf= 9 channels = 4
DEBUG:weightwatcher:layer_supported  N 64 max evals None
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not supported: Layer 3 None has no weights
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not supported: Layer 4 None has no weights
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 64x64 (NxM), 3x3 (i,j)
DEBUG:weightwatcher:get_conv2D_Wmats N=64 M=64 rf= 9 channels = 4
DEBUG:weightwatcher:layer_supported  N 64 max evals None
DEBUG:weightwatcher:layer_supported  N 0 max eva

DEBUG:weightwatcher:layer not supported: Layer 40 None has no weights
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not supported: Layer 41 None has no weights
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 512x512 (NxM), 3x3 (i,j)
DEBUG:weightwatcher:get_conv2D_Wmats N=512 M=512 rf= 9 channels = 4
DEBUG:weightwatcher:layer_supported  N 512 max evals None
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not supported: Layer 43 None has no weights
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not supported: Layer 44 None has no weights
DEBUG:weightwatcher:conv2D_Wmats
DEBUG:weightwatcher:Channels Last tensor shape: 512x512 (NxM), 3x3 (i,j)
DEBUG:weightwatcher:get_conv2D_Wmats N=512 M=512 rf= 9 channels = 4
DEBUG:weightwatcher:layer_supported  N 512 max evals None
DEBUG:weightwatcher:layer_supported  N 0 max evals None
DEBUG:weightwatcher:layer not su

WWLayer 0  Example Stacked Layer PYTORCH COMBINED  skipped False


### Notice:  The Layer Matrices have not been padded or reshaped yet

In [145]:
# Print the shape of every matrix collected in the stacked layer's Wmats list
for W in layer.Wmats:
    print(W.shape)

(64, 3)
(64, 3)
(64, 3)
(64, 3)
(64, 3)
(64, 3)
(64, 3)
(64, 3)
(64, 3)
(64, 64)
(64, 64)
(64, 64)
(64, 64)
(64, 64)
(64, 64)
(64, 64)
(64, 64)
(64, 64)
(128, 64)
(128, 64)
(128, 64)
(128, 64)
(128, 64)
(128, 64)
(128, 64)
(128, 64)
(128, 64)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(256, 128)
(256, 128)
(256, 128)
(256, 128)
(256, 128)
(256, 128)
(256, 128)
(256, 128)
(256, 128)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(512, 256)
(512, 256)
(512, 256)
(512, 256)
(512, 256)
(512, 256)
(512, 256)
(512, 256)
(512, 256)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512)
(512, 512