# WeightWatcher

https://calculationconsulting.com

In [1]:
# Suppress the powerlaw package warnings
# "powerlaw.py:700: RuntimeWarning: divide by zero encountered in true_divide"
# "powerlaw.py:700: RuntimeWarning: invalid value encountered in true_divide"
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)

## 1. Quick start example

### 1.1 Import your model (Keras or PyTorch)

In [2]:
# Build the Keras VGG16 model initialised with the 'imagenet' weights.
# NOTE: `load_model` is imported but not used by any cell visible in this notebook.
from keras.models import load_model
from keras.applications import vgg16

# `kmodel` is the model constructor; `model` is the instance analyzed below.
kmodel = vgg16.VGG16
model = kmodel(weights='imagenet')

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
W0822 18:11:53.302999 11016 deprecation_wrapper.py:119] From c:\users\mzczm\anaconda3\envs\ww\lib\site-packages\keras\backend\tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0822 18:11:53.330447 11016 deprecation_wrapper.py:119] From c:\users\mzczm\anaconda3\envs\ww\lib\site-pack

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5


W0822 18:18:46.448710 11016 deprecation_wrapper.py:119] From c:\users\mzczm\anaconda3\envs\ww\lib\site-packages\keras\backend\tensorflow_backend.py:190: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0822 18:18:46.449724 11016 deprecation_wrapper.py:119] From c:\users\mzczm\anaconda3\envs\ww\lib\site-packages\keras\backend\tensorflow_backend.py:197: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.



### 1.2 Run WeightWatcher

In [4]:
import weightwatcher as ww

# Wrap the model created in the previous cell in a WeightWatcher instance.
watcher = ww.WeightWatcher(model=model)

# Analyze with default arguments; the return value is displayed in the next cell.
results = watcher.analyze()

2019-08-22 18:40:07,014 INFO 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
I0822 18:40:07.014105 11016 weightwatcher.py:99] 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
2019-08-22 18:40:07,021 INFO Analyzing model 'vgg16' with 23 layers
I0822 18:40:07.021846 11016 weightwatcher.py:99] Analyzing model 'vgg16' with 23 layers
2019-08-22 18:40:13,594 INFO ### Printing results ###
I0822 18:40:13.594648 11016 weightwatcher.py:99] ### Printing results ###
2019-08-22 18:40:17,652 INFO Norm: min: 2.

In [5]:
# Display the nested dict returned by analyze(): one entry per layer id.
results

{0: {'id': 0,
  'type': <keras.engine.input_layer.InputLayer at 0x2192a85bfc8>,
  'message': 'Skipping (Layer not supported)'},
 1: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  0: {'N': 64,
   'M': 3,
   'Q': 21.333333333333332,
   'summary': 'Weight matrix 1/9 (3,64): Skipping: too small (<50)'},
  1: {'N': 64,
   'M': 3,
   'Q': 21.333333333333332,
   'summary': 'Weight matrix 2/9 (3,64): Skipping: too small (<50)'},
  2: {'N': 64,
   'M': 3,
   'Q': 21.333333333333332,
   'summary': 'Weight matrix 3/9 (3,64): Skipping: too small (<50)'},
  3: {'N': 64,
   'M': 3,
   'Q': 21.333333333333332,
   'summary': 'Weight matrix 4/9 (3,64): Skipping: too small (<50)'},
  4: {'N': 64,
   'M': 3,
   'Q': 21.333333333333332,
   'summary': 'Weight matrix 5/9 (3,64): Skipping: too small (<50)'},
  5: {'N': 64,
   'M': 3,
   'Q': 21.333333333333332,
   'summary': 'Weight matrix 6/9 (3,64): Skipping: too small (<50)'},
  6: {'N': 64,
   'M': 3,
   'Q': 21.333333333333332,
   'summary': 'Weight matrix 7/

In [6]:
# Display the summary dict held by the watcher (norm / lognorm values, see the output below).
watcher.get_summary()

{'norm': 4.0183034,
 'norm_compound': 6.7512617,
 'lognorm': 0.56747305,
 'lognorm_compound': 0.69468397}

In [7]:
# Log the min / max / avg of each metric (emitted as INFO lines, see below).
watcher.print_results()

2019-08-22 18:48:34,203 INFO ### Printing results ###
I0822 18:48:34.203774 11016 weightwatcher.py:99] ### Printing results ###
2019-08-22 18:48:37,047 INFO Norm: min: 2.4488985538482666, max: 23.396276473999023, avg: 4.018303394317627
I0822 18:48:37.047494 11016 weightwatcher.py:99] Norm: min: 2.4488985538482666, max: 23.396276473999023, avg: 4.018303394317627
2019-08-22 18:48:37,053 INFO Norm compound: min: 2.728064775466919, max: 23.396276473999023, avg: 6.7512617111206055
I0822 18:48:37.053721 11016 weightwatcher.py:99] Norm compound: min: 2.728064775466919, max: 23.396276473999023, avg: 6.7512617111206055
2019-08-22 18:48:37,056 INFO LogNorm: min: 0.3889707922935486, max: 1.369146704673767, avg: 0.5674730539321899
I0822 18:48:37.056441 11016 weightwatcher.py:99] LogNorm: min: 0.3889707922935486, max: 1.369146704673767, avg: 0.5674730539321899
2019-08-22 18:48:37,063 INFO LogNorm compound: min: 0.43449220061302185, max: 1.369146704673767, avg: 0.6946839690208435
I0822 18:48:37.0636

## 2. Advanced examples

### 2.1 Filter by layer type (CONV1D, CONV2D, DENSE)

In this example we are interested in the DENSE layers only.

In [8]:
from keras.applications import vgg16

# Rebuild VGG16 with the 'imagenet' weights (same construction as in section 1.1).
kmodel = vgg16.VGG16
model = kmodel(weights='imagenet')

import weightwatcher as ww

watcher = ww.WeightWatcher(model=model)

# Passing a single LAYER_TYPE member restricts the analysis to that layer type;
# the other layers are reported as skipped in the output below.
watcher.analyze(layers=ww.LAYER_TYPE.DENSE)

2019-08-22 18:48:46,270 INFO 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
I0822 18:48:46.270144 11016 weightwatcher.py:99] 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
2019-08-22 18:48:46,276 INFO Analyzing model 'vgg16' with 23 layers
I0822 18:48:46.276254 11016 weightwatcher.py:99] Analyzing model 'vgg16' with 23 layers
2019-08-22 18:48:46,976 INFO ### Printing results ###
I0822 18:48:46.976139 11016 weightwatcher.py:99] ### Printing results ###
2019-08-22 18:48:47,605 INFO Norm: min: 16

{0: {'id': 0,
  'type': <keras.engine.input_layer.InputLayer at 0x2193bdcf608>,
  'message': 'Skipping (Layer not supported)'},
 1: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 2: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 3: {'id': 3,
  'type': <keras.layers.pooling.MaxPooling2D at 0x2193be66b88>,
  'message': 'Skipping (Layer not supported)'},
 4: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 5: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 6: {'id': 6,
  'type': <keras.layers.pooling.MaxPooling2D at 0x2193bec6208>,
  'message': 'Skipping (Layer not supported)'},
 7: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 8: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not req

In [9]:
# Log the min / max / avg of each metric for the DENSE-only analysis above.
watcher.print_results()

2019-08-22 18:48:53,894 INFO ### Printing results ###
I0822 18:48:53.894500 11016 weightwatcher.py:99] ### Printing results ###
2019-08-22 18:48:54,370 INFO Norm: min: 16.75731658935547, max: 23.396276473999023, avg: 19.391197204589844
I0822 18:48:54.370339 11016 weightwatcher.py:99] Norm: min: 16.75731658935547, max: 23.396276473999023, avg: 19.391197204589844
2019-08-22 18:48:54,376 INFO Norm compound: min: 16.75731658935547, max: 23.396276473999023, avg: 19.391197204589844
I0822 18:48:54.376683 11016 weightwatcher.py:99] Norm compound: min: 16.75731658935547, max: 23.396276473999023, avg: 19.391197204589844
2019-08-22 18:48:54,380 INFO LogNorm: min: 1.224204421043396, max: 1.369146704673767, avg: 1.2830352783203125
I0822 18:48:54.380995 11016 weightwatcher.py:99] LogNorm: min: 1.224204421043396, max: 1.369146704673767, avg: 1.2830352783203125
2019-08-22 18:48:54,384 INFO LogNorm compound: min: 1.224204421043396, max: 1.369146704673767, avg: 1.2830352783203125
I0822 18:48:54.384330 1

### 2.2 Filter by multiple layer types

In this example we are interested in the CONV1D and DENSE layers.

Combine the layer types with the bitwise OR operator (`|`) to build the filter bitmask.

In [10]:
import weightwatcher as ww

# Uses the `model` created in an earlier cell.
watcher = ww.WeightWatcher(model=model)

# LAYER_TYPE members are combined with bitwise OR to request several layer types at once.
watcher.analyze(layers=ww.LAYER_TYPE.CONV1D|ww.LAYER_TYPE.DENSE)

2019-08-22 18:49:14,507 INFO 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
I0822 18:49:14.507681 11016 weightwatcher.py:99] 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
2019-08-22 18:49:14,511 INFO Analyzing model 'vgg16' with 23 layers
I0822 18:49:14.511307 11016 weightwatcher.py:99] Analyzing model 'vgg16' with 23 layers
2019-08-22 18:49:15,108 INFO ### Printing results ###
I0822 18:49:15.108895 11016 weightwatcher.py:99] ### Printing results ###
2019-08-22 18:49:15,690 INFO Norm: min: 16

{0: {'id': 0,
  'type': <keras.engine.input_layer.InputLayer at 0x2193bdcf608>,
  'message': 'Skipping (Layer not supported)'},
 1: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 2: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 3: {'id': 3,
  'type': <keras.layers.pooling.MaxPooling2D at 0x2193be66b88>,
  'message': 'Skipping (Layer not supported)'},
 4: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 5: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 6: {'id': 6,
  'type': <keras.layers.pooling.MaxPooling2D at 0x2193bec6208>,
  'message': 'Skipping (Layer not supported)'},
 7: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not requested to analyze)'},
 8: {'layer_type': <LAYER_TYPE.CONV2D: 4>,
  'message': 'Skipping (Layer type not req

### 2.3 Filter by layer IDs

In [11]:
import weightwatcher as ww

# Uses the `model` created in an earlier cell.
watcher = ww.WeightWatcher(model=model)

# A list of layer ids selects individual layers (here only layer 20); all
# other ids are reported as "not requested" in the output below.
watcher.analyze(layers=[20])

2019-08-22 18:49:25,800 INFO 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
I0822 18:49:25.800711 11016 weightwatcher.py:99] 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
2019-08-22 18:49:25,805 INFO Analyzing model 'vgg16' with 23 layers
I0822 18:49:25.805460 11016 weightwatcher.py:99] Analyzing model 'vgg16' with 23 layers
2019-08-22 18:49:26,295 INFO ### Printing results ###
I0822 18:49:26.295799 11016 weightwatcher.py:99] ### Printing results ###
2019-08-22 18:49:26,853 INFO Norm: min: 23

{0: {'id': 0,
  'type': <keras.engine.input_layer.InputLayer at 0x2193bdcf608>,
  'message': 'Skipping (Layer id not requested to analyze)'},
 1: {'id': 1,
  'type': <keras.layers.convolutional.Conv2D at 0x2193bdcf6c8>,
  'message': 'Skipping (Layer id not requested to analyze)'},
 2: {'id': 2,
  'type': <keras.layers.convolutional.Conv2D at 0x2193be17f08>,
  'message': 'Skipping (Layer id not requested to analyze)'},
 3: {'id': 3,
  'type': <keras.layers.pooling.MaxPooling2D at 0x2193be66b88>,
  'message': 'Skipping (Layer id not requested to analyze)'},
 4: {'id': 4,
  'type': <keras.layers.convolutional.Conv2D at 0x2193be7b808>,
  'message': 'Skipping (Layer id not requested to analyze)'},
 5: {'id': 5,
  'type': <keras.layers.convolutional.Conv2D at 0x2193bea2dc8>,
  'message': 'Skipping (Layer id not requested to analyze)'},
 6: {'id': 6,
  'type': <keras.layers.pooling.MaxPooling2D at 0x2193bec6208>,
  'message': 'Skipping (Layer id not requested to analyze)'},
 7: {'id': 7,
  't

### 2.4 Get the return values per layer

In [12]:
import weightwatcher as ww

# Uses the `model` created in an earlier cell.
watcher = ww.WeightWatcher(model=model)

# Keep the returned dict; the next cell iterates over it layer by layer.
results = watcher.analyze()

2019-08-22 18:49:54,421 INFO 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
I0822 18:49:54.421251 11016 weightwatcher.py:99] 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
2019-08-22 18:49:54,428 INFO Analyzing model 'vgg16' with 23 layers
I0822 18:49:54.428843 11016 weightwatcher.py:99] Analyzing model 'vgg16' with 23 layers
2019-08-22 18:49:55,389 INFO ### Printing results ###
I0822 18:49:55.389982 11016 weightwatcher.py:99] ### Printing results ###
2019-08-22 18:49:58,857 INFO Norm: min: 2.

In [13]:
# Walk the nested results dict: the outer level is keyed by layer id, the inner
# level mixes metadata keys with numeric slice ids. Only numeric slices that
# carry a "lognorm" entry are printed.
for layer_id, result in results.items():
    for slice_id, summary in result.items():
        if str(slice_id).isdigit() and "lognorm" in summary:
            lognorm = summary["lognorm"]
            print("Layer {}, Slice {}: Lognorm: {}".format(layer_id, slice_id, lognorm))

Layer 2, Slice 0: Lognorm: 0.3978934586048126
Layer 2, Slice 1: Lognorm: 0.45358702540397644
Layer 2, Slice 2: Lognorm: 0.40578144788742065
Layer 2, Slice 3: Lognorm: 0.45428669452667236
Layer 2, Slice 4: Lognorm: 0.49695152044296265
Layer 2, Slice 5: Lognorm: 0.45737624168395996
Layer 2, Slice 6: Lognorm: 0.4044671952724457
Layer 2, Slice 7: Lognorm: 0.4511154294013977
Layer 2, Slice 8: Lognorm: 0.3889707922935486
Layer 4, Slice 0: Lognorm: 0.44109997153282166
Layer 4, Slice 1: Lognorm: 0.4613628089427948
Layer 4, Slice 2: Lognorm: 0.4370166063308716
Layer 4, Slice 3: Lognorm: 0.4667660593986511
Layer 4, Slice 4: Lognorm: 0.5201410055160522
Layer 4, Slice 5: Lognorm: 0.46935534477233887
Layer 4, Slice 6: Lognorm: 0.4464666247367859
Layer 4, Slice 7: Lognorm: 0.48161619901657104
Layer 4, Slice 8: Lognorm: 0.4471622705459595
Layer 5, Slice 0: Lognorm: 0.45507872104644775
Layer 5, Slice 1: Lognorm: 0.4839082658290863
Layer 5, Slice 2: Lognorm: 0.4593982994556427
Layer 5, Slice 3: Lognorm

### 2.5 Power Law Fit

In [None]:
import weightwatcher as ww

# Uses the `model` created in an earlier cell.
watcher = ww.WeightWatcher(model=model)

# NOTE(review): compute_alphas=True presumably enables the power-law (alpha)
# fit named in this section's heading; confirm against the WeightWatcher docs.
results = watcher.analyze(compute_alphas=True)

2019-08-22 19:02:51,825 INFO 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
I0822 19:02:51.825855 11016 weightwatcher.py:99] 
WeightWatcher v0.1.2 by Calculation Consulting
Analyze weight matrices of Deep Neural Networks
https://calculationconsulting.com/
python      version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
numpy       version 1.17.0
tensforflow version 1.14.0
keras       version 2.2.5
2019-08-22 19:02:51,830 INFO Analyzing model 'vgg16' with 23 layers
I0822 19:02:51.830076 11016 weightwatcher.py:99] Analyzing model 'vgg16' with 23 layers


## 2.6 Debug and Custom Logging

### Custom Logging at Debug Level

In [None]:
import logging
# Configure the root logger at INFO, then set this notebook's own logger to DEBUG.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

import weightwatcher as ww

# Hand the custom logger to WeightWatcher through its `logger` argument.
watcher = ww.WeightWatcher(model=model, logger=logger)

results = watcher.analyze()

### Disable Logging

In [None]:
import weightwatcher as ww

# log=False is the switch this section uses to turn WeightWatcher's logging off.
watcher = ww.WeightWatcher(model=model, log=False)

results = watcher.analyze()

## 3. PyTorch Models

In [None]:
# Collects one {"name": ..., "summary": ...} dict per analyzed PyTorch model;
# filled by the cells below. Re-running this cell resets the collection.
data = []

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg16` model.
model = models.vgg16(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg16torch" entry; the plotting
# cells further down read only the first match.
data.append({"name": "vgg16torch", "summary": watcher.get_summary()})

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg16_bn` model.
model = models.vgg16_bn(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg16bntorch" entry; the
# plotting cells further down read only the first match.
data.append({"name": "vgg16bntorch", "summary": watcher.get_summary()})

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg11` model.
model = models.vgg11(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg11torch" entry; the plotting
# cells further down read only the first match.
data.append({"name": "vgg11torch", "summary": watcher.get_summary()})

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg11_bn` model.
model = models.vgg11_bn(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg11bntorch" entry; the
# plotting cells further down read only the first match.
data.append({"name": "vgg11bntorch", "summary": watcher.get_summary()})

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg13` model.
model = models.vgg13(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg13torch" entry; the plotting
# cells further down read only the first match.
data.append({"name": "vgg13torch", "summary": watcher.get_summary()})

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg13_bn` model.
model = models.vgg13_bn(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg13bntorch" entry; the
# plotting cells further down read only the first match.
data.append({"name": "vgg13bntorch", "summary": watcher.get_summary()})

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg19` model.
model = models.vgg19(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg19torch" entry; the plotting
# cells further down read only the first match.
data.append({"name": "vgg19torch", "summary": watcher.get_summary()})

In [None]:
import weightwatcher as ww
import torchvision.models as models

# Pretrained torchvision `vgg19_bn` model.
model = models.vgg19_bn(pretrained=True)

watcher = ww.WeightWatcher(model=model)

results = watcher.analyze(compute_alphas=True)

# Record the summary under the key used by the accuracy tables below.
# NOTE: re-running this cell appends a second "vgg19bntorch" entry; the
# plotting cells further down read only the first match.
data.append({"name": "vgg19bntorch", "summary": watcher.get_summary()})

In [None]:
# Display the summaries collected for the PyTorch models above.
data

In [None]:
# Accuracies of the pretrained PyTorch VGG models, keyed by the names stored in `data`.
# Source: https://github.com/Cadene/pretrained-models.pytorch
# NOTE(review): presumably top-1 accuracy in percent (the sibling dict is named
# `accuracies5`); confirm against the source table. The plotting cells visible
# in this notebook read `accuracies5`, not this dict.

accuracies = {
    "vgg11torch": 68.970,
    "vgg11bntorch": 70.452,
    "vgg13torch": 69.662,
    "vgg13bntorch": 71.508,
    "vgg16torch": 71.636,
    "vgg16bntorch": 73.518,
    "vgg19torch": 72.080,
    "vgg19bntorch": 74.266,
}

In [None]:
# Accuracies of the pretrained PyTorch VGG models, keyed by the names stored in `data`.
# Source: https://github.com/Cadene/pretrained-models.pytorch
# NOTE(review): presumably top-5 accuracy in percent, judging by the variable
# name; confirm against the source table. This is the dict the plotting cells
# below use for the x axis.

accuracies5 = {
    "vgg11torch": 88.746,
    "vgg11bntorch": 89.818,
    "vgg13torch": 89.264,
    "vgg13bntorch": 90.494,
    "vgg16torch": 90.354,
    "vgg16bntorch": 91.608,
    "vgg19torch": 90.822,
    "vgg19bntorch": 92.066,
}

### 3.1 Log Norm of Weight Matrices vs Accuracies of models

The following graph demonstrates the linear relationship between the average Log Norm of the weight matrices and the test accuracies of the models (notice that we didn't need the test data):

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = [8,8]

# One scatter point (and one legend entry) per model:
# x = accuracy from `accuracies5`, y = the "lognorm" value of that model's summary.
for modelname, accuracy in accuracies5.items():
    x = accuracy
    # All summaries recorded under this name; summary[0] below raises
    # IndexError if the corresponding model cell was never run.
    summary = [d["summary"] for d in data if d["name"] == modelname]
    y = summary[0]["lognorm"]
    label = modelname
    plt.scatter(x,y,label=label)

plt.legend()
plt.title(r"Test Accuracy vs Average Log Norm $\langle\log\Vert W\Vert\rangle$"+"\nPretrained VGG and VGG_BN Models")
plt.xlabel(r"Test Accuracy")
# The trailing semicolon keeps the returned object's repr out of the cell output.
plt.ylabel(r"$\langle\log\Vert W\Vert\rangle$");

Let's compare the average Log Norm with the average Log Norm compound:

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = [8,8]

x = []
y1, y2 = [], []
for modelname, accuracy in accuracies5.items():
    x.append(accuracy)
    summary = [d["summary"] for d in data if d["name"] == modelname]
    y1.append(summary[0]["lognorm"])
    y2.append(summary[0]["lognorm_compound"])
    label = modelname
plt.scatter(x,y1,label="Log Norm", color='r')
plt.scatter(x,y2,label="Log Norm Compound", color='b')

plt.legend()
plt.title(r"Test Accuracy vs (Average Log Norm $\langle\log\Vert W\Vert\rangle$ and Log Norm Compound)"+"\nPretrained VGG and VGG_BN Models")
plt.xlabel(r"Test Accuracy")
plt.ylabel(r"$\langle\log\Vert W\Vert\rangle$");

### 3.2 Power law fitting (Alpha) of Weight Matrices vs Accuracies of models

The linear relationship between the Power law fitting (Alpha) of the weight matrices and the accuracies of the models is demonstrated in the following graph:

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = [8,8]

# One scatter point (and one legend entry) per model:
# x = accuracy from `accuracies5`, y = the "alpha_weighted" value of that model's summary.
for modelname, accuracy in accuracies5.items():
    x = accuracy
    # All summaries recorded under this name; summary[0] below raises
    # IndexError if the corresponding model cell was never run.
    summary = [d["summary"] for d in data if d["name"] == modelname]
    y = summary[0]["alpha_weighted"]
    label = modelname
    plt.scatter(x,y,label=label)

plt.legend()
plt.title(r"Test Accuracy vs Weighted Alpha"+"\nPretrained VGG and VGG_BN Models")
plt.xlabel(r"Test Accuracy")
# The trailing semicolon keeps the returned object's repr out of the cell output.
plt.ylabel(r"Weighted Alpha");

The more accurate the model, the lower the exponent of the power law fit of its weight matrices.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = [8,8]

# Collect one point per model: x is the accuracy from `accuracies5`, y1 / y2
# are the "alpha_weighted" and "alpha_weighted_compound" values of its summary.
x = []
y1, y2 = [], []
for modelname, accuracy in accuracies5.items():
    x.append(accuracy)
    # All summaries recorded under this name; summary[0] below raises
    # IndexError if the corresponding model cell was never run.
    summary = [d["summary"] for d in data if d["name"] == modelname]
    y1.append(summary[0]["alpha_weighted"])
    y2.append(summary[0]["alpha_weighted_compound"])
# Two series, one colour and one legend entry per metric.
plt.scatter(x,y1,label="Weighted Alpha", color='r')
plt.scatter(x,y2,label="Weighted Alpha Compound", color='b')

plt.legend()
plt.title(r"Test Accuracy vs (Weighted Alpha and Weighted Alpha compound)"+"\nPretrained VGG and VGG_BN Models")
plt.xlabel(r"Test Accuracy")
# The trailing semicolon keeps the returned object's repr out of the cell output.
plt.ylabel(r"Weighted Alpha");

## 4. Conclusion

WeightWatcher helps you choose the best pretrained model for your needs.

You can use WeightWatcher to compare several pretrained models and choose the one with the lowest Log Norm.