<a href="https://colab.research.google.com/github/CalculatedContent/ww-phys_theory/blob/master/Jacobians.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch==1.5.0+cu101 torchvision==0.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html

Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [2]:
!pip install ipython-autotime
%load_ext autotime

Collecting ipython-autotime
  Downloading https://files.pythonhosted.org/packages/e6/f9/0626bbdb322e3a078d968e87e3b01341e7890544de891d0cb613641220e6/ipython-autotime-0.1.tar.bz2
Building wheels for collected packages: ipython-autotime
  Building wheel for ipython-autotime (setup.py) ... [?25l[?25hdone
  Created wheel for ipython-autotime: filename=ipython_autotime-0.1-cp36-none-any.whl size=1832 sha256=a1aa54f5099a897d1133b327cdad2287f9e0431d37ecbaa6bf802bb434ed9335
  Stored in directory: /root/.cache/pip/wheels/d2/df/81/2db1e54bc91002cec40334629bc39cfa86dff540b304ebcd6e
Successfully built ipython-autotime
Installing collected packages: ipython-autotime
Successfully installed ipython-autotime-0.1


In [0]:
#from google.colab import drive
#drive.mount('/content/drive')


In [3]:
#!cd drive/'My Drive'

time: 1.06 ms


In [4]:
import os
import time

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Subset

from torchvision import datasets
from torchvision import transforms

import matplotlib.pyplot as plt
from PIL import Image

import math
import copy


class MLP(nn.Module):
  '''Bias-free multilayer perceptron with a ReLU between consecutive linear layers.

  Args:
    n_units: layer widths, input width first and output width last.
    init_scale: multiplier applied to the standard deviation of the zero-mean
      normal distribution every weight matrix is drawn from.

  Hidden layers are registered as submodules ``fc1``, ``fc2``, ...; the last
  (prediction) layer is registered as ``fc``.
  '''

  def __init__(self, n_units, init_scale=1.0):
    super().__init__()

    self._n_units = copy.copy(n_units)
    self._layers = []
    last = len(n_units) - 1
    for idx, (fan_in, fan_out) in enumerate(zip(n_units[:-1], n_units[1:]), start=1):
      linear = nn.Linear(fan_in, fan_out, bias=False)
      # standard deviation of the initial weights: sqrt(2 / (fan_in + fan_out))
      std = math.sqrt(2.0 / (fan_in + fan_out))
      linear.weight.data.normal_(0.0, init_scale * std)
      self._layers.append(linear)
      # the prediction layer is just called fc
      self.add_module('fc' if idx == last else 'fc%d' % idx, linear)

  def forward(self, x):
    '''Flatten ``x`` to shape (-1, n_units[0]) and run it through the layer stack.'''
    hidden = self._layers[0](x.view(-1, self._n_units[0]))
    for linear in self._layers[1:]:
      hidden = linear(F.relu(hidden))
    return hidden

time: 3.46 s


In [5]:
mlp_spec = '512'  # hidden layer widths, separated by 'x'
batch_size = 100
# layer widths: flattened 32x32x3 input, then the hidden dims, then the 10-way output
n_units = [32*32*3] + [int(width) for width in mlp_spec.split('x')] + [10]
model = MLP(n_units).to("cuda:0")

time: 9.61 s


In [6]:
def get_data(batch_size=100):
  '''Build the CIFAR-10 train and test data loaders.

  Both splits are converted to tensors and normalised per channel with the
  same mean/std constants. The dataset is stored under ``data`` and is
  requested with ``download=True``.

  Args:
    batch_size: number of images per batch, used for both loaders.

  Returns:
    ``(train_loader, test_loader)``; the train loader shuffles, the test
    loader does not. Both use 4 worker processes.
  '''
  channel_mean = [m / 255.0 for m in (125.3, 123.0, 113.9)]
  channel_std = [s / 255.0 for s in (63.0, 62.1, 66.7)]
  normalize = transforms.Normalize(mean=channel_mean, std=channel_std)

  def make_loader(train, shuffle):
    # each split gets its own Compose object, built from the shared normaliser
    pipeline = transforms.Compose([transforms.ToTensor(), normalize])
    split = datasets.CIFAR10(root='data', train=train, transform=pipeline, download=True)
    return DataLoader(dataset=split, batch_size=batch_size, num_workers=4, shuffle=shuffle)

  train_loader = make_loader(train=True, shuffle=True)
  test_loader = make_loader(train=False, shuffle=False)
  return train_loader, test_loader

time: 9.4 ms


In [7]:
!pip install GPUtil
import GPUtil

GPUtil.showUtilization()

Collecting GPUtil
  Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz
Building wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... [?25l[?25hdone
  Created wheel for GPUtil: filename=GPUtil-1.4.0-cp36-none-any.whl size=7413 sha256=55a88f9e251c4974fdb3865a3342b4f29ed4f4e46632d989739a9c9a95e98895
  Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd
Successfully built GPUtil
Installing collected packages: GPUtil
Successfully installed GPUtil-1.4.0
| ID | GPU | MEM |
------------------
|  0 |  0% |  4% |
time: 4.39 s


In [8]:
def jacobian_vector_mult(model, data_loader, vec, batch_size, num_classes=10, device='cuda:0', data_dim=3*32*32):
  '''Compute the matrix-vector product M v with M = J J^T, one batch at a time.

  For each batch, J is the Jacobian of the model output with respect to the
  batch inputs, flattened to one row per entry along the first output axis.
  The slice of ``vec`` that lines up with the batch is mapped to
  J (J^T v_batch), and the per-batch results are concatenated in loader order.

  Both products are formed in the same pass, so the Jacobian is evaluated once
  per batch, no intermediate J^T v has to be kept around between passes, and
  the result does not depend on the loader yielding the same batches twice
  (which a shuffling loader does not).

  Args:
    model: module mapping a batch of features to its outputs; moved to ``device``.
    data_loader: iterable yielding ``(features, labels)`` pairs.
    vec: 1-D tensor with one entry per sample, in the order the loader yields them.
    batch_size: kept for backward compatibility; the actual size of every
      batch (including a smaller final one) is read from the batch itself.
    num_classes: kept for backward compatibility; the flattened Jacobian width
      is inferred from the Jacobian.
    device: device on which the Jacobians and products are evaluated.
    data_dim: kept for backward compatibility; see ``num_classes``.

  Returns:
    1-D CPU tensor holding J (J^T v) for every batch, concatenated, or ``None``
    when the loader yields no batches.
  '''
  model = model.to(device)

  chunks = []
  offset = 0
  for features, _ in data_loader:
    features = features.to(device)
    n = features.shape[0]

    # slice of the input vector that corresponds to this batch
    v = vec[offset:offset + n].to(device)
    offset += n

    J = torch.autograd.functional.jacobian(model, features)
    # one row per sample; all remaining Jacobian axes are flattened into columns
    J = J.reshape(n, -1)
    chunks.append(torch.mv(J, torch.mv(J.t(), v)).to('cpu'))

    # release the (large) Jacobian before the next batch is processed
    del J
    torch.cuda.empty_cache()

  return torch.cat(chunks) if chunks else None


time: 37.6 ms


In [9]:
num_classes = 10
batch_size = 100
device = 'cuda:0'

num_data = 50000
# random vector with one entry per training sample, scaled to unit L2 norm
v = torch.randn(num_data)
v = v / v.norm(p=2, dim=0)
print(v.shape, v.norm(p=2, dim=0))
GPUtil.showUtilization()

train_loader, _ = get_data(batch_size=batch_size)


torch.Size([50000]) tensor(1.0000)
| ID | GPU | MEM |
------------------
|  0 |  0% |  4% |
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified
time: 6.2 s


In [10]:
#JJvec = jacobian_vector_mult(model, train_loader, v, batch_size)
#plt.hist(JJvec, bins=100);

time: 877 µs


### Diagonal elements of $JJ^{T}$

In [11]:
def jacobian_diagonal(model, data_loader, batch_size, num_classes=10, device='cuda:0', data_dim=3*32*32):
  '''Compute the diagonal elements of J J^T, one batch at a time.

  For each batch, J is the Jacobian of the model output with respect to the
  batch inputs, flattened to one row per entry along the first output axis.
  The i-th diagonal element of J J^T is the squared L2 norm of row i, so it is
  computed directly from the rows instead of forming the full product.

  Args:
    model: module mapping a batch of features to its outputs; moved to ``device``.
    data_loader: iterable yielding ``(features, labels)`` pairs.
    batch_size: kept for backward compatibility; the actual size of every
      batch (including a smaller final one) is read from the batch itself.
    num_classes: kept for backward compatibility; the flattened Jacobian width
      is inferred from the Jacobian.
    device: device on which the Jacobians are evaluated.
    data_dim: kept for backward compatibility; see ``num_classes``.

  Returns:
    1-D numpy array with one diagonal element per sample, in loader order
    (empty when the loader yields no batches).
  '''
  model = model.to(device)

  diag_chunks = []
  for features, _ in data_loader:
    features = features.to(device)
    n = features.shape[0]

    J = torch.autograd.functional.jacobian(model, features)
    # one row per sample; all remaining Jacobian axes are flattened into columns
    J = J.reshape(n, -1)
    # diag(J J^T)[i] = sum_k J[i, k]^2
    batch_diag = (J * J).sum(dim=1).to('cpu')

    # release the (large) Jacobian before the next batch is processed
    del J
    torch.cuda.empty_cache()

    diag_chunks.append(batch_diag.numpy())

  if not diag_chunks:
    return np.array([])
  return np.concatenate(diag_chunks)


time: 13 ms


### ResNet 

In [12]:
!pip install pytorchcv

Collecting pytorchcv
[?25l  Downloading https://files.pythonhosted.org/packages/93/be/0bcd80dfc0d64e75ceb67836385402fece3c3b964c349172a21358813b25/pytorchcv-0.0.58-py2.py3-none-any.whl (435kB)
[K     |▊                               | 10kB 20.1MB/s eta 0:00:01[K     |█▌                              | 20kB 2.2MB/s eta 0:00:01[K     |██▎                             | 30kB 2.9MB/s eta 0:00:01[K     |███                             | 40kB 2.1MB/s eta 0:00:01[K     |███▊                            | 51kB 2.3MB/s eta 0:00:01[K     |████▌                           | 61kB 2.7MB/s eta 0:00:01[K     |█████▎                          | 71kB 2.9MB/s eta 0:00:01[K     |██████                          | 81kB 3.2MB/s eta 0:00:01[K     |██████▊                         | 92kB 3.6MB/s eta 0:00:01[K     |███████▌                        | 102kB 3.4MB/s eta 0:00:01[K     |████████▎                       | 112kB 3.4MB/s eta 0:00:01[K     |█████████                       | 122kB 3.4MB

In [13]:
import pytorchcv
from pytorchcv.model_provider import get_model as ptcv_get_model


time: 117 ms


In [14]:
!pip install powerlaw

Collecting powerlaw
  Downloading https://files.pythonhosted.org/packages/d5/4e/3ceab890fafff8e78a5fd7f5340c232c38b21d181fcd32d7a31079db3646/powerlaw-1.4.6.tar.gz
Building wheels for collected packages: powerlaw
  Building wheel for powerlaw (setup.py) ... [?25l[?25hdone
  Created wheel for powerlaw: filename=powerlaw-1.4.6-cp36-none-any.whl size=24787 sha256=09c4fe93c1ef3a6878d76441a896df76d9b97e90d54d4085a26c3efe3f261f3a
  Stored in directory: /root/.cache/pip/wheels/e0/27/02/08d0e2865072bfd8d7c655e290521e3feca3fc22f1ac460601
Successfully built powerlaw
Installing collected packages: powerlaw
Successfully installed powerlaw-1.4.6
time: 4.48 s


In [0]:
import powerlaw

# Fit a power law to the Jacobian diagonal of pretrained CIFAR-10 ResNets and
# plot its distribution.
for modelname in pytorchcv.model_provider._models.keys():
  if not (modelname.startswith('resnet') and modelname.endswith('cifar10')):
    continue
  # currently restricted to a single architecture
  if modelname != 'resnet164bn_cifar10':
    continue

  model = ptcv_get_model(modelname, pretrained=True)
  # nn.Module instances start out in training mode (assumed to hold for the
  # module returned by ptcv_get_model as well). Switch to eval mode so that
  # BatchNorm layers use their stored running statistics, rather than
  # per-batch statistics, and do not update them while the Jacobian is
  # evaluated.
  model.eval()
  Jdiag = jacobian_diagonal(model, train_loader, batch_size)

  results = powerlaw.Fit(Jdiag)
  alpha = results.power_law.alpha
  print(modelname, alpha)

  plt.hist(Jdiag, bins=100, density=True)
  plt.title(modelname)
  plt.show()


Downloading /root/.torch/models/resnet164bn_cifar10-0368-74ae9f4b.pth.zip from https://github.com/osmr/imgclsmob/releases/download/v0.0.179/resnet164bn_cifar10-0368-74ae9f4b.pth.zip...


In [0]:
4