In [2]:
import os

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

from plotting_helpers import *

In [3]:
rid = 'wttozyi2'

config = get_run_config(rid)
config

{'k': 97,
 'm': 1,
 'p': 0.5,
 'lr': 0.0005,
 'seed': 229,
 'device': 'cuda',
 'epochs': 3,
 'input_p': 0,
 'n_hidden': 2,
 'run_name': None,
 'batch_size': 512,
 'input_size': 3072,
 'hidden_size': 2000,
 'num_workers': 4,
 'output_size': 10,
 'dataset_name': 'cifar10',
 'git_snapshot': 'git_snapshot_f10142_712bf9',
 'store_weights': True,
 'test_batch_size': 512,
 'n_random_subnets': 3,
 'skip_mask_performance': False}

In [4]:
run_path = f'../models/{rid}'
run_files = os.listdir(run_path)
run_files

['dropout_mask_2.pt',
 'weights_epoch_3.pt',
 'random_mask_1.pt',
 'weights_epoch_2.pt',
 'random_mask_0.pt',
 'weights_epoch_1.pt',
 'weights_epoch_0.pt',
 'random_mask_2.pt',
 'dropout_mask_0.pt',
 'dropout_mask_1.pt']

In [140]:
# load masks
epochs = config['epochs']
k = config['k']
n_masks = epochs * BATCHES_PER_EPOCH // k

dropout_masks = {}
for i in range(n_masks):
    dropout_masks[i] = torch.load(f'{run_path}/dropout_mask_{i}.pt')

random_masks = {}
for i in range(config['n_random_subnets']):
    random_masks[i] = torch.load(f'{run_path}/random_mask_{i}.pt')

In [6]:
def load_weights(epoch):
    return torch.load(f'{run_path}/weights_epoch_{epoch}.pt')

### Experiment for verifying dropping rows and columns

In [85]:
# TODO: try out dropping out rows and columns
w_size = 10
dims = [w_size] * 3

weights = []
for a, b in zip(dims[:-1], dims[1:]):
    weights.append(torch.randn(a, b))

masks = [(torch.randn(a) > 0).int() for a in dims]
input = torch.randn(w_size)

In [86]:
out = input
for w, m in zip(weights, masks[:-1]):
    out *= m
    out = w @ out
out *= masks[-1]
out

tensor([-1.8566, -0.0000, -0.0000,  0.0000,  0.0000, -0.8037, -0.3888,  0.0000,
         0.0000, -1.4470])

In [87]:
masked_weights = []
for w, pre_m, post_m in zip(weights, masks[:-1], masks[1:]):
    pre_m_expanded = pre_m.view(1, -1).expand_as(w)
    post_m_expanded = post_m.view(-1, 1).expand_as(w)

    masked_weights.append(w * pre_m_expanded * post_m_expanded)

out = input
for w, m in zip(masked_weights, masks[:-1]):
    out *= m
    out = w @ out
out *= masks[-1]
out

tensor([-1.8566,  0.0000,  0.0000,  0.0000,  0.0000, -0.8037, -0.3888,  0.0000,
         0.0000, -1.4470])

### Run the main experiment

In [129]:
def get_masked_weights(w: torch.Tensor, m_pre: torch.Tensor = None, m_post: torch.Tensor = None):
    if m_pre is None:
        m_pre = torch.ones(w.shape[1])
    if m_post is None:
        m_post = torch.ones(w.shape[0])

    m_pre_expanded = m_pre.view(1, -1).expand_as(w)
    m_post_expanded = m_post.view(-1, 1).expand_as(w)
    return w * m_pre_expanded * m_post_expanded

def get_net_l2(w1: list[torch.Tensor], w2: list[torch.Tensor], m: list[torch.Tensor]):
    # we're assuming that each mask immediately precedes its corresponding weight matrix
    # TODO: deal with biases

    w1_masked = []
    for w, m_pre, m_post in zip(w1[:-1], masks[:-1], masks[1:]):
        w1_masked.append(get_masked_weights(w, m_pre, m_post))
    w1_masked.append(get_masked_weights(w1[-1], m_pre=masks[-1]))
    w2_masked = []
    for w, m_pre, m_post in zip(w2[:-1], masks[:-1], masks[1:]):
        w2_masked.append(get_masked_weights(w, m_pre, m_post))
    w2_masked.append(get_masked_weights(w2[-1], m_pre=masks[-1]))

    return sum([torch.norm(w1 - w2) for w1, w2 in zip(w1_masked, w2_masked)]).item()

In [149]:
# TODO: test out get_net_l2

w1 = load_weights(1)[::2]
w2 = load_weights(3)[::2]
masks = dropout_masks[0]

get_net_l2(w1, w2, masks)

7.740933895111084

In [11]:
def plot_l2(masks: list[torch.Tensor]):
    TYPES = ('dropout', 'random')
    if type not in TYPES:
        raise ValueError(f'type must be one of {TYPES}')

    final_weights = load_weights(epochs)

    for i in range(final_weights):

    # TODO

IndentationError: expected an indented block (1680254462.py, line 10)