In [1]:
"""
testing.ipynb

Notebook for testing the components used to implement the lottery ticket experiments.

Authors: Jordan Bourdeau, Casey Forey
Date Created: 3/8/24
"""

%load_ext tensorboard
import functools
import numpy as np
import os
import tensorflow as tf

from src.harness import constants as C
from src.harness.dataset import download_data, load_and_process_mnist
from src.harness.experiment import experiment
from src.harness.model import create_model, LeNet300, load_model
from src.harness.pruning import prune_by_percent
from src.harness.training import train
from src.lottery_ticket.foundations import paths

In [2]:
# Load the MNIST splits and build a fresh model from the training data.
X_train, Y_train, X_test, Y_test = load_and_process_mnist()
model = create_model(0, X_train, Y_train)
# Weights reported by the model before any training has happened. The training
# cell below compares these, layer by layer, against the initial weights that
# `train` returns.
# NOTE(review): values are presumably NumPy arrays keyed by 'layer{i}' -- confirm
# against `get_current_weights`.
initial_weights1: dict[str, np.ndarray] = model.get_current_weights()

In [3]:
# Run a short training session on the model created above and check that the
# weights it reports behave as expected.
optimizer = C.OPTIMIZER()
make_dataset: callable = load_and_process_mnist
initial_weights2, final_weights = train(
    make_dataset,
    model,
    0,
    optimizer,
    C.TEST_TRAINING_ITERATIONS,
)

for key in [f'layer{i}' for i in range(3)]:
    weights_at_creation = initial_weights1[key]
    weights_before_training = initial_weights2[key]
    weights_after_training = final_weights[key]

    # The initial weights returned by `train` must match what the model held
    # right after creation.
    assert np.array_equal(weights_at_creation, weights_before_training)
    # Training must have moved every layer away from its starting point.
    assert not np.array_equal(weights_before_training, weights_after_training)


Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 1/10, Loss: 2.3580663204193115
Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 2/10, Loss: 2.2764296531677246
Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 3/10, Loss: 2.2057595252990723
Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 4/10, Loss: 2.1

In [8]:
# Build a model from the initial weights as a preset, apply pruning masks, and
# verify that a round of training leaves the masked-off weights untouched.
make_model: callable = functools.partial(LeNet300, 0)
starting_masks: dict[str, np.ndarray] = {
    f'layer{i}': np.ones(initial_weights2[f'layer{i}'].shape) for i in range(3)
}

# Create pruned masks from the weights produced by the first training run
masks = prune_by_percent(C.PRUNING_PERCENTS, starting_masks, final_weights)

# NOTE(review): earlier debugging notes suspected that passing `masks` here turns
# the preset weights into tensors when they should stay NumPy arrays -- confirm
# whether that is still an issue.
model = make_model(X_train, Y_train, presets=initial_weights2, masks=masks)

# Sanity check that the weights are correctly loaded and are masked off accordingly
for i in range(3):
    key = f'layer{i}'
    actual_weights = model.get_current_weights()[key]
    layer_mask = model.masks[key]
    expected_weights: np.ndarray = initial_weights2[key] * layer_mask
    assert np.array_equal(expected_weights, actual_weights), f'Expected {expected_weights} but received {actual_weights}'
    assert np.array_equal(layer_mask, masks[key])

# Snapshot the raw weight VALUES (these include the masked off weights).
# A plain `model.weights.copy()` is only a shallow dict copy: both dicts would
# still point at the same weight objects, so any in-place update made by
# training would show up in the "before" snapshot as well and the comparison
# below could never fail. `np.array` materialises an independent copy.
pretrained_weights: dict[str, np.ndarray] = {
    name: np.array(weight) for name, weight in model.weights.items()
}

# Try doing a simulated round of pruning
initial_weights3, final_weights2 = train(make_dataset, model, 1, optimizer, C.TEST_TRAINING_ITERATIONS)

# Snapshot the raw weight values again now that training has finished
trained_weights: dict[str, np.ndarray] = {
    name: np.array(weight) for name, weight in model.weights.items()
}

print('Pretrained:\n\n')
print(pretrained_weights['layer0'][0][:10])
print()

print('Trained:\n\n')
print(trained_weights['layer0'][0][:10])

# Compare the masked weights before and after training
for i in range(3):
    key = f'layer{i}'

    # Invert the mask, to only look at the weights which WERE masked off
    inverted_mask: np.ndarray = 1 - masks[key]
    masked_pretrained_weights = pretrained_weights[key] * inverted_mask
    masked_trained_weights = trained_weights[key] * inverted_mask

    # Assert that the masked weights remain unchanged after training
    assert np.array_equal(masked_pretrained_weights, masked_trained_weights), f'Weights changed after training for layer {key}'
    # Compare per layer: the weights reported by `train` must differ before and
    # after training (same check as the first training cell).
    assert not np.array_equal(initial_weights3[key], final_weights2[key]), f'Weights did not change after training for layer {key}'


Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 1/10, Loss: 2.2923543453216553
Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 2/10, Loss: 2.219968795776367
Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 3/10, Loss: 2.154776096343994
Number of gradients: 3
Gradient shape for layer0: (784, 300), Weight shape: (784, 300)
Gradient shape for layer1: (300, 100), Weight shape: (300, 100)
Gradient shape for layer2: (100, 10), Weight shape: (100, 10)
Iteration 4/10, Loss: 2.093

In [5]:
MODEL_INDEX: int = 0
# Directory holding the initial (untrained) checkpoint of the model.
# Named `model_dir` so the builtin `dir()` stays usable in later cells.
model_dir: str = f'models/model_{MODEL_INDEX}/initial/'
weight_files = [os.path.join(paths.weights(model_dir), f'layer{i}.npy') for i in range(3)]
mask_files = [os.path.join(paths.masks(model_dir), f'layer{i}.npy') for i in range(3)]

# Read the saved arrays straight from disk, keyed the same way as the model's layers
layer_weights = {f'layer{i}': np.load(path) for i, path in enumerate(weight_files)}
masks = {f'layer{i}': np.load(path) for i, path in enumerate(mask_files)}

# Test loading a model
# NOTE(review): the meaning of the second and third arguments (0, True) is not
# visible here -- confirm against `load_model`.
model: LeNet300 = load_model(MODEL_INDEX, 0, True)

for i in range(3):
    key: str = f'layer{i}'
    # Verify all the layer weights match what was read from disk
    assert np.array_equal(model.weights[key], layer_weights[key])
    # Verify all masks are 1s (the sum of an all-ones mask equals its element count)
    assert np.sum(masks[key]) == masks[key].size


In [6]:
# Test pruning: ask for half of every layer to be pruned and check that each new
# mask keeps about half of the entries the old mask kept.
print([(key, layer.shape) for key, layer in layer_weights.items()])
percents: dict[str, float] = {key: 0.5 for key in layer_weights}
new_masks: dict[str, np.ndarray] = prune_by_percent(percents, masks, layer_weights)
for key in new_masks:
    new_mask: np.ndarray = new_masks[key]
    old_mask: np.ndarray = masks[key]
    # Use the absolute difference so the check fails in BOTH directions. A
    # one-sided `<= 1` also passes when too little (or nothing) was pruned,
    # because the difference is then negative. A slack of 1 is allowed.
    assert abs(old_mask.sum() / 2 - new_mask.sum()) <= 1, f'Doesn\'t match for key {key}'

[('layer0', (784, 300)), ('layer1', (300, 100)), ('layer2', (100, 10))]


In [7]:
# Run the whole experiment pipeline end to end with the test settings.
# The dataset loader and the training routine are handed over unchanged.
make_dataset: callable = load_and_process_mnist
train_model: callable = train
# Pre-bind the leading arguments: the model receives its random seed (0) and
# the pruner receives the pruning percentages from the constants module.
make_model: callable = functools.partial(LeNet300, 0)
prune_masks: callable = functools.partial(prune_by_percent, C.PRUNING_PERCENTS)
experiment(
    make_dataset,
    make_model,
    train_model,
    prune_masks,
    C.TEST_PRUNING_STEPS,
)

Training step 0
Model Weights on Pruning Step 0:
{'layer0': <tf.Variable 'layer0_w:0' shape=(784, 300) dtype=float32, numpy=
array([[-0.01095543, -0.03241236,  0.03861747, ..., -0.05164757,
         0.01649918, -0.03801171],
       [ 0.06459107, -0.00970608,  0.03922627, ...,  0.06924228,
        -0.04589269,  0.01476902],
       [ 0.00807161, -0.0618956 , -0.05287512, ...,  0.07366548,
        -0.05605013,  0.00401708],
       ...,
       [-0.06346384,  0.02620716, -0.03625106, ..., -0.04173488,
        -0.03119184,  0.04912826],
       [ 0.05347425,  0.03211229, -0.0073006 , ..., -0.07353579,
         0.02471122,  0.03639831],
       [-0.03244734, -0.06391738,  0.00431321, ...,  0.0412811 ,
        -0.07013982, -0.05800674]], dtype=float32)>, 'layer1': <tf.Variable 'layer1_w:0' shape=(300, 100) dtype=float32, numpy=
array([[-0.09693109,  0.00341514,  0.11638828, ..., -0.10317374,
         0.09409881, -0.09659272],
       [-0.05561571, -0.00772835, -0.12183296, ...,  0.05656306,
     