In [None]:
import tensorflow as tf
# Use float64 as the default Keras float type for everything created below.
tf.keras.backend.set_floatx('float64')
from tensorflow import keras
import numpy.typing as npt
from importlib import reload
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd
from pprint import pprint
import os

# Record the library versions in the cell output so results can be tied to them.
print("Numpy Version:", np.__version__)
print("Tensorflow Version:", tf.__version__)

from tqdm import tqdm
from typing import Dict, Generator, List, Tuple

from src.harness import architecture as arch
from src.harness import dataset as ds
from src.harness import meta
from src.harness import history as hist

from src.metrics import features as f
from src.metrics.synflow import compute_synflow_per_weight

In [None]:
# Re-import the features module so local edits are picked up without
# restarting the kernel.
reload(f)

# NOTE(review): absolute, user-specific path to a single experiment directory;
# this cell only runs on a machine where that directory exists.
epath = "/users/j/b/jbourde2/lottery-tickets/experiments/11-04-2024/lenet_mnist_0_seed_5_experiments_1_batches_0.025_default_sparsity_lm_pruning_20241102-111614"
# Materialise everything `hist.get_experiments` yields for that directory.
experiments = list(hist.get_experiments(epath))
# Each experiment is consumed with next(), i.e. it is treated as an iterator.
e0 = experiments[0]
t0 = next(e0)  # presumably the first trial of the experiment (TODO confirm)
# Replace `seed_weights` on this object with an identity function.
# NOTE(review): the reason is not visible here; presumably it disables
# re-seeding of the weights, confirm before relying on it.
t0.seed_weights = lambda x: x

In [None]:
# Load the previously cached dataframe from its pickle file.
# NOTE(review): pd.read_pickle can execute arbitrary code; only load files
# that were produced locally / are trusted.
df_path = "weightabase.pkl"
merged_df = pd.read_pickle(df_path)
# The commented-out lines below look like the steps that originally built and
# cached `merged_df`; the names they use are not defined in the cells shown here.
# corrected_wdf = correct_class_imbalance(wdf)
# merged_df = merge_dfs(tdf, ldf, corrected_wdf)
# merged_df.to_pickle(df_path)

In [None]:
# Write the frame back to the pickle cache. The filename is the same literal
# that `df_path` holds in the loading cell, so this overwrites the loaded file.
merged_df.to_pickle("weightabase.pkl")

Feature importance observations:

- Mask and sign directly tell the model what the outcome is (sign == 0 rather than +/- 1), so the model gets it perfectly
- The final measures for weights all get >93% accuracy, magnitude gets 98.14%
- Measures of current sparsity get pretty high (layer and overall sparsity both ~78%)
- The initial percentile a weight falls in gets 75% (wi_std does much worse even though the two are the same measure; perhaps the percentile's scale being between 0 and 1 makes it easier to train on?)
- All the OHE and initial weight magnitude measures get 57.85% accuracy
    - What is special about this number?
    - Why are the initial metrics (including magnitude) so uninformative?
        - Could a normalization scheme help this?
    - Why does "wi_std" do worse than random chance?

In [None]:
from copy import copy as shallowcopy
import numpy as np
import numpy.typing as npt
import tensorflow as tf
from tensorflow import keras
from typing import Callable, List, Tuple

def make_meta_mask(
    meta: keras.Model,
    make_x: Callable[[str, keras.Model, List[npt.NDArray]], npt.NDArray],
    architecture: str,
    dataset: str,
    steps: int,
) -> Tuple[List[npt.NDArray], List[float]]:
    """Iteratively mask a freshly constructed model using a meta model's predictions.

    A new model for `architecture`/`dataset` is built and its initial weights
    are remembered. Each step evaluates the current (masked) model on the
    validation split, builds per-weight features with `make_x`, asks `meta`
    for a new mask, and re-applies that mask to the *initial* weights.

    Args:
        meta: Model whose `predict` returns one value per weight, in the row
            order produced by `make_x`. Note that inside this function the
            name shadows the `meta` module imported at the top of the notebook.
        make_x: Called as `make_x(architecture, model, masks)`; must return
            the feature matrix passed to `meta.predict`.
        architecture: Architecture name passed to `arch.Architecture`.
        dataset: Dataset name passed to `arch.Architecture`.
        steps: Number of evaluate -> predict -> mask iterations.

    Returns:
        `(masks, accuracies)`. `masks` holds one array per entry of
        `model.get_weights()`. `accuracies[i]` is the validation accuracy
        measured at the start of step `i`, i.e. before that step's mask is
        applied, so the returned final masks are never evaluated here.
    """
    # Imported locally: the notebook only binds `copy.copy` (as `shallowcopy`),
    # so the module name `copy` is not available for `copy.deepcopy`.
    from copy import deepcopy

    a = arch.Architecture(architecture, dataset)
    _, val_X, _, val_Y = a.load_data()
    model = a.get_model_constructor()()
    original_weights = deepcopy(model.get_weights())
    model.compile(optimizer="Adam", loss=tf.keras.losses.CategoricalCrossentropy(), metrics=["accuracy"])
    masks = [np.ones_like(w) for w in model.get_weights()]

    def update_masks(mask_pred: npt.NDArray) -> List[npt.NDArray]:
        """Split the flat prediction into arrays shaped like the current masks."""
        new_masks = []
        start = 0
        for m in masks:
            end = start + m.size
            new_masks.append(np.reshape(mask_pred[start:end], m.shape))
            start = end
        # Return the freshly built masks; returning the old ones would make
        # every step a no-op.
        return new_masks

    accuracies = []
    for step in range(steps):
        # Get validation accuracy
        _, accuracy = model.evaluate(val_X, val_Y)
        accuracies.append(accuracy)
        print(f"Step {step} accuracy: {accuracy:.2%}")
        # Extract features
        X = make_x(architecture, model, masks)
        # Predict and replace existing mask
        # NOTE(review): the prediction is used as the mask as-is; if `meta`
        # outputs probabilities rather than 0/1 values a threshold is
        # presumably needed. Confirm against how `meta` was trained.
        mask_pred = meta.predict(X, batch_size=2**20)
        masks = update_masks(mask_pred)
        model.set_weights([w * m for w, m in zip(original_weights, masks)])

    return masks, accuracies


def make_x(
    architecture: str,
    model: keras.Model,
    masks: List[npt.NDArray],
    train_steps: int = 0,
    batch_size: int = 32,
) -> npt.NDArray:
    """Build the per-weight feature matrix that the meta model predicts masks from.

    The matrix has one row per parameter, ordered array by array as in
    `masks` / `model.get_weights()`, each array flattened with `np.ravel`.

    Columns, in order:
        0. fraction of non-zero mask entries in the weight's array
        1. size of the weight's array relative to the total parameter count
        2. fraction of masked weights in the array that are >= 0
        3. masked weight standardised by its array's mean and std
        4. percentile of the weight among the unpruned weights of its array
        5. score from `compute_synflow_per_weight` on the masked model
        6. sign of the masked weight (0 where the weight is masked out)
        7+. one column per column of `Architecture.ohe_layer_types`

    Args:
        architecture: Architecture name used to look up the layer-type encoding.
        model: Model whose current weights are masked and featurised.
        masks: One mask per entry of `model.get_weights()`, same shapes.
        train_steps: Currently unused.
        batch_size: Currently unused.

    Returns:
        Float array of shape `(n_params, 7 + n_layer_type_columns)`. With four
        layer-type columns this is the 11 features previously hard-coded.
    """
    layer_sizes = [np.size(m) for m in masks]
    nparams = sum(layer_sizes)

    # Make a separate copy to compute synflow for
    masked_weights = [w * m for w, m in zip(model.get_weights(), masks)]
    masked_model = shallowcopy(model)
    masked_model.set_weights(masked_weights)
    synflow_scores = [np.reshape(scores, -1) for scores in compute_synflow_per_weight(masked_model)]

    # Per-array scalar features
    sparsities = [np.count_nonzero(m) / np.size(m) for m in masks]
    rel_size = [size / nparams for size in layer_sizes]
    # Computed on the masked weights and normalised by array size: every mask
    # entry is >= 0, so counting over the masks only ever yields the array size.
    # NOTE(review): confirm this matches the feature the meta model was trained on.
    prop_pos = [np.count_nonzero(w >= 0) / np.size(w) for w in masked_weights]

    # Layer type
    # assumes shape (n_arrays, n_layer_types), since it is indexed as [:, i]; TODO confirm
    layer_ohe = arch.Architecture.ohe_layer_types(architecture)
    ohe_columns = [layer_ohe[:, i] for i in range(layer_ohe.shape[1])]

    # Per-weight features
    w_std = []
    w_perc = []
    for w, m in zip(masked_weights, masks):
        flat_w = np.ravel(w)

        # Standardise within the array; an all-constant array (e.g. zero
        # biases) has std 0 and would otherwise produce NaNs.
        std = np.std(flat_w)
        if std > 0:
            w_std.append((flat_w - np.mean(flat_w)) / std)
        else:
            w_std.append(np.zeros_like(flat_w))

        # Percentile in [0, 1]: share of the array's unpruned weights that are
        # <= this weight. One value per weight, so the column is fully filled.
        # NOTE(review): confirm this matches the training-time definition.
        kept = np.sort(flat_w[np.ravel(m) != 0])
        if kept.size:
            w_perc.append(np.searchsorted(kept, flat_w, side="right") / kept.size)
        else:
            w_perc.append(np.zeros_like(flat_w))

    w_sign = [np.sign(w) for w in masked_weights]

    layer_feature_sets = [sparsities, rel_size, prop_pos]
    weight_feature_sets = [w_std, w_perc, synflow_scores, w_sign]

    # Derive the width from what is actually written instead of hard-coding
    # it, so a different number of layer-type columns cannot overflow the matrix.
    nfeatures = len(layer_feature_sets) + len(weight_feature_sets) + len(ohe_columns)
    features = np.zeros((nparams, nfeatures))

    # Helper functions to add the unrolled weight values and
    # scalar layer values to the feature matrix
    n = 0

    def add_layer_features(layer_values: List[float]):
        """Broadcast one scalar per array over that array's rows in column `n`."""
        nonlocal n
        start = 0
        for v, size in zip(layer_values, layer_sizes):
            end = start + size
            features[start:end, n] = v
            start = end
        n += 1

    def add_weight_features(weight_features: List[npt.NDArray]):
        """Write one value per weight, array by array, into column `n`."""
        nonlocal n
        start = 0
        for v in weight_features:
            end = start + v.size
            features[start:end, n] = np.ravel(v)
            start = end
        n += 1

    for values in layer_feature_sets:
        add_layer_features(values)

    for values in weight_feature_sets:
        add_weight_features(values)

    for values in ohe_columns:
        add_layer_features(values)

    return features
