In [16]:
import tensorflow as tf
tf.keras.backend.set_floatx('float64')
from tensorflow import keras
import numpy.typing as npt
from importlib import reload
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import os

print("Numpy Version:", np.__version__)
print("Tensorflow Version:", tf.__version__)

from tqdm import tqdm
from typing import Dict, Generator, List, Tuple

from src.harness import architecture as arch
from src.harness import dataset as ds
from src.harness import history as hist

from src.metrics.features import *
from src.metrics.synflow import compute_synflow_per_weight

Numpy Version: 1.26.4
Tensorflow Version: 2.17.0


In [17]:
path = os.path.join(os.path.expanduser("~"), "lottery-tickets/experiments/11-04-2024/")
experiment_directories = os.listdir(path)
e = os.path.join(path, experiment_directories[1])
print(e)
experiments = list(hist.get_experiments(e))
e0 = experiments[0]
for trial in experiments[0]:
    trial.seed_weights = lambda x: x
    pass

a = arch.Architecture("lenet", "mnist")
model = a.get_model_constructor()()
model.set_weights([m * w for m, w in zip(trial.masks, trial.initial_weights)])

/users/j/b/jbourde2/lottery-tickets/experiments/11-04-2024/lenet_mnist_0_seed_5_experiments_1_batches_0.025_default_sparsity_lm_pruning_20241102-111614


In [15]:
merged_df.columns

Index(['sparsity', 'size', 'l_size', 'l_rel_size', 'l_sparsity', 'lf_mean',
       'lf_std', 'lf_prop_positive', 'li_mean', 'li_std', 'li_prop_positive',
       'dense', 'bias', 'conv', 'output', 'wf_sign', 'wi_sign', 'wf_val',
       'wi_val', 'wf_mag', 'wi_mag', 'wf_perc', 'wi_perc', 'wf_std', 'wi_std',
       'w_mask', 'wf_synflow', 'wi_synflow', 'arch_lenet', 'dataset_mnist'],
      dtype='object')

In [18]:
df_path = "weightabase.pkl"
merged_df = pd.read_pickle(df_path)
# corrected_wdf = correct_class_imbalance(wdf)
# merged_df = merge_dfs(tdf, ldf, corrected_wdf)
# merged_df.to_pickle(df_path)

In [9]:
merged_df["wi_mag"].max(), merged_df["wi_mag"].min()

(0.23342691, 0.0)

In [None]:
merged_df["norm_wi_mag"] = merged_df["wi_mag"]

Feature importance ovservations:

- Mask & sign directly tell the model what the outcome is (sign == 0 rather than +/- 1) which gets it perfectly
- The final measures for weights all get >93% accuracy, magnitude gets 98.14%
- Measures of current sparsity get pretty high (layer and overall sparsity both ~78%)
- The initial percentile a weight falls in gets 75% (wi_std gets much worse even though they are the same measure- perhaps the scale being between 0 and 1 makes it easier to train on?)
- All the OHE and initial weight magnitude measures get 57.85% accuracy
    - What is special about this number?
    - Why are the initial metrics (including magnitude) so uninformative?
        - Could a normalization scheme help this?
    - Why does "wi_std" do worse than random chance?

In [6]:
merged_df["wf_sign"].value_counts()

wf_sign
 0.0    2623048
-1.0    1328976
 1.0    1292028
Name: count, dtype: int64

In [231]:
f_features = ["l_sparsity", "l_rel_size", "lf_prop_positive", "wf_std", "wf_perc", "wf_synflow", "wf_sign", "dense", "bias", "conv", "output"]
fX, fY = featurize_db(merged_df, f_features)

print("Creating meta mask model off the final weights")
f_meta = create_meta(fX[0].shape)
f_meta.fit(fX, fY, epochs=3, batch_size=256, validation_split=0.2, shuffle=True)

Creating meta mask model off the final weights
Epoch 1/3


2025-02-22 20:43:58.964395: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:43:58.964540: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:43:58.964638: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:43:59.006219: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:43:59.006343: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:43:59.006402: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE


[1m16323/16388[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 647us/step - accuracy: 0.9577 - loss: 0.1832

2025-02-22 20:44:09.988488: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:44:09.988776: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:44:09.988834: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:44:10.000835: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:44:10.001098: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 20:44:10.001156: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE


[1m16388/16388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 783us/step - accuracy: 0.9579 - loss: 0.1827 - val_accuracy: 1.0000 - val_loss: 2.2399e-04
Epoch 2/3
[1m16388/16388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 775us/step - accuracy: 1.0000 - loss: 3.1697e-05 - val_accuracy: 1.0000 - val_loss: 4.9822e-06
Epoch 3/3
[1m16388/16388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 775us/step - accuracy: 1.0000 - loss: 3.2371e-07 - val_accuracy: 1.0000 - val_loss: 3.2796e-07


<keras.src.callbacks.history.History at 0x2b635b506790>

In [204]:
i_features = ["l_sparsity", "l_rel_size", "li_prop_positive", "wi_std", "wi_perc", "wi_synflow", "wi_sign", "dense", "bias", "conv", "output"]

i_features = ["l_sparsity"]
iX, iY = featurize_db(merged_df, i_features)

print("Creating meta mask model off the initial weights")
i_meta = create_meta(iX[0].shape)
history = i_meta.fit(iX, iY, epochs=3, batch_size=256, validation_split=0.2, shuffle=True)


Creating meta mask model off the initial weights
Epoch 1/3


2025-02-22 15:24:41.700584: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:41.700714: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:41.700771: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:41.736803: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:41.736927: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:41.736985: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE


[1m16335/16388[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 607us/step - accuracy: 0.7388 - loss: 0.5134

2025-02-22 15:24:52.044018: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:52.044331: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:52.044399: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:52.056988: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:52.057278: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE
2025-02-22 15:24:52.057343: W tensorflow/core/util/util.cc:161] Not handling type DT_DOUBLE


[1m16388/16388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 743us/step - accuracy: 0.7389 - loss: 0.5133 - val_accuracy: 0.8183 - val_loss: 0.4447
Epoch 2/3
[1m16388/16388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 739us/step - accuracy: 0.7807 - loss: 0.4554 - val_accuracy: 0.8183 - val_loss: 0.4445
Epoch 3/3
[1m16388/16388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 737us/step - accuracy: 0.7805 - loss: 0.4554 - val_accuracy: 0.8183 - val_loss: 0.4445


In [187]:
masks, accuracies = make_meta_mask(i_meta, make_x, "lenet", "mnist", 50)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.0911 - loss: 2.3523
Step 0 accuracy: 9.73%


  saveable.load_own_variables(weights_store.get(inner_path))
  w_std = [(w - l_mean) / l_std for w, l_mean, l_std in zip(l_std, l_mean, masked_weights)]


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_607" is incompatible with the layer: expected axis -1 of input shape to have value 1, but received input with shape (266610, 11)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(266610, 11), dtype=float64)
  • training=False
  • mask=None