In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import save_model, load_model

In [2]:
def get_mask(pruned_model):
    """Build binary masks marking the non-zero parameters of a pruned model.

    The first entry of ``pruned_model.layers`` (index 0) is skipped. Every
    remaining layer must return at least two arrays from ``get_weights()``;
    the first is treated as the weights and the second as the biases.

    Parameters
    ----------
    pruned_model
        Object exposing a ``layers`` sequence whose items provide
        ``get_weights()``.

    Returns
    -------
    list
        One ``[weight_mask, bias_mask]`` pair per processed layer, in layer
        order. Each mask is a float64 array holding 1.0 where the parameter
        is non-zero and 0.0 where it is exactly zero.

    Raises
    ------
    IndexError
        If a processed layer returns fewer than two arrays.
    """
    mask = []
    for layer in pruned_model.layers[1:]:
        # get_weights() is fetched once per layer instead of once per
        # parameter array.
        params = layer.get_weights()
        weight_mask = (np.asarray(params[0]) != 0).astype(np.float64)
        bias_mask = (np.asarray(params[1]) != 0).astype(np.float64)
        mask.append([weight_mask, bias_mask])
    return mask

def set_model(init_model, pruned_model):
    """Load the initial weights into a pruned model, keeping its sparsity pattern.

    The weight mask of every layer is taken from ``pruned_model`` (via
    ``get_mask``) before anything is overwritten. For each layer from index 1
    onwards, the weights of ``init_model`` are written into the matching
    layer of ``pruned_model`` with masked-out positions set to zero. Biases
    are copied from ``init_model`` unmasked (pruning doesn't zero out biases).

    Parameters
    ----------
    init_model
        Model providing the initial weights and biases.
    pruned_model
        Model providing the sparsity pattern. It is modified in place.

    Returns
    -------
    The same ``pruned_model`` object, after its layers have been updated.
    """
    mask = get_mask(pruned_model)
    for idx, init_layer in enumerate(init_model.layers[1:], start=1):
        # Fetch the initial parameters once per layer.
        init_params = init_layer.get_weights()
        weights, biases = init_params[0], init_params[1]
        weight_mask = mask[idx - 1][0]  # mask list starts at layer index 1
        masked_weights = np.where(weight_mask == 0, 0, weights)
        pruned_model.layers[idx].set_weights([masked_weights, biases])
    return pruned_model

In [3]:
# All get_mine_* helpers below follow the same pattern; they differ only in which two distributions are compared and in the dimensions passed to MINE.

def get_mine_x_d1(model):
    """Estimate the mutual information between the input and the first hidden layer.

    The first distribution is ``x_train``; the second is the output of
    ``model.layers[1]`` evaluated on ``x_train``.

    Relies on names defined in other notebook cells: ``x_train``,
    ``input_dim``, ``d1_dim`` and the ``MINE`` estimator class.

    Parameters
    ----------
    model : keras.Model
        Network whose ``layers[1]`` can be applied to a ``(784,)`` input.
        It is not modified; a weight-identical copy is used instead.

    Returns
    -------
    tuple
        ``(fit_loss_history, mutual_info)`` as returned by ``MINE.fit``.
    """
    # clone_model() rebuilds the architecture with freshly initialised
    # weights, so the weights of `model` have to be copied explicitly.
    # Without this the activations would come from a random network.
    model_1 = keras.models.clone_model(model)
    model_1.set_weights(model.get_weights())

    distrib_x = x_train  # first distribution is the input

    inputs = keras.Input(shape=(784,), name="digits")
    outputs = model_1.layers[1](inputs)
    distrib_y_model = keras.Model(inputs=inputs, outputs=outputs)

    # Inference only, so no compile() (optimizer/loss/metrics) is needed.
    distrib_y = distrib_y_model.predict_on_batch(x_train)

    mine = MINE(x_dim=input_dim, y_dim=d1_dim)  # statistics network construction
    fit_loss_history, mutual_info = mine.fit(distrib_x, distrib_y, epochs=10, batch_size=128)  # MI estimation

    return fit_loss_history, mutual_info

In [4]:
def get_mine_x_d2(model):
    """Estimate the mutual information between the input and the second hidden layer.

    The first distribution is ``x_train``; the second is the output of
    ``model.layers[2]`` applied after ``model.layers[1]``, evaluated on
    ``x_train``.

    Relies on names defined in other notebook cells: ``x_train``,
    ``input_dim``, ``d2_dim`` and the ``MINE`` estimator class.

    Parameters
    ----------
    model : keras.Model
        Network whose ``layers[1]`` can be applied to a ``(784,)`` input and
        whose ``layers[2]`` consumes its output. It is not modified; a
        weight-identical copy is used instead.

    Returns
    -------
    tuple
        ``(fit_loss_history, mutual_info)`` as returned by ``MINE.fit``.
    """
    # clone_model() rebuilds the architecture with freshly initialised
    # weights, so the weights of `model` have to be copied explicitly.
    # Without this the activations would come from a random network.
    model_1 = keras.models.clone_model(model)
    model_1.set_weights(model.get_weights())

    distrib_x = x_train  # first distribution is the input

    inputs = keras.Input(shape=(784,), name="digits")
    hidden_1 = model_1.layers[1](inputs)
    outputs = model_1.layers[2](hidden_1)
    distrib_y_model = keras.Model(inputs=inputs, outputs=outputs)

    # Inference only, so no compile() (optimizer/loss/metrics) is needed.
    distrib_y = distrib_y_model.predict_on_batch(x_train)

    mine = MINE(x_dim=input_dim, y_dim=d2_dim)  # statistics network construction
    fit_loss_history, mutual_info = mine.fit(distrib_x, distrib_y, epochs=10, batch_size=128)  # MI estimation

    return fit_loss_history, mutual_info

In [5]:
def get_mine_x_o(model):
    """Estimate the mutual information between the input and the model output.

    The first distribution is ``x_train``; the second is the prediction of
    the full network on ``x_train``.

    Relies on names defined in other notebook cells: ``x_train``,
    ``input_dim``, ``output_dim`` and the ``MINE`` estimator class.

    Parameters
    ----------
    model : keras.Model
        Network to analyse. It is not modified; a weight-identical copy is
        used for the predictions.

    Returns
    -------
    tuple
        ``(fit_loss_history, mutual_info)`` as returned by ``MINE.fit``.
    """
    # clone_model() rebuilds the architecture with freshly initialised
    # weights, so the weights of `model` have to be copied explicitly.
    # Without this the predictions would come from a random network.
    distrib_y_model = keras.models.clone_model(model)
    distrib_y_model.set_weights(model.get_weights())

    distrib_x = x_train  # first distribution is the input

    # Inference only, so no compile() (optimizer/loss/metrics) is needed.
    distrib_y = distrib_y_model.predict_on_batch(x_train)  # model outputs given x_train

    mine = MINE(x_dim=input_dim, y_dim=output_dim)  # statistics network construction
    fit_loss_history, mutual_info = mine.fit(distrib_x, distrib_y, epochs=10, batch_size=128)  # MI estimation

    return fit_loss_history, mutual_info

In [6]:
def get_mine_d1_d2(model):
    """Estimate the mutual information between the first and second hidden layers.

    The first distribution is the output of ``model.layers[1]`` on
    ``x_train``; the second is the output of ``model.layers[2]`` applied to
    that first output.

    Relies on names defined in other notebook cells: ``x_train``, ``d1_dim``,
    ``d2_dim`` and the ``MINE`` estimator class.

    Parameters
    ----------
    model : keras.Model
        Network whose ``layers[1]`` can be applied to a ``(784,)`` input and
        whose ``layers[2]`` consumes its output. It is not modified; a
        weight-identical copy is used instead.

    Returns
    -------
    tuple
        ``(fit_loss_history, mutual_info)`` as returned by ``MINE.fit``.
    """
    # clone_model() rebuilds the architecture with freshly initialised
    # weights. Both activation sets must come from the same network carrying
    # the weights of `model`, so a single clone is used and the weights are
    # copied into it explicitly.
    model_1 = keras.models.clone_model(model)
    model_1.set_weights(model.get_weights())

    inputs = keras.Input(shape=(784,), name="digits")
    hidden_1 = model_1.layers[1](inputs)    # first distribution: layers[1] output
    hidden_2 = model_1.layers[2](hidden_1)  # second distribution: layers[2] output

    distrib_x_model = keras.Model(inputs=inputs, outputs=hidden_1)
    distrib_y_model = keras.Model(inputs=inputs, outputs=hidden_2)

    # Inference only, so no compile() (optimizer/loss/metrics) is needed.
    distrib_x = distrib_x_model.predict_on_batch(x_train)
    distrib_y = distrib_y_model.predict_on_batch(x_train)

    mine = MINE(x_dim=d1_dim, y_dim=d2_dim)  # statistics network for the MI estimation
    fit_loss_history, mutual_info = mine.fit(distrib_x, distrib_y, epochs=10, batch_size=128)  # estimate MI on the samples

    return fit_loss_history, mutual_info

In [7]:
def get_mine_d1_o(model):
    """Estimate the mutual information between the first hidden layer and the model output.

    The first distribution is the output of ``model.layers[1]`` on
    ``x_train``; the second is the prediction of the full network on
    ``x_train``.

    Relies on names defined in other notebook cells: ``x_train``, ``d1_dim``,
    ``output_dim`` and the ``MINE`` estimator class.

    Parameters
    ----------
    model : keras.Model
        Network whose ``layers[1]`` can be applied to a ``(784,)`` input.
        It is not modified; a weight-identical copy is used instead.

    Returns
    -------
    tuple
        ``(fit_loss_history, mutual_info)`` as returned by ``MINE.fit``.
    """
    # clone_model() rebuilds the architecture with freshly initialised
    # weights. Both distributions must come from the same network carrying
    # the weights of `model`, so a single clone is used and the weights are
    # copied into it explicitly.
    model_1 = keras.models.clone_model(model)
    model_1.set_weights(model.get_weights())

    inputs = keras.Input(shape=(784,), name="digits")
    outputs = model_1.layers[1](inputs)  # first distribution: layers[1] output
    distrib_x_model = keras.Model(inputs=inputs, outputs=outputs)

    # Inference only, so no compile() (optimizer/loss/metrics) is needed.
    distrib_x = distrib_x_model.predict_on_batch(x_train)

    # Second distribution: outputs of the full (cloned) model.
    distrib_y = model_1.predict_on_batch(x_train)

    mine = MINE(x_dim=d1_dim, y_dim=output_dim)  # statistics network for the MI estimation
    fit_loss_history, mutual_info = mine.fit(distrib_x, distrib_y, epochs=10, batch_size=128)  # estimate MI on the samples

    return fit_loss_history, mutual_info

In [8]:
def get_mine_d2_o(model):
    """Estimate the mutual information between the second hidden layer and the model output.

    The first distribution is the output of ``model.layers[2]`` applied after
    ``model.layers[1]`` on ``x_train``; the second is the prediction of the
    full network on ``x_train``.

    Relies on names defined in other notebook cells: ``x_train``, ``d2_dim``,
    ``output_dim`` and the ``MINE`` estimator class.

    Parameters
    ----------
    model : keras.Model
        Network whose ``layers[1]`` can be applied to a ``(784,)`` input and
        whose ``layers[2]`` consumes its output. It is not modified; a
        weight-identical copy is used instead.

    Returns
    -------
    tuple
        ``(fit_loss_history, mutual_info)`` as returned by ``MINE.fit``.
    """
    # clone_model() rebuilds the architecture with freshly initialised
    # weights. Both distributions must come from the same network carrying
    # the weights of `model`, so a single clone is used and the weights are
    # copied into it explicitly.
    model_1 = keras.models.clone_model(model)
    model_1.set_weights(model.get_weights())

    inputs = keras.Input(shape=(784,), name="digits")
    hidden_1 = model_1.layers[1](inputs)
    outputs = model_1.layers[2](hidden_1)  # first distribution: layers[2] output
    distrib_x_model = keras.Model(inputs=inputs, outputs=outputs)

    # Inference only, so no compile() (optimizer/loss/metrics) is needed.
    distrib_x = distrib_x_model.predict_on_batch(x_train)

    # Second distribution: outputs of the full (cloned) model.
    distrib_y = model_1.predict_on_batch(x_train)

    mine = MINE(x_dim=d2_dim, y_dim=output_dim)  # statistics network for the MI estimation
    fit_loss_history, mutual_info = mine.fit(distrib_x, distrib_y, epochs=10, batch_size=128)  # estimate MI on the samples

    return fit_loss_history, mutual_info

In [9]:
# Helper that computes the sparsity level (in percent) reached after a given number of pruning iterations at a fixed pruning rate.
def calc_sparsity(iteration, pruning_rate):
    """Return the sparsity, in percent, after ``iteration + 1`` pruning rounds.

    Each round keeps a fraction ``1 - pruning_rate`` of what remained before
    it, so the kept fraction after ``iteration + 1`` rounds is
    ``(1 - pruning_rate) ** (iteration + 1)``. The result is 100 minus that
    kept share expressed as a percentage.
    """
    rounds = iteration + 1
    kept_fraction = (1 - pruning_rate) ** rounds
    return 100 - 100 * kept_fraction

In [10]:
def calculate_average(list_of_lists):
    """Average the given nested sequence along its first axis (``axis=0``).

    For a list of equally long lists this returns one value per position,
    averaged across the outer list.
    """
    return np.average(list_of_lists, axis=0)

In [11]:
# Takes a list of loss histories (one per experiment run) and averages over the run dimension, e.g. 10 runs -> the average loss across those 10 runs.
def calculate_average_loss(losses_list):
    """Average a collection of loss sequences along the first axis.

    ``losses_list`` is converted to a NumPy array and averaged over
    ``axis=0``, i.e. across the entries of the outer list.
    """
    return np.average(np.array(losses_list), axis=0)