In [1]:
#imports

import os
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.callbacks import Callback


In [2]:
# Load the MNIST digit images and their integer labels; the loader returns a
# (train, test) pair of (images, labels) tuples.
mnist = keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_full, y_full = train_split
x_test_full, y_test_full = test_split

In [3]:
# Training Data
#
# Binary task: "is this digit a zero?"
#   label 1 -> 5400 images of the digit 0
#   label 0 -> 5400 images of other digits (600 each of the digits 1..9)
N_TRAIN_ZEROS = 5400
N_TRAIN_PER_OTHER_DIGIT = 600

x_train_0 = x_full[y_full == 0][:N_TRAIN_ZEROS]
# Slicing silently returns fewer rows when the source is too small, so check the size.
assert len(x_train_0) == N_TRAIN_ZEROS, (
    f"expected {N_TRAIN_ZEROS} images of digit 0, found {len(x_train_0)}"
)
# Labels are sized from the selected images so x and y can never drift apart.
y_train_0 = np.ones((len(x_train_0),))
print("shape_training_0 =>")
print("\t",x_train_0.shape)
print("\t",y_train_0.shape)

x_train_others = []
y_train_others = []
for digit in range(1,10):
    x_digit = x_full[y_full == digit][:N_TRAIN_PER_OTHER_DIGIT]
    assert len(x_digit) == N_TRAIN_PER_OTHER_DIGIT, (
        f"expected {N_TRAIN_PER_OTHER_DIGIT} images of digit {digit}, found {len(x_digit)}"
    )
    y_digit = np.zeros((len(x_digit),))
    x_train_others.append(x_digit)
    y_train_others.append(y_digit)
print("lenght_training_other =>")
print("\t",len(x_train_others),len(y_train_others))

# Collapse the per-digit lists into single arrays.
x_train_others = np.concatenate(x_train_others, axis=0)
y_train_others = np.concatenate(y_train_others, axis=0)
print("shape_training_other =>")
print("\t",x_train_others.shape)
print("\t",y_train_others.shape)

# Merge both classes and shuffle images and labels together with a fixed seed.
x_train = np.concatenate([x_train_0, x_train_others], axis=0)
y_train = np.concatenate([y_train_0, y_train_others], axis=0)
x_train, y_train = shuffle(x_train, y_train, random_state=42)
print("shape_training_complete =>")
print("\t",x_train.shape)
print("\t",y_train.shape)


shape_training_0 =>
	 (5400, 28, 28)
	 (5400,)
lenght_training_other =>
	 9 9
shape_training_other =>
	 (5400, 28, 28)
	 (5400,)
shape_training_complete =>
	 (10800, 28, 28)
	 (10800,)


In [4]:
# Testing Data
#
# Same binary task as the training set: "is this digit a zero?"
#   label 1 -> 810 images of the digit 0
#   label 0 -> 810 images of other digits (90 each of the digits 1..9)
N_TEST_ZEROS = 810
N_TEST_PER_OTHER_DIGIT = 90

x_test_0 = x_test_full[y_test_full == 0][:N_TEST_ZEROS]
# Slicing silently returns fewer rows when the source is too small, so check the size.
assert len(x_test_0) == N_TEST_ZEROS, (
    f"expected {N_TEST_ZEROS} images of digit 0, found {len(x_test_0)}"
)
# Labels are sized from the selected images so x and y can never drift apart.
y_test_0 = np.ones((len(x_test_0),))
print("shape_testing_0 =>")
print("\t",x_test_0.shape)
print("\t",y_test_0.shape)

x_test_others = []
y_test_others = []
for digit in range(1,10):
    x_digit = x_test_full[y_test_full == digit][:N_TEST_PER_OTHER_DIGIT]
    assert len(x_digit) == N_TEST_PER_OTHER_DIGIT, (
        f"expected {N_TEST_PER_OTHER_DIGIT} images of digit {digit}, found {len(x_digit)}"
    )
    y_digit = np.zeros((len(x_digit),))
    x_test_others.append(x_digit)
    y_test_others.append(y_digit)
print("lenght_testing_other =>")
print("\t",len(x_test_others),len(y_test_others))

# Collapse the per-digit lists into single arrays.
x_test_others = np.concatenate(x_test_others, axis=0)
y_test_others = np.concatenate(y_test_others, axis=0)
print("shape_testing_other =>")
print("\t",x_test_others.shape)
print("\t",y_test_others.shape)


# Merge both classes and shuffle images and labels together with a fixed seed.
x_test = np.concatenate([x_test_0, x_test_others], axis=0)
y_test = np.concatenate([y_test_0, y_test_others], axis=0)
x_test, y_test = shuffle(x_test, y_test, random_state=42)
print("shape_testing_complete =>")
print("\t",x_test.shape)
print("\t",y_test.shape)


shape_testing_0 =>
	 (810, 28, 28)
	 (810,)
lenght_testing_other =>
	 9 9
shape_testing_other =>
	 (810, 28, 28)
	 (810,)
shape_testing_complete =>
	 (1620, 28, 28)
	 (1620,)


In [5]:
# Define the model: four stacked single-filter convolutions followed by a small
# dense head with one sigmoid unit for the binary target.
#
# The input shape is declared with an explicit Input object instead of an
# `input_shape=` argument on the first layer, which recent Keras versions warn
# about. Sequential.layers does not list the Input object, so indices 0-7 still
# refer to the eight layers below.
model = models.Sequential([
    layers.Input(shape=(28, 28, 1)),

    # 'valid' padding shrinks the feature map at each step: 28 -> 20 -> 14 -> 10 -> 8.
    layers.Conv2D(1, (9, 9), padding='valid'),
    layers.Conv2D(1, (7, 7), padding='valid'),
    layers.Conv2D(1, (5, 5), padding='valid'),
    layers.Conv2D(1, (3, 3), padding='valid'),

    layers.Flatten(),
    layers.Dense(16, activation='relu'),
    layers.Dense(4, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

  super().__init__(


In [6]:
# Show the first weight array of the layer at index 7 as it stands before any
# weights are loaded from disk.
model.layers[7].get_weights()[0]

array([[ 0.0323149 ],
       [ 0.2769996 ],
       [-0.23033816],
       [-0.64303297]], dtype=float32)

In [7]:
def load_model_weights(model, weight_dir="../../generateModelWeights_week4/InitWeightsSeriesConverge"):
    """Overwrite layer weights with arrays stored in per-layer ``.npz`` files.

    For the layer at position ``i`` in ``model.layers`` the file
    ``<weight_dir>/layer_number_<i>.npz`` is looked up. When it exists and the
    layer has weights, the arrays in the archive are passed, in stored order,
    to ``layer.set_weights``. One status line is printed per layer.

    Args:
        model: Object exposing ``layers``; each layer must provide ``weights``
            and ``set_weights``.
        weight_dir: Directory holding the ``layer_number_<i>.npz`` files.

    Returns:
        None.
    """
    for layer_num, layer in enumerate(model.layers):
        file_path = os.path.join(weight_dir, f"layer_number_{layer_num}.npz")

        if not os.path.exists(file_path):
            print(f"No weights found for layer {layer_num}.")
            continue

        if not layer.weights:
            # Nothing to assign, so the archive is not opened at all.
            print(f"Layer {layer_num} has no weights.")
            continue

        # The context manager closes the archive's file handle once the arrays
        # have been read into memory.
        with np.load(file_path) as loaded:
            weights = [loaded[key] for key in loaded.files]

        layer.set_weights(weights)
        print(f"Loaded weights for layer {layer_num} from {file_path}")

# Replace the model's current weights with the stored ones, using the default weight_dir.
load_model_weights(model)

Loaded weights for layer 0 from ../../generateModelWeights_week4/InitWeightsSeriesConverge\layer_number_0.npz
Loaded weights for layer 1 from ../../generateModelWeights_week4/InitWeightsSeriesConverge\layer_number_1.npz
Loaded weights for layer 2 from ../../generateModelWeights_week4/InitWeightsSeriesConverge\layer_number_2.npz
Loaded weights for layer 3 from ../../generateModelWeights_week4/InitWeightsSeriesConverge\layer_number_3.npz
No weights found for layer 4.
Loaded weights for layer 5 from ../../generateModelWeights_week4/InitWeightsSeriesConverge\layer_number_5.npz
Loaded weights for layer 6 from ../../generateModelWeights_week4/InitWeightsSeriesConverge\layer_number_6.npz
Loaded weights for layer 7 from ../../generateModelWeights_week4/InitWeightsSeriesConverge\layer_number_7.npz


In [8]:
# After loading, the first weight array of layer 7 must equal the reference
# values below. The comparison is executed so a changed or missing weight file
# fails here instead of relying on a by-eye check of the displayed output.
expected_layer7_kernel = np.array(
    [[ 0.7231482 ],
     [-0.81789035],
     [-0.07746065],
     [-0.6027653 ]], dtype=np.float32)
np.testing.assert_allclose(
    model.layers[7].get_weights()[0], expected_layer7_kernel, rtol=1e-5
)
model.layers[7].get_weights()[0]

array([[ 0.7231482 ],
       [-0.81789035],
       [-0.07746065],
       [-0.6027653 ]], dtype=float32)

In [9]:
# Snapshot the first weight array of every layer that has weights, taken before training.
initial_kernels = [
    layer_params[0]
    for layer_params in (layer.get_weights() for layer in model.layers)
    if len(layer_params) > 0
]


In [10]:
class SaveWeightsCallback(callbacks.Callback):
    """Keras callback that writes every weighted layer's arrays to disk after each epoch.

    Files are written as ``<save_dir>/AfterEpoch<k>/Layer_<i>.npz``, where ``k``
    is ``epoch + 1`` and ``i`` is the layer's index in ``model.layers``.
    """

    def __init__(self, save_dir):
        """Store the root directory that will hold the per-epoch folders.

        Args:
            save_dir: Directory under which ``AfterEpoch<k>`` folders are created.
        """
        super().__init__()
        self.save_dir = save_dir

    def on_epoch_end(self, epoch, logs=None):
        """Save each weighted layer's arrays into this epoch's folder.

        Args:
            epoch: Epoch index supplied by Keras; 1 is added to form the folder name.
            logs: Metrics dictionary supplied by Keras; not used here.
        """
        epoch_dir = os.path.join(self.save_dir, f"AfterEpoch{epoch + 1}")
        # exist_ok removes the check-then-create race and makes re-runs harmless.
        os.makedirs(epoch_dir, exist_ok=True)

        for layer_num, layer in enumerate(self.model.layers):
            if not layer.weights:
                # Layers without weights have nothing to save.
                continue
            file_path = os.path.join(epoch_dir, f"Layer_{layer_num}.npz")
            # Arrays are stored positionally, in the order get_weights() returns them.
            np.savez(file_path, *layer.get_weights())
            print(f"Saved weights for layer {layer_num} to {file_path}")


In [11]:
# Root folder for the per-epoch weight dumps, relative to the notebook's working directory.
save_dir = "./ZeroModelWeightsTraining"
# Callback instance handed to model.fit so weights are written after every epoch.
save_weights_callback = SaveWeightsCallback(save_dir)

In [12]:
# Train for 10 epochs; the callback writes every weighted layer to disk after each epoch.
# No batch_size or validation data is passed, so Keras' defaults apply.
history = model.fit(
    x_train, y_train,
    epochs=10,  
    callbacks=[save_weights_callback]
)

Epoch 1/10
[1m336/338[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.6986 - loss: 1.2816Saved weights for layer 0 to ./ZeroModelWeightsTraining\AfterEpoch1\Layer_0.npz
Saved weights for layer 1 to ./ZeroModelWeightsTraining\AfterEpoch1\Layer_1.npz
Saved weights for layer 2 to ./ZeroModelWeightsTraining\AfterEpoch1\Layer_2.npz
Saved weights for layer 3 to ./ZeroModelWeightsTraining\AfterEpoch1\Layer_3.npz
Saved weights for layer 5 to ./ZeroModelWeightsTraining\AfterEpoch1\Layer_5.npz
Saved weights for layer 6 to ./ZeroModelWeightsTraining\AfterEpoch1\Layer_6.npz
Saved weights for layer 7 to ./ZeroModelWeightsTraining\AfterEpoch1\Layer_7.npz
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6992 - loss: 1.2765
Epoch 2/10
[1m332/338[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.8566 - loss: 0.3304Saved weights for layer 0 to ./ZeroModelWeightsTraining\AfterEpoch2\Layer_0.npz
Saved weights

In [13]:
# Report the training accuracy recorded for each epoch, numbered from 1.
accuracy_per_epoch = history.history['accuracy']
epoch_numbers = range(1, len(accuracy_per_epoch) + 1)
for epoch, accuracy in zip(epoch_numbers, accuracy_per_epoch):
    print(f"Epoch {epoch}: Accuracy = {accuracy:.4f}")


Epoch 1: Accuracy = 0.7669
Epoch 2: Accuracy = 0.8718
Epoch 3: Accuracy = 0.9242
Epoch 4: Accuracy = 0.9544
Epoch 5: Accuracy = 0.9631
Epoch 6: Accuracy = 0.9677
Epoch 7: Accuracy = 0.9697
Epoch 8: Accuracy = 0.9741
Epoch 9: Accuracy = 0.9771
Epoch 10: Accuracy = 0.9803


In [14]:
# Score the trained model on the test split; evaluate returns the loss followed
# by the compiled metric (accuracy).
evaluation = model.evaluate(x_test, y_test)
test_loss, test_acc = evaluation
print(f'Test accuracy: {test_acc}')

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9727 - loss: 0.0615
Test accuracy: 0.9771605134010315


In [15]:
# Snapshot the first weight array of every layer that has weights, taken after training.
final_kernels = [
    layer_params[0]
    for layer_params in (layer.get_weights() for layer in model.layers)
    if len(layer_params) > 0
]


In [16]:
def save_kernels(before_weights, after_weights, layer_num, vmin, vmax):
    """Save a side-by-side heatmap of a kernel before and after training.

    The figure is written to ``kernel_<layer_num>.png`` in the current working
    directory and is always closed afterwards, even if plotting or saving fails.

    Args:
        before_weights: 2D array of kernel values before training (left panel).
        after_weights: 2D array of kernel values after training (right panel).
        layer_num: Number used in the panel titles and in the file name.
        vmin: Lower limit of the color scale shared by both panels.
        vmax: Upper limit of the color scale shared by both panels.
    """
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    try:
        panels = (
            (axes[0], before_weights, 'Before Training'),
            (axes[1], after_weights, 'After Training'),
        )
        for ax, weights, stage in panels:
            # Identical vmin/vmax on both panels keeps their colors directly comparable.
            sns.heatmap(weights, ax=ax, cmap="coolwarm", cbar=False, vmin=vmin, vmax=vmax, annot=True)
            ax.set_title(f'Layer {layer_num} - {stage}')

        # Save through the figure object rather than pyplot's implicit current figure.
        fig.savefig(f'kernel_{layer_num}.png')
    finally:
        # Release the figure even on failure so repeated calls do not accumulate open figures.
        plt.close(fig)

In [17]:
# Write a before/after comparison image for the first four kernel snapshots,
# which correspond to the four convolution layers.
for layer_idx in range(4):

    # Take the 2D slice for input channel 0 and output channel 0.
    kernel_before = initial_kernels[layer_idx][:, :, 0, 0]
    kernel_after = final_kernels[layer_idx][:, :, 0, 0]

    # Shared limits so both heatmaps are drawn on the same color scale.
    lowest = min(kernel_before.min(), kernel_after.min())
    highest = max(kernel_before.max(), kernel_after.max())

    # File names and titles are numbered from 1.
    save_kernels(kernel_before, kernel_after, layer_idx + 1, lowest, highest)