In [None]:
import tensorflow as tf
import keras
from keras import layers
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
!pip install tensorflow-model-optimization
from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.sparsity import keras as sparsity
from tensorflow.keras.models import load_model
from google.colab import files
import shutil
import gc

# Fix the global TensorFlow and NumPy seeds for the cells that follow.
tf.random.set_seed(42)
np.random.seed(42)



In [None]:
# MNIST setup: ten classes, images shaped (28, 28, 1)
num_classes = 10
input_shape = (28, 28, 1)

# Fetch the predefined train / test partition
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Cast to float32, divide by 255 to land in [0, 1], and append a trailing
# channel axis so every image has shape (28, 28, 1)
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]
print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")


# Turn the class vectors into binary (one-hot) class matrices
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes) for labels in (y_train, y_test)
)



x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [None]:
# Name fragments that later cells concatenate into output file names.
dataset = 'MNIST'
model_name = 'LeNet'

---

# Distillation

In [None]:
def train_custom(model, train_data, train_labels, test_data, test_labels,
                 epochs=25, batch_size=64):
    """Train a distiller epoch by epoch, dumping the student's test predictions each time.

    Before training, the student's predictions on ``test_data`` are written to
    ``<filepath>0_softmax.csv`` and its weights / full model are saved to
    ``init_weights`` / ``init_model``. After every epoch ``k`` the predictions
    are written to ``<filepath>k_softmax.csv`` and the student is evaluated on
    the test set. After the last epoch the student is saved to
    ``final_weights`` / ``final_model``.

    The names ``filepath``, ``init_weights``, ``init_model``, ``final_weights``
    and ``final_model`` are read from module scope and must be defined before
    calling this function.

    Args:
        model: object exposing ``fit`` and a ``student`` attribute (e.g. a
            compiled ``Distiller``).
        train_data: training inputs passed to ``model.fit``.
        train_labels: training targets passed to ``model.fit``.
        test_data: inputs used for the per-epoch prediction dumps and evaluation.
        test_labels: targets used for the per-epoch evaluation.
        epochs: number of training epochs (default 25, the previously
            hard-coded value).
        batch_size: mini-batch size passed to ``model.fit`` (default 64, the
            previously hard-coded value).

    Returns:
        None. Results are written to disk and printed.
    """
    import os  # local import: the notebook's import cell does not bring in `os`

    # `filepath` is used as a plain string prefix, so make sure its directory
    # part exists instead of relying on a separate `mkdir` having been run.
    out_dir = os.path.dirname(filepath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    def _dump_predictions(tag):
        """Write the student's predictions on the test set to `<filepath><tag>_softmax.csv`."""
        predictions = model.student.predict(test_data)
        pd.DataFrame(predictions).to_csv(filepath + str(tag) + '_softmax.csv', index=False)

    # Snapshot of the untrained student (tag 0).
    _dump_predictions(0)
    model.student.save_weights(init_weights)
    model.student.save(init_model)

    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}:")

        # Exactly one epoch per `fit` call so predictions can be dumped in between.
        model.fit(train_data, train_labels, epochs=1, batch_size=batch_size, verbose=1)

        _dump_predictions(epoch)

        # NOTE(review): unpacking into two values assumes the student itself was
        # compiled with a loss and exactly one metric — confirm against the saved model.
        loss, accuracy = model.student.evaluate(test_data, test_labels)
        print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

    # Persist the student after the final epoch.
    model.student.save_weights(final_weights)
    model.student.save(final_model)

# TODO: implement an evaluate function


In [None]:
class Distiller(tf.keras.Model):
    """Keras model that trains a student network against a fixed teacher.

    Each training step combines two terms:

    * ``student_loss_fn`` between the true labels and the student's output, and
    * ``distillation_loss_fn`` between temperature-scaled softmaxes of the
      teacher's and the student's outputs, multiplied by ``temperature**2``.

    The two terms are mixed as ``alpha * student_loss + (1 - alpha) *
    distillation_loss``. Only the student's variables are updated.
    """

    def __init__(self, student, teacher):
        """Store the wrapped models.

        Args:
            student: model whose trainable variables are optimised.
            teacher: model that is only called with ``training=False``.
        """
        super().__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha,
        temperature,
    ):
        """Configure the distiller.

        Args:
            optimizer: optimizer applied to the student's variables.
            metrics: metrics forwarded to ``tf.keras.Model.compile``.
            student_loss_fn: loss between true labels and student output.
            distillation_loss_fn: loss between the softened teacher and
                student outputs.
            alpha: weight of ``student_loss_fn``; ``1 - alpha`` weights the
                distillation term.
            temperature: divisor applied to both outputs before the softmax.
        """
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        """Run one optimisation step on the student.

        Args:
            data: an ``(x, y)`` pair of inputs and targets.

        Returns:
            Dict mapping metric names to their current results, plus the
            ``student_loss`` and ``distillation_loss`` of this step.
        """
        # Unpack data
        x, y = data

        # Teacher forward pass happens outside the tape: no gradients are needed for it.
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            # Forward pass of student
            student_predictions = self.student(x, training=True)

            # Loss against the true labels
            student_loss = self.student_loss_fn(y, student_predictions)

            # NOTE(review): if the wrapped models already end in a softmax, the
            # calls below re-normalise probabilities instead of softening
            # logits — confirm which kind of output the models produce.
            distillation_loss = (
                self.distillation_loss_fn(
                    tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
                    tf.nn.softmax(student_predictions / self.temperature, axis=1),
                )
                * self.temperature**2
            )

            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Gradients are taken with respect to the student only.
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, student_predictions)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"student_loss": student_loss, "distillation_loss": distillation_loss}
        )

        return results

    def test_step(self, data):
        """Evaluate the student on one batch.

        Args:
            data: an ``(x, y)`` pair of inputs and targets.

        Returns:
            Dict mapping metric names to their current results, plus the
            ``student_loss`` of this batch.
        """
        # Unpack the data
        x, y = data

        # Compute predictions (the leftover debug print of this tensor was removed)
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results

In [None]:
# Load the teacher network from its saved-model path.
teacher_model_path = '/content/base/LeNetMNIST_final.tf'
teacher_model = load_model(teacher_model_path)
# Backward-compatible alias: other cells in this notebook refer to the teacher
# by the misspelled name `techer_model`.
techer_model = teacher_model

# Student 0.1 Alpha (1)

In [None]:
model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)
distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=3,
)
filepath = '/content/student_same/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'
!mkdir '/content/student_same/'
train_custom(distiller, x_train,y_train,x_test,y_test)

Epoch 1:
Test Loss: 0.07424774765968323, Test Accuracy: 0.9771000146865845
Epoch 2:
Test Loss: 0.05483292415738106, Test Accuracy: 0.983299970626831
Epoch 3:
Test Loss: 0.048971135169267654, Test Accuracy: 0.9843000173568726
Epoch 4:
Test Loss: 0.04400905966758728, Test Accuracy: 0.9854000210762024
Epoch 5:
Test Loss: 0.04068867489695549, Test Accuracy: 0.9871000051498413
Epoch 6:
Test Loss: 0.03888789936900139, Test Accuracy: 0.9872999787330627
Epoch 7:
Test Loss: 0.036325279623270035, Test Accuracy: 0.9879999756813049
Epoch 8:
Test Loss: 0.03467479720711708, Test Accuracy: 0.9884999990463257
Epoch 9:
Test Loss: 0.032892655581235886, Test Accuracy: 0.9894000291824341
Epoch 10:
Test Loss: 0.03322526067495346, Test Accuracy: 0.9896000027656555
Epoch 11:
Test Loss: 0.03330741822719574, Test Accuracy: 0.9894999861717224
Epoch 12:
Test Loss: 0.03321162611246109, Test Accuracy: 0.9896000027656555
Epoch 13:
Test Loss: 0.032024212181568146, Test Accuracy: 0.9901000261306763
Epoch 14:
Test Los

In [None]:
# Zip the run's output directory, then hand the archive to the browser.
shutil.make_archive(base_name="student_same", format='zip', root_dir="student_same")
files.download(filename="student_same.zip")

# Student 0.1 Alpha (2)

In [None]:
np.random.seed(24)
tf.random.set_seed(24)

model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=3,
)
filepath = '/content/student_same_2/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'
!mkdir '/content/student_same_2/'
train_custom(distiller, x_train,y_train,x_test,y_test)

Epoch 1:
Test Loss: 0.07480966299772263, Test Accuracy: 0.9778000116348267
Epoch 2:
Test Loss: 0.05010770261287689, Test Accuracy: 0.9846000075340271
Epoch 3:
Test Loss: 0.040816713124513626, Test Accuracy: 0.9868000149726868
Epoch 4:
Test Loss: 0.03639257699251175, Test Accuracy: 0.988099992275238
Epoch 5:
Test Loss: 0.03305165469646454, Test Accuracy: 0.9894000291824341
Epoch 6:
Test Loss: 0.030797796323895454, Test Accuracy: 0.989300012588501
Epoch 7:
Test Loss: 0.028368651866912842, Test Accuracy: 0.9894999861717224
Epoch 8:
Test Loss: 0.027927255257964134, Test Accuracy: 0.9897000193595886
Epoch 9:
Test Loss: 0.027299432083964348, Test Accuracy: 0.9901999831199646
Epoch 10:
Test Loss: 0.027795376256108284, Test Accuracy: 0.9898999929428101
Epoch 11:
Test Loss: 0.028765100985765457, Test Accuracy: 0.9901000261306763
Epoch 12:
Test Loss: 0.02999752201139927, Test Accuracy: 0.989799976348877
Epoch 13:
Test Loss: 0.030495095998048782, Test Accuracy: 0.9901999831199646
Epoch 14:
Test L

Zip data

In [None]:
# Zip the run's output directory, then hand the archive to the browser.
shutil.make_archive(base_name="student_same_2", format='zip', root_dir="student_same_2")
files.download(filename="student_same_2.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Student 0.1 Alpha (3)

In [None]:
np.random.seed(2)
tf.random.set_seed(2)

model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=3,
)
filepath = '/content/student_same_3/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'
!mkdir '/content/student_same_3/'
train_custom(distiller, x_train,y_train,x_test,y_test)

Epoch 1:
Test Loss: 0.07702013850212097, Test Accuracy: 0.9757999777793884
Epoch 2:
Test Loss: 0.04685361310839653, Test Accuracy: 0.9839000105857849
Epoch 3:
Test Loss: 0.03800017014145851, Test Accuracy: 0.9872000217437744
Epoch 4:
Test Loss: 0.034115079790353775, Test Accuracy: 0.9890000224113464
Epoch 5:
Test Loss: 0.031365685164928436, Test Accuracy: 0.9894999861717224
Epoch 6:
Test Loss: 0.030122121796011925, Test Accuracy: 0.989799976348877
Epoch 7:
Test Loss: 0.02977604977786541, Test Accuracy: 0.9900000095367432
Epoch 8:
Test Loss: 0.03019726648926735, Test Accuracy: 0.9898999929428101
Epoch 9:
Test Loss: 0.03027428314089775, Test Accuracy: 0.9898999929428101
Epoch 10:
Test Loss: 0.03084123507142067, Test Accuracy: 0.9900000095367432
Epoch 11:
Test Loss: 0.030780022963881493, Test Accuracy: 0.9904000163078308
Epoch 12:
Test Loss: 0.031299516558647156, Test Accuracy: 0.9905999898910522
Epoch 13:
Test Loss: 0.031740907579660416, Test Accuracy: 0.9909999966621399
Epoch 14:
Test L

In [None]:
# Zip the run's output directory, then hand the archive to the browser.
shutil.make_archive(base_name="student_same_3", format='zip', root_dir="student_same_3")
files.download(filename="student_same_3.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Student same Alpha 0.5

In [None]:
np.random.seed(42)
tf.random.set_seed(42)

model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.5,
    temperature=3,
)

filepath = '/content/student_same_50/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'

!mkdir '/content/student_same_50/'

train_custom(distiller, x_train,y_train,x_test,y_test)

Epoch 1:


  output, from_logits = _get_logits(


Test Loss: 0.07488416135311127, Test Accuracy: 0.9768999814987183
Epoch 2:
Test Loss: 0.05644461140036583, Test Accuracy: 0.9821000099182129
Epoch 3:
Test Loss: 0.049728453159332275, Test Accuracy: 0.9840999841690063
Epoch 4:
Test Loss: 0.04503950849175453, Test Accuracy: 0.9850000143051147
Epoch 5:
Test Loss: 0.04039209708571434, Test Accuracy: 0.9869999885559082
Epoch 6:
Test Loss: 0.038291968405246735, Test Accuracy: 0.9868999719619751
Epoch 7:
Test Loss: 0.03648250177502632, Test Accuracy: 0.9879999756813049
Epoch 8:
Test Loss: 0.035454172641038895, Test Accuracy: 0.9883999824523926
Epoch 9:
Test Loss: 0.03435523435473442, Test Accuracy: 0.9890000224113464
Epoch 10:
Test Loss: 0.03420323133468628, Test Accuracy: 0.9889000058174133
Epoch 11:
Test Loss: 0.033600807189941406, Test Accuracy: 0.9897000193595886
Epoch 12:
Test Loss: 0.033306751400232315, Test Accuracy: 0.989799976348877
Epoch 13:
Test Loss: 0.03252328187227249, Test Accuracy: 0.9901999831199646
Epoch 14:
Test Loss: 0.031

In [None]:
# Zip the run's output directory, then hand the archive to the browser.
shutil.make_archive(base_name="student_same_50", format='zip', root_dir="student_same_50")
files.download(filename="student_same_50.zip")

# Student 0.5 Alpha (2)

In [None]:
np.random.seed(24)
tf.random.set_seed(24)
model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.5,
    temperature=3,
)

filepath = '/content/student_same_50_2/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'
!mkdir '/content/student_same_50_2/'

train_custom(distiller, x_train,y_train,x_test,y_test)

shutil.make_archive("student_same_50_2", 'zip', "student_same_50_2")
files.download("student_same_50_2.zip")

Epoch 1:


  output, from_logits = _get_logits(


Test Loss: 0.07745856791734695, Test Accuracy: 0.9750000238418579
Epoch 2:
Test Loss: 0.04759855195879936, Test Accuracy: 0.9829999804496765
Epoch 3:
Test Loss: 0.038304463028907776, Test Accuracy: 0.9868999719619751
Epoch 4:
Test Loss: 0.03440842777490616, Test Accuracy: 0.988099992275238
Epoch 5:
Test Loss: 0.03192931041121483, Test Accuracy: 0.9886000156402588
Epoch 6:
Test Loss: 0.030151600018143654, Test Accuracy: 0.989799976348877
Epoch 7:
Test Loss: 0.02957168035209179, Test Accuracy: 0.989799976348877
Epoch 8:
Test Loss: 0.029460079967975616, Test Accuracy: 0.9898999929428101
Epoch 9:
Test Loss: 0.03008436970412731, Test Accuracy: 0.989799976348877
Epoch 10:
Test Loss: 0.030622445046901703, Test Accuracy: 0.9900000095367432
Epoch 11:
Test Loss: 0.031191829591989517, Test Accuracy: 0.9901000261306763
Epoch 12:
Test Loss: 0.03182699531316757, Test Accuracy: 0.9904000163078308
Epoch 13:
Test Loss: 0.0325569286942482, Test Accuracy: 0.9905999898910522
Epoch 14:
Test Loss: 0.0343279

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Student 0.5 Alpha (3)

In [None]:
np.random.seed(2)
tf.random.set_seed(2)
model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.5,
    temperature=3,
)

filepath = '/content/student_same_50_3/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'
!mkdir '/content/student_same_50_3/'

train_custom(distiller, x_train,y_train,x_test,y_test)

shutil.make_archive("student_same_50_3", 'zip', "student_same_50_3")
files.download("student_same_50_3.zip")

Epoch 1:
Test Loss: 0.07787740230560303, Test Accuracy: 0.9745000004768372
Epoch 2:
Test Loss: 0.047682225704193115, Test Accuracy: 0.9830999970436096
Epoch 3:
Test Loss: 0.03870305046439171, Test Accuracy: 0.9864000082015991
Epoch 4:
Test Loss: 0.034302856773138046, Test Accuracy: 0.988099992275238
Epoch 5:
Test Loss: 0.03222411498427391, Test Accuracy: 0.9890000224113464
Epoch 6:
Test Loss: 0.030788522213697433, Test Accuracy: 0.9902999997138977
Epoch 7:
Test Loss: 0.02985871210694313, Test Accuracy: 0.9898999929428101
Epoch 8:
Test Loss: 0.02978339232504368, Test Accuracy: 0.9904000163078308
Epoch 9:
Test Loss: 0.03005969524383545, Test Accuracy: 0.9901999831199646
Epoch 10:
Test Loss: 0.030875032767653465, Test Accuracy: 0.9900000095367432
Epoch 11:
Test Loss: 0.031501881778240204, Test Accuracy: 0.9904999732971191
Epoch 12:
Test Loss: 0.032929856330156326, Test Accuracy: 0.9904000163078308
Epoch 13:
Test Loss: 0.03383447974920273, Test Accuracy: 0.9904000163078308
Epoch 14:
Test L

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Student same Alpha 0.9

In [None]:
np.random.seed(42)
tf.random.set_seed(42)
model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.9,
    temperature=3,
)

filepath = '/content/student_same_90/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'

!mkdir '/content/student_same_90/'
train_custom(distiller, x_train,y_train,x_test,y_test)

Epoch 1:


  output, from_logits = _get_logits(


Test Loss: 0.07502587139606476, Test Accuracy: 0.9769999980926514
Epoch 2:
Test Loss: 0.05474058911204338, Test Accuracy: 0.9825999736785889
Epoch 3:
Test Loss: 0.047644395381212234, Test Accuracy: 0.9842000007629395
Epoch 4:
Test Loss: 0.04341650381684303, Test Accuracy: 0.9861999750137329
Epoch 5:
Test Loss: 0.04079493507742882, Test Accuracy: 0.986299991607666
Epoch 6:
Test Loss: 0.037476472556591034, Test Accuracy: 0.9878000020980835
Epoch 7:
Test Loss: 0.03554585576057434, Test Accuracy: 0.9883000254631042
Epoch 8:
Test Loss: 0.034172993153333664, Test Accuracy: 0.9887999892234802
Epoch 9:
Test Loss: 0.03378065675497055, Test Accuracy: 0.9894000291824341
Epoch 10:
Test Loss: 0.03333618491888046, Test Accuracy: 0.9897000193595886
Epoch 11:
Test Loss: 0.033250514417886734, Test Accuracy: 0.9898999929428101
Epoch 12:
Test Loss: 0.03291836753487587, Test Accuracy: 0.9901999831199646
Epoch 13:
Test Loss: 0.03249417245388031, Test Accuracy: 0.9904000163078308
Epoch 14:
Test Loss: 0.0312

In [None]:
# Zip the run's output directory, then download the archive.
shutil.make_archive("student_same_90", 'zip', "student_same_90")
# The archive created above is `student_same_90.zip`; the previous argument
# ("student_same.zip_90") named a file that is never created.
files.download("student_same_90.zip")

# Student 0.9 Alpha (2)

In [None]:
np.random.seed(24)
tf.random.set_seed(24)
model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.9,
    temperature=3,
)

filepath = '/content/student_same_90_2/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'

!mkdir '/content/student_same_90_2/'
train_custom(distiller, x_train,y_train,x_test,y_test)
shutil.make_archive("student_same_90_2", 'zip', "student_same_90_2")
files.download("student_same_90_2.zip")

Epoch 1:


  output, from_logits = _get_logits(


Test Loss: 0.07493124157190323, Test Accuracy: 0.9776999950408936
Epoch 2:
Test Loss: 0.050224658101797104, Test Accuracy: 0.9848999977111816
Epoch 3:
Test Loss: 0.04056088998913765, Test Accuracy: 0.9871000051498413
Epoch 4:
Test Loss: 0.03632732108235359, Test Accuracy: 0.9882000088691711
Epoch 5:
Test Loss: 0.033225882798433304, Test Accuracy: 0.9891999959945679
Epoch 6:
Test Loss: 0.030622247606515884, Test Accuracy: 0.9898999929428101
Epoch 7:
Test Loss: 0.02926594577729702, Test Accuracy: 0.989799976348877
Epoch 8:
Test Loss: 0.028120553120970726, Test Accuracy: 0.9908000230789185
Epoch 9:
Test Loss: 0.02787102945148945, Test Accuracy: 0.9901000261306763
Epoch 10:
Test Loss: 0.02800314873456955, Test Accuracy: 0.9908999800682068
Epoch 11:
Test Loss: 0.028590787202119827, Test Accuracy: 0.9908999800682068
Epoch 12:
Test Loss: 0.029595129191875458, Test Accuracy: 0.9909999966621399
Epoch 13:
Test Loss: 0.0296745877712965, Test Accuracy: 0.991100013256073
Epoch 14:
Test Loss: 0.0309

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Student 0.9 Alpha (3)

In [None]:
np.random.seed(2)
tf.random.set_seed(2)
model_path = '/content/base/LeNetMNIST_init.tf'
student = load_model(model_path)
student_scratch = tf.keras.models.clone_model(student)

distiller = Distiller(student=student, teacher=techer_model)
distiller.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=[tf.keras.metrics.CategoricalCrossentropy()],
    student_loss_fn=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=tf.keras.losses.KLDivergence(),
    alpha=0.9,
    temperature=3,
)

filepath = '/content/student_same_90_3/'
init_weights = filepath + model_name + dataset + '_init_weights.tf'
init_model = filepath + model_name + dataset + '_init.tf'
final_weights = filepath + model_name + dataset + '_final_weights.tf'
final_model= filepath + model_name + dataset + 'final.tf'

!mkdir '/content/student_same_90_3/'
train_custom(distiller, x_train,y_train,x_test,y_test)
shutil.make_archive("student_same_90_3", 'zip', "student_same_90_3")
files.download("student_same_90_3.zip")

Epoch 1:


  output, from_logits = _get_logits(


Test Loss: 0.07425890117883682, Test Accuracy: 0.9775000214576721
Epoch 2:
Test Loss: 0.04969808831810951, Test Accuracy: 0.9846000075340271
Epoch 3:
Test Loss: 0.04062308743596077, Test Accuracy: 0.9868999719619751
Epoch 4:
Test Loss: 0.03699176758527756, Test Accuracy: 0.9883000254631042
Epoch 5:
Test Loss: 0.03343488648533821, Test Accuracy: 0.9890000224113464
Epoch 6:
Test Loss: 0.030403781682252884, Test Accuracy: 0.9901000261306763
Epoch 7:
Test Loss: 0.028927791863679886, Test Accuracy: 0.9900000095367432
Epoch 8:
Test Loss: 0.02826273813843727, Test Accuracy: 0.9907000064849854
Epoch 9:
Test Loss: 0.027987316250801086, Test Accuracy: 0.9904000163078308
Epoch 10:
Test Loss: 0.028116373345255852, Test Accuracy: 0.9908999800682068
Epoch 11:
Test Loss: 0.02945682406425476, Test Accuracy: 0.9904999732971191
Epoch 12:
Test Loss: 0.030312344431877136, Test Accuracy: 0.9905999898910522
Epoch 13:
Test Loss: 0.031505830585956573, Test Accuracy: 0.991100013256073
Epoch 14:
Test Loss: 0.03

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

---