# Knowledge Distillation in MHIST Dataset

In [30]:
import os
import sys
import cv2
import csv
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
import random

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from Utilities import *

In [31]:
def load_images_from_folder(folder):
    images = []
    file_names = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder,filename))
        if img is not None:
            images.append(img)
            file_names.append(filename)
    return images, file_names

path = "mhist_dataset/images"
data, file_names = load_images_from_folder(path)
CSVfile = "mhist_dataset/annotations.csv"

labels = pd.read_csv(CSVfile, usecols = [1])
Partitions = pd.read_csv(CSVfile, usecols = [3])
labels = labels.to_numpy()
Partitions = Partitions.to_numpy()
X_train = []
X_test = []
y_train = []
y_test = []
for i in range(len(data)):
    if Partitions[i] == 'train':
        X_train.append(data[i])
        if (labels[i] == 'SSA'):
            y_train.append([1,0])
        if (labels[i] == 'HP'):
            y_train.append([0,1])
    if Partitions[i] == 'test':
        X_test.append(data[i])
        if (labels[i] == 'SSA'):
            y_test.append([1,0])
        if (labels[i] == 'HP'):
            y_test.append([0,1])

c = list(zip(X_train, y_train))
random.shuffle(c)
X_train, y_train = zip(*c)

c = list(zip(X_test, y_test))
random.shuffle(c)
X_test, y_test = zip(*c)

#RotatedX_test = np.rot90(X_test,axes=(-2,-1))
#X_test.append(data[RotatedX_test])
#for i in range(X_test.shape[0]))
    #y_test.append(y_test[i])

X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

n_batches = 32
Train_Data = []
Train_Label = []
for i in range(math.ceil(X_train.shape[0]/n_batches)):
    # Local batches and labels
    local_X, local_y = X_train[i*n_batches:(i+1)*n_batches,], y_train[i*n_batches:(i+1)*n_batches,]
    #print(local_X.shape)
    Train_Data.append(local_X)
    Train_Label.append(local_y)

Test_Data = []
Test_Label = []
for i in range(math.ceil(X_test.shape[0]/n_batches)):
    # Local batches and labels
    local_X, local_y = X_test[i*n_batches:(i+1)*n_batches,], y_test[i*n_batches:(i+1)*n_batches,]
    print(local_X.shape)
    Test_Data.append(local_X)
    Test_Label.append(local_y)

#Train_Data = np.asarray(Train_Data)
#Train_Label = np.asarray(Train_Label)
#Test_Data = np.asarray(Test_Data)
#Test_Label = np.asarray(Test_Label)
#print(Train_Data[0])
#print(Test_Data.shape[0])

(2175, 224, 224, 3)
(977, 224, 224, 3)
(2175, 2)
(977, 2)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(32, 224, 224, 3)
(17, 224, 224, 3)


In [32]:
def train_evaluate_resnet(model,trainingData, testingData, trainingLabel, testLabel):
    """Perform training and evaluation for the teacher model model.

    Args:
    model: Instance of tf.keras.Model.
    compute_loss_fn: A function that computes the training loss given the
        images, and labels.
    """

    # your code start from here for step 4
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    for epoch in range(1, NUM_EPOCHS + 1):
        # Run training.
        print('Epoch {}: '.format(epoch), end='')
        for i in range(math.ceil(X_train.shape[0]/n_batches)):
            with tf.GradientTape() as tape:
                loss_value = compute_loss(model,trainingData[i],trainingLabel[i])
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Run evaluation.
        num_correct = 0
        num_total = X_test.shape[0]
        for i in range(math.ceil(X_test.shape[0]/n_batches)):
            # your code start from here for step 4
            num_correct += compute_num_correct(model,testingData[i],testLabel[i])[0]
        print("Class_accuracy: " + '{:.2f}%'.format(num_correct / num_total * 100))

In [33]:
# Load Resnet
resNetBase= tf.keras.applications.resnet_v2.ResNet50V2(
    include_top = False,
    weights='imagenet',
    input_shape=(224,224,3),
    pooling=None,
)
for layer in resNetBase.layers[:-2]:
    layer.trainable = False
x = tf.keras.layers.Flatten()(resNetBase.output)
x = tf.keras.layers.Dense(2)(x)
restNet = tf.keras.Model(inputs=resNetBase.input, outputs=x)

In [34]:
train_evaluate_resnet(restNet,Train_Data,Test_Data,Train_Label,Test_Label)

Epoch 1: Class_accuracy: 43.09%
Epoch 2: Class_accuracy: 62.23%
Epoch 3: Class_accuracy: 49.85%
Epoch 4: Class_accuracy: 49.44%


In [35]:
def train_and_evaluate_mobileNet_using_KD(studentModel, teacherModel,trainingData, testingData, trainingLabel, testLabel, alpha, temprature):
    """Perform training and evaluation for the teacher model model.

    Args:
    model: Instance of tf.keras.Model.
    compute_loss_fn: A function that computes the training loss given the
        images, and labels.
    """

    # your code start from here for step 4
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    for epoch in range(1, NUM_EPOCHS + 1):
        # Run training.
        print('Epoch {}: '.format(epoch), end='')
        for i in range(math.ceil(X_train.shape[0]/n_batches)):
            with tf.GradientTape() as tape:
                loss_value = compute_student_loss_using_KD(studentModel, teacherModel,trainingData[i],trainingLabel[i], alpha, temprature)
            grads = tape.gradient(loss_value, studentModel.trainable_variables)
            optimizer.apply_gradients(zip(grads, studentModel.trainable_variables))

        # Run evaluation.
        num_correct = 0
        num_total = X_test.shape[0]
        for i in range(math.ceil(X_test.shape[0]/n_batches)):
            # your code start from here for step 4
            num_correct += compute_num_correct(studentModel,testingData[i],testLabel[i])[0]
        print("Class_accuracy: " + '{:.2f}%'.format(num_correct / num_total * 100))

In [36]:
studenModel2 = tf.keras.applications.mobilenet_v2.MobileNetV2(
    include_top = False,
    weights='imagenet',
    input_shape=(224,224,3),
    pooling=None,
)
for layer in studenModel2.layers[:-2]:
    layer.trainable = False
x = tf.keras.layers.Flatten()(studenModel2.output)
x = tf.keras.layers.Dense(2)(x)
mobileNet = tf.keras.Model(inputs=studenModel2.input, outputs=x)


In [37]:
train_and_evaluate_mobileNet_using_KD(mobileNet,restNet,Train_Data,Test_Data,Train_Label,Test_Label, 0.5,4)

Epoch 1: Class_accuracy: 48.93%
Epoch 2: Class_accuracy: 57.01%
Epoch 3: Class_accuracy: 58.65%
Epoch 4: Class_accuracy: 57.63%


In [38]:
def testModel(model,testData, testLabel):
    num_correct = 0
    num_total = 977

    for i in range(math.ceil(X_test.shape[0]/n_batches)):
        num_correct += compute_num_correct(model,testData[i],testLabel[i])[0]
    print("model Testing Accuracy: " + '{:.2f}%'.format(
        (num_correct / num_total) * 100))
    return (num_correct / num_total) * 100

In [39]:
teachingAssistantAcc = testModel(restNet,Test_Data,Test_Label)
teachingAssistantAcc = testModel(mobileNet,Test_Data,Test_Label)

model Testing Accuracy: 49.44%
model Testing Accuracy: 57.63%


In [40]:
testACC = []
tempratureValues = [1, 2, 4, 16, 32, 64]
for temp in tempratureValues:
    st = getStudentModel()
    train_and_evaluate_mobileNet_using_KD(mobileNet,restNet,Train_Data,Test_Data,Train_Label,Test_Label, 0.5,4)
    testACC += [testModel(mobileNet,Test_Data,Test_Label)]
print(testACC)

Epoch 1: Class_accuracy: 61.00%
Epoch 2: Class_accuracy: 53.33%
Epoch 3: Class_accuracy: 46.37%
Epoch 4: Class_accuracy: 60.18%
model Testing Accuracy: 60.18%
Epoch 1: Class_accuracy: 49.74%
Epoch 2: 