In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import tensorflow as tf,keras,keras.losses,keras.metrics
from  tensorflow.keras.applications  import VGG16
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Input, Dense, concatenate, Flatten
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)
from scipy.special import expit
from sklearn.metrics import mean_squared_error

In [2]:
# Load the dataset from "test_USA.csv" (path is relative to the working directory).
data=pd.read_csv("test_USA.csv")
# Bare last expression so the notebook echoes the (rows, columns) tuple.
data.shape

(33000, 306)

In [3]:
# Target is the "GSNR_1" column; features are every column except the last one.
# NOTE(review): this presumes "GSNR_1" is the final column of `data`. If it is
# not, the target is still present in X and leaks into the features — confirm
# against the CSV header.
y = data["GSNR_1"]
X = data.iloc[:, :-1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert every split to a NumPy array; the targets additionally become
# column vectors of shape (n_samples, 1).
X_train, X_test = np.array(X_train), np.array(X_test)
y_train = np.array(y_train).reshape(-1, 1)
y_test = np.array(y_test).reshape(-1, 1)

# Report the shapes in the order X_train, y_train, X_test, y_test.
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

(26400, 305)
(26400, 1)
(6600, 305)
(6600, 1)


In [4]:
# Standardise the features with statistics estimated on the training split only,
# then apply the same fitted transform to both splits.
# NOTE: this cell overwrites X_train/X_test/y_train/y_test in place, so running
# it twice without re-running the split cell rescales already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Rescale the targets with a min-max scaler fitted on the training targets only.
label_scaler = MinMaxScaler().fit(y_train)
y_train, y_test = label_scaler.transform(y_train), label_scaler.transform(y_test)

In [5]:
# Load the previously saved model from 'test_USA.h5'; it is passed to the
# Distiller below as the teacher network.
teacher= keras.models.load_model('test_USA.h5')

In [6]:
# Rename the loaded model through its private `_name` attribute so that the
# summary below is titled "teacher".
teacher._name='teacher'

In [7]:
# Print the teacher's layer-by-layer output shapes and parameter counts.
teacher.summary()

Model: "teacher"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 305)               93330     
                                                                 
 dense_1 (Dense)             (None, 256)               78336     
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 64)                8256      
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 16)                528       
                                                                 
 dense_6 (Dense)             (None, 1)                 17  

In [8]:
# Student network: a smaller fully connected regressor than the teacher.
# It takes the same 305 input features and narrows 128 -> 64 -> 32 -> 16
# through ReLU layers before a single linear output unit.
student = Sequential()
student.add(Dense(128, activation="relu", input_shape=(305,)))
for width in (64, 32, 16):
    student.add(Dense(width, activation="relu"))
student.add(Dense(1, activation="linear"))

In [9]:
# Rename the model through its private `_name` attribute so that the summary
# below is titled "student".
student._name='student'

In [10]:
# Print the student's layer-by-layer output shapes and parameter counts.
student.summary()

Model: "student"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               39168     
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 16)                528       
                                                                 
 dense_4 (Dense)             (None, 1)                 17        
                                                                 
Total params: 50,049
Trainable params: 50,049
Non-trainable params: 0
_________________________________________________________________


In [11]:
class Distiller(keras.Model):
    def __init__(self,student,teacher):
        super(Distiller,self).__init__()
        self.student=student
        self.teacher=teacher
    def compile(self,optimizer,metrics,student_loss_fn,distillation_loss_fn,alpha=0.1,temperature=3):
        super(Distiller,self).compile(optimizer=optimizer,metrics=metrics)
        self.student_loss_fn=student_loss_fn
        self.distillation_loss_fn=distillation_loss_fn
        self.alpha=alpha
        self.temperature=temperature
    def train_step(self,data1):
        x,y=data1
        teacher_predictions=self.teacher(x,training=False)
        with tf.GradientTape() as tape:
            student_predictions=self.student(x,training=True)
            student_loss=self.student_loss_fn(y,student_predictions)
            distillation_loss=self.distillation_loss_fn(tf.nn.softmax(teacher_predictions/self.temperature),tf.nn.softmax(student_predictions/self.temperature))
            #distillation_loss=self.distillation_loss_fn((student_predictions/self.temperature),(teacher_predictions/self.temperature))
            loss=self.alpha*student_loss+(1-self.alpha)*distillation_loss
        modelParameters=self.student.trainable_variables
        gradients=tape.gradient(loss,modelParameters)
        self.optimizer.apply_gradients(zip(gradients,modelParameters))
        self.compiled_metrics.update_state(y,student_predictions)
        
        results={m.name:m.result() for m in self.metrics}
        results.update(
            {'student_loss':student_loss,'distillation_loss':distillation_loss}
        )
        return results
    def test_step(self,data1):
        x,y=data1
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)
        
        results={m.name:m.result() for m in self.metrics}
        results.update(
            {'student_loss': student_loss}
        )
        return results
        
    
            
            
    
            

In [12]:
distiller=Distiller(student=student,teacher=teacher)
distiller.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=[tf.keras.metrics.MeanSquaredError()],
    student_loss_fn=tf.keras.losses.MeanSquaredError(),
    #distillation_loss_fn=keras.losses.MeanSquaredError(),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=10,
    
    
)
distiller.fit(X_train,y_train,epochs=25)
distiller.evaluate(X_test,y_test)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


[0.004417788237333298, 2.9080927561153658e-05]