In [13]:
import os
import keras
import random
from keras import backend as K
import tensorflow as tf
from keras.layers.core import Dense, Activation
from keras import models
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.models import Model, Sequential
from keras.applications import imagenet_utils
from keras.layers import Dense, GlobalAveragePooling2D, Input, Dropout, Conv2D, Flatten
from keras.applications import MobileNet
from keras.applications.mobilenet import preprocess_input
import numpy as np
from IPython.display import Image
from keras.optimizers import Adam

In [18]:
class Distiller(keras.Model):
    """Knowledge-distillation wrapper that trains ``student`` to mimic ``teacher``.

    Only the student's variables are updated. During training the outputs of
    both networks are divided by ``temperature`` and passed through a softmax
    before being compared, i.e. they are treated as logits by this class.

    Args:
        student: Model whose weights are optimised.
        teacher: Model used only for inference (called with ``training=False``).
    """

    def __init__(self, student, teacher):
        super().__init__()
        self.teacher = teacher
        self.student = student

    def call(self, x, training=None):
        """Forward pass of the distilled model: delegate to the student.

        Previously this returned ``x`` unchanged, so ``predict()`` on the
        distiller echoed its input instead of producing student predictions.
        """
        return self.student(x, training=training)

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):
        """Configure the distiller.

        Args:
            optimizer: Optimizer applied to the student's trainable variables.
            metrics: Metrics evaluated on ``(y, student_predictions)``.
            student_loss_fn: Loss between the true labels and the student
                predictions.
            distillation_loss_fn: Loss between the temperature-softened
                teacher and student predictions.
            alpha: Weight of ``student_loss_fn``; the distillation loss is
                weighted by ``1 - alpha``.
            temperature: Divisor applied to both networks' outputs before the
                softmax; must be strictly positive.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        if temperature <= 0:
            # The temperature is used as a divisor in `_soften`.
            raise ValueError(
                "temperature must be strictly positive, got {!r}".format(temperature)
            )
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def _soften(self, predictions):
        """Return the temperature-scaled softmax of ``predictions`` along axis 1."""
        return tf.nn.softmax(predictions / self.temperature, axis=1)

    def train_step(self, data):
        """Run one optimisation step on the student for a batch ``(x, y)``.

        Returns:
            Dict of the compiled metrics plus ``student_loss`` and
            ``distillation_loss`` for the batch.
        """
        x, y = data

        # The teacher is evaluated outside the tape: it is never updated here.
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            student_predictions = self.student(x, training=True)

            student_loss = self.student_loss_fn(y, student_predictions)
            # Gradients of the softened term scale as 1 / temperature**2, so
            # it is multiplied by temperature**2 to stay comparable with the
            # hard-label term when the two are mixed (Hinton et al., 2015).
            distillation_loss = self.distillation_loss_fn(
                self._soften(teacher_predictions),
                self._soften(student_predictions),
            ) * (self.temperature ** 2)
            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Differentiate with respect to the student only.
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, student_predictions)

        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"student_loss": student_loss, "distillation_loss": distillation_loss}
        )
        return results

    def test_step(self, data):
        """Evaluate the student on a batch ``(x, y)``.

        Returns:
            Dict of the compiled metrics plus ``student_loss`` for the batch.
        """
        x, y = data

        y_prediction = self.student(x, training=False)
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, y_prediction)

        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results

In [20]:
# Shared model configuration.
INPUT_SHAPE = (224, 224, 3)
NUM_CLASSES = 2
FROZEN_LAYER_COUNT = 20  # the first 20 teacher layers keep their ImageNet weights

# Create the teacher: MobileNet backbone (ImageNet weights, original
# 1000-way classifier head dropped) followed by a small dense head.
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)  # dense layer 1
x = Dropout(0.2)(x)
x = Dense(1024, activation='relu')(x)  # dense layer 2
x = Dropout(0.2)(x)
x = Dense(512, activation='relu')(x)   # dense layer 3
preds = Dense(NUM_CLASSES, activation='softmax')(x)  # class probabilities
teacher = Model(inputs=base_model.input, outputs=preds)

# Freeze the first FROZEN_LAYER_COUNT layers; every later layer is trainable.
for index, layer in enumerate(teacher.layers):
    layer.trainable = index >= FROZEN_LAYER_COUNT

# Create the student: two convolutions followed by a softmax classifier.
student = Sequential()
student.add(Conv2D(64, kernel_size=3, activation='relu', input_shape=INPUT_SHAPE))
student.add(Conv2D(32, kernel_size=3, activation='relu'))
student.add(Flatten())
student.add(Dense(NUM_CLASSES, activation='softmax'))

# Clone student for later comparison
student_scratch = keras.models.clone_model(student)

In [21]:
# Dataset locations; each directory holds one sub-folder per class.
train_dir = r'D:\Data\Train'
validation_dir = r'D:\Data\Validation'

# Defined before the generators so a single value drives batching everywhere.
batch_size = 30

nsfw_images = [fn for fn in os.listdir(os.path.join(train_dir, 'NSFW')) if fn.endswith('.jpg')]
sfw_images = [fn for fn in os.listdir(os.path.join(train_dir, 'SFW')) if fn.endswith('.jpg')]

# MobileNet's `preprocess_input` already maps pixels to [-1, 1].  The previous
# additional `rescale=1/255` was applied on top of it, shrinking the inputs to
# roughly [-0.004, 0.004], which does not match what the pretrained backbone
# expects.  Only the model-specific preprocessing is kept.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    color_mode='rgb',
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
)

validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    batch_size=batch_size,
    class_mode='categorical',
    target_size=(224, 224),
)

Found 6394 images belonging to 2 classes.
Found 2137 images belonging to 2 classes.


In [22]:
# `learning_rate` replaces the deprecated `lr` alias.
teacher.compile(
    optimizer=Adam(learning_rate=0.00001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train and evaluate teacher on data.
# Step counts come from the generators instead of hard-coded dataset sizes;
# floor division drops the final partial batch, as before.
# NOTE(review): the recorded run of this cell failed with a cuDNN
# initialisation error; that is an environment issue (often GPU memory) rather
# than something visible in this code -- confirm on the target machine.
teacher.fit(
    train_generator,
    validation_data=validation_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_steps=validation_generator.samples // batch_size,
    epochs=7,
)

Epoch 1/7


UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node functional_7/conv1/Conv2D (defined at <ipython-input-22-1b9b50ca3dc0>:4) ]] [Op:__inference_train_function_21594]

Function call stack:
train_function


In [12]:
distiller = Distiller(student=student, teacher=teacher)
distiller.compile(
    optimizer=Adam(learning_rate=0.00001),  # `lr` is a deprecated alias
    metrics=['accuracy'],
    # The student's last layer is a softmax, so it emits probabilities and the
    # hard-label loss must not treat them as logits.
    student_loss_fn=keras.losses.CategoricalCrossentropy(from_logits=False),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=10,
)
# NOTE(review): Distiller.train_step applies a temperature softmax to both
# models' outputs, which is designed for logits, while teacher and student
# here end in softmax layers -- consider exposing logits for distillation.

# Distill teacher to student.
# Step counts come from the generators instead of hard-coded dataset sizes;
# floor division drops the final partial batch, as before.
distiller.fit(
    train_generator,
    validation_data=validation_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_steps=validation_generator.samples // batch_size,
    epochs=7,
)


Epoch 1/7


UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node functional_1/conv1/Conv2D (defined at <ipython-input-11-7d07e56f835b>:45) ]] [Op:__inference_train_function_4813]

Function call stack:
train_function
