In [1]:
# Root folder of the project data; the logits .npy files are loaded from here.
# NOTE(review): this is an absolute, machine-specific path. Change it in this
# one place when running elsewhere.
data_dir = 'E:/Jupyter NB/10 class Classification/'

# Every split folder is derived from the single root above, so the location
# only has to be edited once. Plain concatenation is used on purpose: it keeps
# the forward slashes and yields exactly the same strings as before.
splits_dir = data_dir + 'Train_val_test/'
train_dir = splits_dir + 'train'
val_dir = splits_dir + 'val'
test_dir = splits_dir + 'test'

In [2]:
import numpy as np
import sys
sys.path.append('utils/')

import keras
from keras import layers
from keras import models

from keras import optimizers
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

from image_preprocessing_ver2 import ImageDataGenerator

from keras.models import Model
from keras.layers import Lambda, concatenate, Activation
from keras.losses import categorical_crossentropy as logloss
from keras.metrics import categorical_accuracy, top_k_categorical_accuracy
from keras import backend as K

Using TensorFlow backend.


In [3]:
# Load the pre-computed logits for the training and validation splits.
# Each .npy file is read with pickle enabled and then indexed with `[()]`,
# which unwraps the stored object when the file holds a 0-d object array
# (presumably a dict keyed by image name; verify against the file writer).
# NOTE(review): allow_pickle=True can execute arbitrary code while loading;
# only use it on files you produced yourself.
train_logits, val_logits = (
    np.load(data_dir + logits_file, allow_pickle = True)[()]
    for logits_file in ('train_logits.npy', 'val_logits.npy')
)

In [4]:
def preprocess_input(x):
    """Rescale pixel values from [0, 255] to [-1, 1].

    Args:
        x: array-like of pixel intensities. A floating-point ndarray is
            scaled in place (and returned), exactly as before. Any other
            input (e.g. a uint8 image array or a list) is first copied to
            float32, because in-place true division cannot be stored back
            into an integer array and used to raise.

    Returns:
        The rescaled floating-point array: ``(x / 255 - 0.5) * 2``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # astype() returns a new array, so the caller's integer data is kept
        x = x.astype(np.float32)
    x /= 255.0
    x -= 0.5
    x *= 2.0
    return x

In [5]:

# Generator from the local image_preprocessing_ver2 module; every image is
# passed through preprocess_input.
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    data_format='channels_last'
)

# Settings shared by the training and validation iterators.
flow_options = dict(target_size=(299, 299), batch_size=20)

# note: unlike the stock Keras call, the dict of logits for the split is
# passed as the second positional argument.
train_generator = data_generator.flow_from_directory(
    train_dir, train_logits, **flow_options
)
validation_generator = data_generator.flow_from_directory(
    val_dir, val_logits, **flow_options
)

Found 2000 images belonging to 20 classes.
Found 400 images belonging to 20 classes.


In [6]:
# Small student network: two conv/pool stages followed by a dense classifier
# with a 20-way softmax output.
std_model = models.Sequential()

# convolutional feature extractor on 299x299 RGB inputs
std_model.add(layers.Conv2D(16, (3,3), activation = 'relu', input_shape = (299,299,3)))
std_model.add(layers.MaxPooling2D(2,2))
std_model.add(layers.Conv2D(32, (3,3), activation = 'relu'))
std_model.add(layers.MaxPooling2D(2,2))

# classifier head
std_model.add(layers.Flatten())
std_model.add(layers.Dense(64, activation = 'relu'))
std_model.add(layers.Dense(20, activation = 'softmax'))

std_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 297, 297, 16)      448       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 148, 148, 16)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 146, 146, 32)      4640      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 73, 73, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 170528)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                10913856  
_________________________________________________________________
dense_2 (Dense)              (None, 20)               

In [7]:
# Softmax temperature used for the softened predictions and soft targets.
temperature = 5.0

# The student's last layer is Dense(20, activation='softmax'): the softmax is
# fused into the layer, so its output is already a probability vector.
# `std_model.layers.pop()` cannot strip it: popping from the layer list does
# not rewire the graph (and recent Keras versions hand back a copy of the
# list), so the old code applied softmax on top of softmax.
# Instead, attach a linear Dense layer to the penultimate features and start
# it from the existing head's weights, which gives true pre-softmax logits.
softmax_head = std_model.layers[-1]
features = std_model.layers[-2].output

logits_layer = layers.Dense(softmax_head.units, name='logits')
logits = logits_layer(features)
# set_weights needs the layer to be built, hence it comes after the call above
logits_layer.set_weights(softmax_head.get_weights())
# NOTE: `model` owns `logits_layer`; the softmax head inside `std_model` is a
# separate layer and is not updated when `model` is trained.

# usual probabilities
probabilities = Activation('softmax')(logits)

# softened probabilities: softmax of the logits divided by the temperature
logits_T = Lambda(lambda x: x/temperature)(logits)
probabilities_T = Activation('softmax')(logits_T)

output = concatenate([probabilities, probabilities_T])
model = Model(std_model.input, output)
# the model now outputs 2 * 20 = 40 values per sample:
# [:, :20] usual probabilities, [:, 20:] softened probabilities

In [8]:
def knowledge_distillation_loss(y_true, y_pred, lambda_const):
    """Weighted sum of the hard-label and soft-target cross-entropies.

    Args:
        y_true: tensor whose first 20 columns are the one-hot hard targets
            and whose remaining columns are the teacher logits (the original
            notes say they come from Xception).
        y_pred: tensor whose first 20 columns are the usual output
            probabilities and whose remaining columns are the probabilities
            softened with the temperature.
        lambda_const: weight applied to the hard-label term.

    Returns:
        lambda_const * CE(hard targets, probabilities)
        + CE(softmax(teacher logits / temperature), softened probabilities).
    """
    # targets: hard one-hot labels and teacher logits
    hard_targets = y_true[:, :20]
    teacher_logits = y_true[:, 20:]

    # predictions: plain and temperature-softened probabilities
    hard_probs = y_pred[:, :20]
    soft_probs = y_pred[:, 20:]

    # teacher logits become soft targets at the module-level temperature
    soft_targets = K.softmax(teacher_logits/temperature)

    hard_term = logloss(hard_targets, hard_probs)
    soft_term = logloss(soft_targets, soft_probs)
    return lambda_const*hard_term + soft_term

In [9]:
def accuracy(y_true, y_pred):
    """Categorical accuracy computed on the first 20 columns only.

    Only the leading 20 columns of both tensors are compared; the trailing
    columns (teacher logits / softened probabilities) are ignored.
    """
    hard_targets, hard_probs = y_true[:, :20], y_pred[:, :20]
    return categorical_accuracy(hard_targets, hard_probs)

In [10]:
def top_5_accuracy(y_true, y_pred):
    """Top-k categorical accuracy on the first 20 columns only.

    Uses the default `k` of `top_k_categorical_accuracy`; the trailing
    columns of both tensors are ignored.
    """
    hard_targets, hard_probs = y_true[:, :20], y_pred[:, :20]
    return top_k_categorical_accuracy(hard_targets, hard_probs)

In [11]:
def categorical_crossentropy(y_true, y_pred):
    """Cross-entropy between the first 20 columns of targets and predictions.

    This is the hard-label term alone, reported as a metric; the trailing
    columns of both tensors are ignored.
    """
    hard_targets, hard_probs = y_true[:, :20], y_pred[:, :20]
    return logloss(hard_targets, hard_probs)

In [12]:
# cross-entropy restricted to the softened targets and predictions
def soft_logloss(y_true, y_pred):
    """Soft-target term alone, reported as a metric.

    Takes the columns from index 20 onwards of both tensors: the teacher
    logits in `y_true` are turned into soft targets with the module-level
    `temperature`, and compared with the softened predictions in `y_pred`.
    """
    teacher_logits = y_true[:, 20:]
    soft_probs = y_pred[:, 20:]
    soft_targets = K.softmax(teacher_logits/temperature)
    return logloss(soft_targets, soft_probs)

In [13]:
# Weight of the hard-label cross-entropy inside the distillation loss.
lambda_const = 0.07


def distillation_loss(y_true, y_pred):
    """Two-argument loss for Keras; `lambda_const` is read when it is called."""
    return knowledge_distillation_loss(y_true, y_pred, lambda_const)


# The metrics report the hard and soft loss terms separately, plus accuracy.
model.compile(
    optimizer=optimizers.SGD(lr=1e-3),
    loss=distillation_loss,
    metrics=[categorical_crossentropy, accuracy, top_5_accuracy, soft_logloss]
)

In [14]:
# Training schedule.
# NOTE(review): with batch_size=20 the generators report 2000 training and
# 400 validation images, i.e. 100 and 20 batches per pass. The 150 / 30 steps
# used here therefore cover about 1.5 passes per epoch. Confirm this is
# intended.
fit_options = dict(
    steps_per_epoch=150,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=30,
)

history = model.fit_generator(train_generator, **fit_options)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
