In [1]:
import tensorflow as tf
from tensorflow import keras
import math

In [2]:
class LogisticEndpoint(keras.layers.Layer):
    def __init__(self, name=None):
        super(LogisticEndpoint, self).__init__(name=name)
        self.loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
        self.accuracy_fn = keras.metrics.BinaryAccuracy()

    def call(self, targets, logits, sample_weights=None):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        loss = self.loss_fn(targets, logits, sample_weights)
        self.add_loss(loss)

        # Log accuracy as a metric and add it
        # to the layer using `self.add_metric()`.
        acc = self.accuracy_fn(targets, logits, sample_weights)
        self.add_metric(acc, name="accuracy")

        # Return the inference-time prediction tensor (for `.predict()`).
        return tf.nn.softmax(logits)
    
class BranchEndpoint(keras.layers.Layer):
    def __init__(self, name=None):
        super(BranchEndpoint, self).__init__(name=name)
        self.loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
        self.loss_coefficient = 1
        self.feature_loss_coefficient = 1
#         self.loss_fn = keras.losses.sparse_categorical_crossentropy()

    def call(self, prediction, targets, additional_loss=None, student_features=None, teaching_features=None, sample_weights=None):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        print(prediction)
        #loss functions are (True, Prediction)
        loss = self.loss_fn(targets, prediction, sample_weights)
        
        #if loss is a list of additional loss objects
        if isinstance(additional_loss,list):
            for i in range(len(additional_loss)):
                loss += self.loss_fn(targets, additional_loss[i], sample_weights) * self.loss_coefficient
        elif additional_loss is not None:
            loss += self.loss_fn(targets, additional_loss, sample_weights) * self.loss_coefficient
            
        #feature distillation
        if teaching_features is not None and student_features is not None:
            diff = tf.norm(tf.math.abs(student_features - teaching_features)) * self.feature_loss_coefficient
            loss += self.loss_fn(targets, additional_loss, sample_weights)
            
        
        #TODO might be faster to concatenate all elements together and then perform the loss once on all the elements.
        
        self.add_loss(loss)

        return tf.nn.softmax(prediction)
    

class FeatureDistillation(keras.layers.Layer):
    def __init__(self, name=None):
        super(FeatureDistillation, self).__init__(name=name)
        self.loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
        self.loss_coefficient = 1
        self.feature_loss_coefficient = 0.3
        self.regularizer_fn = tf.keras.regularizers.L2(self.feature_loss_coefficient)
    #         self.loss_fn = keras.losses.sparse_categorical_crossentropy()
    def call(self, prediction, teaching_features, sample_weights=None):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        print(prediction)
        #loss functions are (True, Prediction)
        #feature distillation
#         l2_loss = self.regularizer_fn(prediction, teaching_features)
        l2_loss = self.feature_loss_coefficient * tf.reduce_sum(tf.square(prediction - teaching_features))
        #TODO might be faster to concatenate all elements together and then perform the loss once on all the elements.
        self.add_loss(l2_loss)
        return prediction

In [3]:
def logit(num, count =1,classes=10):
    output = np.zeros(classes) 
    pos = max(0,num-1)
    output[pos] = 1
    
    return output




In [4]:
#process for self distilation
#add y_true as an input for the model, here called 'targets'. targets is not linked to the main model path
#targets is added as input at the model define call
#targets is used as an additional input to the endpoint layers
#in endpoint layers, perform the loss function using the prev_layer input and the 'targets'

#determine if the additional loss is precomputed or computed at the endpoint layer.

from tensorflow.keras import layers

import numpy as np 
inputs = keras.Input(shape=(3,), name="inputs")
targets = keras.Input(shape=(10,), name="targets")
x = layers.Dense(512, activation="relu")(inputs)
x= layers.Dropout(0.2)(x)

x = layers.Dense(512, activation="relu")(x)
x= layers.Dropout(0.2)(x)

branch1_256 = keras.layers.Dense(256,activation="relu")(x)
print(branch1_256.shape)


x = layers.Dense(512, activation="relu")(x)
x= layers.Dropout(0.2)(x)

branch2_256 = keras.layers.Dense(256,activation="relu")(x)

x = layers.Dense(512, activation="relu")(x)
x= layers.Dropout(0.2)(x)

teaching_feat = layers.Dense(256, activation="relu")(x)
# teacher_feat = featureDistil(x)
x= layers.Dropout(0.2)(teaching_feat)

output = layers.Dense(10, name="output")(x)

softmax = layers.Softmax()(output)
endpoint = LogisticEndpoint(name="endpoint")(softmax, targets)

#rest of branches
branch1_teaching = FeatureDistillation(name="branch1_teaching")(branch1_256,teaching_feat)
branch1_dense = keras.layers.Dense(10)(branch1_teaching)
branch1_predictions = BranchEndpoint(name="branch1_predictions")(branch1_dense, targets, softmax, [branch1_256], teaching_feat)


branch2_teaching = FeatureDistillation(name="branch2_teaching")(branch2_256,teaching_feat)
branch2_dense = keras.layers.Dense(10)(branch2_teaching)
branch2_predictions = BranchEndpoint(name="branch2_predictions")(branch2_dense, targets, softmax, [branch2_256], teaching_feat)


model = keras.Model(inputs=[inputs, targets], outputs=[endpoint])
model.compile(optimizer="adam", loss =keras.losses.BinaryCrossentropy(from_logits=True))

targets =[]

for i in range(30):
    target = logit(np.random.randint(0,10))
    targets.append(target)
targets = np.array(targets)
data = {
    "inputs": np.random.random((30, 3)),
    "targets": targets,
}

# print(data['targets'])
model.fit(data,targets)

print(model.outputs)

(None, 256)
Tensor("Placeholder:0", shape=(None, 256), dtype=float32)
Tensor("Placeholder:0", shape=(None, 10), dtype=float32)
Tensor("Placeholder:0", shape=(None, 256), dtype=float32)
Tensor("Placeholder:0", shape=(None, 10), dtype=float32)


ValueError: object __array__ method not producing an array

In [73]:
class foo:
    y = 10

elem = foo()
print(elem.y)
x = elem.y + 5
elem.y 
print(x)
x

10
15


15

In [8]:
inputs = keras.Input(shape=(227,227,3))
x = keras.layers.Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=(227,227,3))(inputs)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2))(x)
x = keras.layers.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same")(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2))(x)
x = keras.layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same")(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(filters=384, kernel_size=(1,1), strides=(1,1), activation='relu', padding="same")(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(filters=256, kernel_size=(1,1), strides=(1,1), activation='relu', padding="same")(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2))(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(4096, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(4096, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(10, activation='softmax')(x)

model = keras.Model(inputs=inputs, outputs=[x], name="alexnet")
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.optimizers.SGD(lr=0.001,momentum=0.9), metrics=['accuracy'])
    
model.summary()

Model: "alexnet"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 227, 227, 3)]     0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 55, 55, 96)        34944     
_________________________________________________________________
batch_normalization (BatchNo (None, 55, 55, 96)        384       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 27, 27, 96)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 27, 27, 256)       614656    
_________________________________________________________________
batch_normalization_1 (Batch (None, 27, 27, 256)       1024      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 256)       0   

In [1]:
x = 10 //2
x

5

In [15]:
model.layers[1].output_shape

(None, 55, 55, 96)

In [13]:
model.layers[1].input_shape

(None, 227, 227, 3)