In [None]:
import numpy as np
import keras
#import tensorflow
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPool2D, InputLayer, ZeroPadding2D, GlobalAvgPool2D, Reshape, Softmax
from keras.datasets import cifar10
from keras.applications.mobilenet import MobileNet
from keras.utils import np_utils
import matplotlib.pyplot as plt
from skimage.transform import resize


%matplotlib inline

## Load Data: (32,32,3) Images

In [None]:
# Load CIFAR-10 as two (images, labels) pairs: one for training, one for testing.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Convince yourself that the labels are consistent with the data.
# See https://www.cs.toronto.edu/~kriz/cifar.html
# The label and the image must come from the SAME example, otherwise there is nothing to compare.
sample_index = 4999
print(y_train[sample_index])
plt.imshow(x_train[sample_index])

In [None]:
%%time

#  Resize.
#  One option to deal with the fact that MobileNet does not like (32,32,3) shape is to resize the images.
#  NOTE:  Every resized image is a (128,128,3) float array, so the full training set needs a lot of
#  time and memory.  Only the first 5,000 images are resized here.

#  Change this to 50000 if you want to do the entire set.
num_to_resize = 5000

#  Allocate the output once and fill it in place.  Growing the array with np.append inside the loop
#  re-copies everything resized so far on every iteration, which makes the total cost quadratic.
resized_train_data = np.empty((num_to_resize, 128, 128, 3))

for row in range(num_to_resize):
    resized_train_data[row] = resize(x_train[row], (128, 128, 3))

            

In [None]:
# Display the resized version of training example 4999 as a visual check of the resize step.
resized_sample = resized_train_data[4999]
plt.imshow(resized_sample)

In [None]:
#  Sloppy data (just reading off the timer), but resizing scales horribly.
#  seconds[i] is the time read off for resizing drange[i] images.
seconds = np.array([23,84,211,436])
drange = np.array([500,1000,1500,2000])

# Label the axes so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(drange, seconds, marker='o')
ax.set_xlabel('Number of images resized')
ax.set_ylabel('Time (seconds)')
ax.set_title('Resize time vs. number of images');

## Clean Data: One-Hot Encoding of Categories, Unrolling, Scaling

In [None]:
# Largest value an 8-bit pixel can take.  Dividing by 255 (not 256) maps the data onto the full
# [0, 1] interval, the same range skimage's resize produces by default for 8-bit input.
# Assumes x_train / x_test hold 8-bit pixel values (0-255) -- TODO confirm.
PIXEL_MAX = 255.0

# Flattened (one row of 32*32*3 values per image) copy of the training images.
x_train_unrolled = x_train.reshape(-1,32*32*3) / PIXEL_MAX

#Reshape for convolutions
x_train_reshape = x_train.reshape(-1,32,32,3) / PIXEL_MAX
print(x_train_reshape.shape)
#print(x_train_unrolled.shape)

# One-hot encode the integer class labels.
y_train_encoded = np_utils.to_categorical(y_train)
print(y_train_encoded.shape)

x_test_unrolled = x_test.reshape(-1,32*32*3) / PIXEL_MAX
y_test_encoded = np_utils.to_categorical(y_test)

In [None]:
# Load the cached resized images if the cache file exists.  On a first run the file has not been
# written yet, so fall back to the array built by the resize cell and save it for next time.
try:
    load_resized = np.load('resized_5k_images.npy')
except FileNotFoundError:
    load_resized = resized_train_data
    np.save('resized_5k_images.npy', load_resized)

In [None]:
# Images and labels must come from the same split.  load_resized is assumed to hold resized
# *training* images (the resize cell builds its array from x_train), so the matching labels are
# the training labels, not the test labels -- TODO confirm against how resized_5k_images.npy was written.
onek_labels = y_train_encoded[:1000]
onek_resized = load_resized[:1000]
onek_resized.shape

In [None]:
# Images and labels must come from the same split.  load_resized is assumed to hold resized
# *training* images (the resize cell builds its array from x_train), so the matching labels are
# the training labels, not the test labels -- TODO confirm against how resized_5k_images.npy was written.
fivek_labels = y_train_encoded[:5000]
fivek_resized = load_resized[:5000]
fivek_resized.shape

## Create Unbiased (Randomly Initialized) Conv. NN

In [None]:
# MobileNet with no pretrained weights (weights=None), including its own classification
# top configured for 10 classes, taking (128,128,3) inputs.
fresh = MobileNet(
    input_shape=(128, 128, 3),
    include_top=True,
    weights=None,
    classes=10,
)
fresh.summary()

fresh.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)



In [None]:
%%time
# Train the randomly initialized MobileNet on the 1,000-image subset.
fresh.fit(
    onek_resized,
    onek_labels,
    epochs=5,
    batch_size=4,
)

## Add Adapters for Data Dimensions

In [None]:
#Documentation for MobileNet says smaller shapes allowed, but throws error.  https://keras.io/applications/#mobilenet
#Thus, we have to adapt a new model to take the dimensions of our data.
#
# This cell builds `rand_init_model`: a Sequential model that starts from a (128,128,3) input,
# reuses the layers of `fresh` (all but its first layer) and then appends a pooling / reshape /
# convolution / softmax tail before compiling.

rand_init_model = Sequential()

rand_init_model.add(InputLayer(input_shape=(128,128,3),name='INPUT'))
#rand_init_model.add(ZeroPadding2D())
#rand_init_model.add(Conv2D(filters=1,kernel_size=(1,1),input_shape=(32,32,3),name='CONVA'))
#rand_init_model.add(Dense(64,input_dim=(32*32*3), activation = 'sigmoid',name='cifar10_in_adapter'))
# NOTE(review): this extends the `layers` list directly instead of calling add() for each layer.
# Presumably that does not wire the layers into the model's graph -- confirm for the Keras
# version in use.  Also, `fresh` was created with include_top=True, so fresh.layers[1:]
# already contains a classification head.
rand_init_model.layers.extend(fresh.layers[1:])

#print(rand_init_model.layers[-1].output_shape)
#rand_init_model.add(ZeroPadding2D())
#print(rand_init_model.layers[-1].output_shape)
#rand_init_model.add(Reshape((10)))
#print(rand_init_model.layers[-1].output_shape)
# NOTE(review): the input shape above is written channels-last, (128,128,3), but the pooling is
# asked to treat axis 1 as channels -- confirm that 'channels_first' is intended.
rand_init_model.add(GlobalAvgPool2D(data_format='channels_first'))
#print(rand_init_model.layers[-1].output_shape)
# Reshape the pooled vector back into a 1x1 spatial map so a Conv2D can follow.
rand_init_model.add(Reshape((1,1,-1)))

#print(rand_init_model.layers[-1].output_shape)
#rand_init_model.add(Dropout(rate=0.001))
#print(rand_init_model.layers[-1].output_shape)
# NOTE(review): a (5,5) kernel with default padding is applied to the 1x1 map produced by the
# Reshape above; this looks like it cannot produce a valid output -- confirm (a (1,1) kernel
# may have been intended).
rand_init_model.add(Conv2D(filters=10,kernel_size=(5,5),name='CONVZ'))
print(rand_init_model.layers[-1].output_shape)
rand_init_model.add(Softmax())
print(rand_init_model.layers[-1].output_shape)
#rand_init_model.add(Dense(10,activation = 'softmax', name='cifar10_class_adapter'))
# Flatten the remaining dimensions into a single vector per example.
rand_init_model.add(Reshape((-1,)))
print(rand_init_model.layers[-1].output_shape)

rand_init_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
rand_init_model.summary()

## Fit and Plot

In [None]:
%%time
# Train on the resized 128x128 images.  The model's INPUT layer is declared as (128,128,3),
# so the raw (32,32,3) x_train array cannot be fed to it.
history_fresh = rand_init_model.fit(fivek_resized, fivek_labels, epochs=25)

In [None]:
# Training loss per epoch for the randomly initialized model.
fresh_loss = history_fresh.history['loss']
plt.plot(fresh_loss)
#plt.plot(history_fresh.history['acc'])

## Create Biased (ImageNet-Pretrained) Conv. NN

In [None]:
# Pretrained weights only exist for certain shapes, which is why we get an error with smaller image sizes.
# We will use the weights anyways for this tutorial, but try loading other data sets with compatible image sizes.
# include_top=False leaves out MobileNet's own classification layers.
trained_model = MobileNet(
    input_shape=(128, 128, 3),
    include_top=False,
    weights='imagenet',
)

# Freeze: mark every pretrained layer as non-trainable so its weights stay as loaded.
# Try freezing other layers.
for pretrained_layer in trained_model.layers:
    pretrained_layer.trainable = False

# Check number of trainable parameters after freezing layers.
trained_model.summary()
#trained_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [None]:
# Create the transfer-learning model: the frozen, pretrained MobileNet as feature extractor,
# followed by a small trainable classification head for the 10 CIFAR-10 classes.
transfer_model = Sequential()

# Add the pretrained network through add(), as one nested layer, so that it is actually wired
# into this model.  Extending `transfer_model.layers` by hand only edits a Python list and does
# not connect the layers to the model's input/output.
# Remember, include_top = False has already chopped off the classification layers, so the whole
# of trained_model is the feature extractor and it already expects (128,128,3) inputs.
transfer_model.add(trained_model)
print(transfer_model.output_shape)

# Average each feature map over its spatial positions.  The default data_format is used so the
# pooling runs over height and width (assumes the backend's image_data_format is the Keras
# default, channels_last, matching the (128,128,3) input shape).
transfer_model.add(GlobalAvgPool2D())
transfer_model.add(Dropout(rate=0.001))
print(transfer_model.output_shape)

# Trainable classifier: one softmax unit per class.  Its output is already a flat vector of
# 10 probabilities, so no Flatten/Reshape is needed afterwards.
transfer_model.add(Dense(10, activation = 'softmax', name = 'cifar10_class_adapter'))
print(transfer_model.output_shape)

#Experiment: freeze all trainable params.  What do you expect to happen to the acc?  Check summary.
#transfer_model.trainable = False
transfer_model.summary()
transfer_model.compile(optimizer = 'adam',loss = 'categorical_crossentropy',metrics=['accuracy'])

In [None]:
%%time
# Train the transfer model on the 5,000-image resized subset.
transfer_model.fit(
    fivek_resized,
    fivek_labels,
    epochs=10,
    batch_size=4,
)

## Fit

In [None]:
%%time
# Train on the resized 128x128 images and keep the History object for plotting.
# transfer_model takes (128,128,3) inputs, so the unrolled (N, 32*32*3) vectors cannot be fed to it.
# NOTE: if the model was already fitted in the previous cell, this continues that training.
history_transfer = transfer_model.fit(fivek_resized, fivek_labels, epochs=15)

In [None]:
# Training loss per epoch for the transfer model.
transfer_loss = history_transfer.history['loss']
plt.plot(transfer_loss)

## Prediction and Scoring

In [None]:
# Suppress scientific notation for easier comparison.
np.set_printoptions(suppress=True)

#Predict and look at an example to compare between biased and unbiased.
# NOTE(review): x_test_unrolled is built as flat rows of 32*32*3 values, while rand_init_model
# declares a (128,128,3) input.  These shapes look incompatible; the test images probably need
# the same resize step as the training images -- confirm before relying on this cell.
unbiased_prediction = rand_init_model.predict_proba(x_test_unrolled)
# Sum of the predicted class probabilities for test example 76 (expected to be ~1 for a softmax output).
sum(unbiased_prediction[76])

In [None]:
# Class probabilities from the transfer (pretrained) model for the test set.
# NOTE(review): x_test_unrolled is built as flat rows of 32*32*3 values, while transfer_model
# declares a (128,128,3) input.  These shapes look incompatible; the test images probably need
# the same resize step as the training images -- confirm before relying on this cell.
biased_prediction = transfer_model.predict_proba(x_test_unrolled)
# Predicted probabilities for test example 76, to compare with the unbiased model and the true label.
biased_prediction[76]

In [None]:
# True (one-hot encoded) label of test example 76, for comparison with the predictions.
true_label = y_test_encoded[76]
true_label

In [None]:
#Unbiased
#Brier score, lower is better: smaller distance between prediction and true label.
#Try looking at prediction scores before and after training.
unbiased_diff = y_test_encoded - unbiased_prediction

# Each example's squared error, divided by the number of examples actually scored
# (instead of a hard-coded 10000), so the total below is the mean over the test set.
score_u = np.sum(np.square(unbiased_diff), axis=1) / len(unbiased_diff)

#Overall score for all scored test examples.
np.sum(score_u)

In [None]:
#Biased
#Brier score of the transfer model, lower is better.
biased_diff = y_test_encoded - biased_prediction

# Each example's squared error, divided by the number of examples actually scored
# (instead of a hard-coded 10000), so the total below is the mean over the test set.
score_b = np.sum(np.square(biased_diff), axis=1) / len(biased_diff)

#Overall score for all scored test examples.
np.sum(score_b)