In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
# Print the versions of tensorlfow and keras

print (tf.__version__)

print (keras.__version__)


2.0.0
2.2.4-tf


In [3]:
# Lets load fashion MNIST contains 70,000 gray scale images of 28 x 28 pixels 
# Pixel intensities vary from 0 to 255
# 0 - white , 255 - gray

fashion_mnist = keras.datasets.fashion_mnist

(X_train_full,y_train_full), (X_test,y_test) = fashion_mnist.load_data()



In [4]:
# Lets check the shape of X_train and X_test

print (X_train_full.shape)

print (X_test.shape)

(60000, 28, 28)
(10000, 28, 28)


In [5]:
# Create the validation set
#X_valid = X_train_full[:5000]
#Y_valid = Y_train_full[:5000]
#X_train = X_train_full[5000:]
#Y_train = Y_train_full[5000:]
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test / 255.0

In [6]:
# Since we are going train to neural network  we need to scale the pixel intensities between 0 amd 1
# This is required for gradient descent to work

#X_valid = X_valid/255.0
#Y_valid = Y_valid/255.0
#X_train = X_train/255.0
#Y_train = Y_train/255.0
#X_test = X_test/255.0


In [7]:
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat","Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

In [8]:
class_names[y_train[0]]

'Coat'

In [9]:
# 4 stands for coat

print (y_train[0])

4


In [10]:
# Building a keras model using sequential API

model = keras.models.Sequential() # Creates Sequential model
model.add(keras.layers.Flatten(input_shape=[28,28])) # Input layer of 28 x 28
model.add(keras.layers.Dense(300, activation="relu")) # Dense layer of 300 neurons
model.add(keras.layers.Dense(100, activation="relu")) # Dense Layer of 100 neurons
model.add(keras.layers.Dense(10,  activation="softmax")) # Dense layer of 10 neurons with softmax activation because we have 10 classes 


In [11]:
# Check the mode built so far

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
_________________________________________________________________


The first dense layer has 784 × 300 connection weights, plus 300 bias terms, which adds up to 235,500 parameters

In [12]:
# You can see all layers or access the layers individually

model.layers

[<tensorflow.python.keras.layers.core.Flatten at 0x7f93f01fca10>,
 <tensorflow.python.keras.layers.core.Dense at 0x7f93f021fed0>,
 <tensorflow.python.keras.layers.core.Dense at 0x7f93f0246690>,
 <tensorflow.python.keras.layers.core.Dense at 0x7f93f01e5610>]

In [13]:
hidden1 = model.layers[1]

print (hidden1.name)

dense


In [14]:
# Access the weights and biases of any layer using get_weights() method

weights, biases = hidden1.get_weights()

In [15]:
weights.shape # has 784 * 300 connection weights 

(784, 300)

In [16]:
biases.shape # Has 300 biases 

(300,)

In [17]:
# Check the weights
weights

array([[ 0.0558591 , -0.04061107,  0.05752498, ...,  0.0667164 ,
         0.06637913, -0.00724912],
       [ 0.03006566, -0.02547301, -0.01542773, ...,  0.05170052,
         0.00932687,  0.03089765],
       [-0.01349444,  0.06435312,  0.07176194, ..., -0.01910824,
        -0.00092441,  0.00093476],
       ...,
       [-0.06763235,  0.00162117, -0.02850636, ...,  0.03515697,
        -0.07408655, -0.05305213],
       [ 0.02489706, -0.01873919,  0.02838717, ...,  0.03167585,
        -0.06308326, -0.01655463],
       [-0.01124635,  0.03441236,  0.02438602, ...,  0.07373294,
        -0.05673486, -0.05525436]], dtype=float32)

As seen above the weights are initialized randomly. This is important.

If all network weights are intialized to zero then the network will never learn.

Initializing weights randomly helps the network break symmetry and learn.

In [18]:
# Compile the model.
# When compiling, we need to specify loss function, optimizer, and metric

model.compile(loss="sparse_categorical_crossentropy", optimizer ="adam", metrics=["accuracy"])

"sparse_categorical_crossentropy" - Used for loss because we have integer labels i.e classes are exclusive

If instead labels were one hot encoded then we would have used "categorical_crossentropy" loss.

For Binary classification, 

We would use "sigmoid" instead of "softmax" activation in the output layer and while compiling the model we 

would use "binary_crossentropy" loss.

There are obviously other losses and optimizers. The full list is here:

 https://keras.io/losses, 
 
 https://keras.io/optimizers, 
 
 and 
 
 https://keras.io/metrics.

In [None]:
# Train the model using the fit method

model.fit(X_train,y_train,epochs=30,validation_data=(X_valid,y_valid))

Train on 55000 samples, validate on 5000 samples
Epoch 1/30


Notice that with each epoch the loss reduced and accuracy increased

The training and validation accuracy as 95% and 89.5% respectively.

So there is not much of over fitting going on here





In [1]:
# The model is trained, lets make predictions

X_new = X_test[:3]

y_proba = model.predict(X_new)

print (y_proba.round(3))

NameError: name 'X_test' is not defined

In [None]:
# Predict classes



y_classes = model.predict_classes(X_new)

print (y_classes)

[9 2 1]


In [None]:
# Compare prediction with actual classes

y_test[:3]

array([9, 2, 1], dtype=uint8)

In [None]:
# Get class names
import numpy as np

np.array(class_names)[y_classes]

array(['Ankle boot', 'Pullover', 'Trouser'], dtype='<U11')

In [None]:
# Save your model

model.save("fashion_mnist.H5")

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: fashion_mnist.H5/assets


In [None]:
# Loading the saved model


model1 = keras.models.load_model("fashion_mnist.H5")

In [None]:
# Make predictions using the model you just loaded

y_classes1 = model1.predict_classes(X_new)

In [None]:
print (y_classes1)

[9 2 1]


As seens above the results are matching. So you just have to compile your model then save it and then load the saved model to keep working.

This is important because training deep learning models is computationally expensive.


# Building a Regression MLP


In [None]:
from sklearn.datasets import  fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()

# Split into train - test set

X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data,housing.target) 

# Split train set into train and validation sets

X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full)

# Scale the inputs so that SGD algo can train the network

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_valid = scaler.fit_transform(X_valid)
X_test = scaler.fit_transform(X_test)


Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /Users/adityanarvekar/scikit_learn_data


In [None]:
# input for the regression model will be all the columns. 
# The input layer of the neural network will therefore have 8 neurons

X_train.shape[1:]

(8,)

In [None]:
# Define the model structure

model3 = keras.models.Sequential()
model3.add(keras.layers.InputLayer(input_shape=X_train.shape[1:]))
model3.add(keras.layers.Dense(30,activation="relu"))
model3.add(keras.layers.Dense(1))


In [None]:
# compile the model. Define loss and optimizer

model3.compile(loss="mean_squared_error", optimizer="adam")


In [None]:
# Train the model

model3.fit(X_train, y_train, epochs=30, validation_data=(X_valid,y_valid))

Train on 11610 samples, validate on 3870 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7ff30bc6a5d0>

In [None]:
X_new = X_test[:3]

y_pred = model3.predict(X_new)

y_pred

array([[1.7664998],
       [1.6733724],
       [2.2074418]], dtype=float32)

# Complex models using Functional API

We have seen sequential networks so far. 

Next we look at non-sequential neural networks.

Complex non-sequential models can be built using Keras's Functional API



![image.png](attachment:image.png)

The architecture shown above is for Wide and Deep Neural Network proposed by Google Researchers in 2016.

It connects all or part of the inputs directly to the output layer, as shown above.

This architecture makes it possible for the neural network to learn both deep patterns (using the deep path) and simple rules (through the short path).

In contrast, a regular MLP forces all the data to flow through the full stack of layers; thus, simple patterns in the data may end up being distorted by this sequence of transformations.

The paper is available here: https://arxiv.org/abs/1606.07792


In [None]:
# Lets build the wide and deep Neural Network for the california housing model

input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation="relu")(input_)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.Concatenate()([input_, hidden2]) # Mixes the input layer with output of hidden2 layer
output = keras.layers.Dense(1)(concat)
model = keras.Model(inputs=[input_], outputs=[output])



The next steps remain unchanged i.e compile, fit, predict

# Multi-input Architectures

Use these architectures you can feed a subset of features through the wide path and a different subset through the deep path.


![image.png](attachment:image.png)

In [None]:
# California housing problem
# first 5 features (0-4) --> Wide path
# Six features (2-7) --> Deep path

input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="output")(concat)
model4 = keras.Model(inputs=[input_A, input_B], outputs=[output])


In [None]:
# Compile the model.

model4.compile(loss="mse", optimizer="adam")

# Fit the model. Remember we now have to pass 2 inputs when we fit the model.
# We need to split X_train, X_valid, X_test, X_new into two separate inputs

X_train_A, X_train_B = X_train[:,:5], X_train[:,2:]
X_valid_A, X_valid_B = X_valid[:,:5], X_valid[:,2:]
X_test_A, X_test_B = X_test[:,:5], X_test[:,2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]



In [None]:
# Fit the model. Note: fit method returns an object containing history. We will see its usage

history = model4.fit((X_train_A, X_train_B), y_train, epochs=15,
                    validation_data=((X_valid_A, X_valid_B), y_valid))



Train on 11610 samples, validate on 3870 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
# Evaluate and predict


mse_test = model4.evaluate((X_test_A, X_test_B), y_test)

y_pred = model.predict((X_new_A, X_new_B))

print (y_pred)

print (mse_test)



[[-0.17300448]
 [-0.0865384 ]
 [-0.48652852]]
0.34560617462609167


# Multi-output Neural Network

Just like you had multiple inputs you might need a deep neural network to produce multiple outputs.

For example, in image recognition a neural network must often detect an object such as a car in the image which is a classification task and the same network must also output the location of the object (co-ordinates of the center, height, width of the box).

This is an example of a classifier and a regression model. The neural network in such a case will require multiple outputs.



![image.png](attachment:image.png)

In [None]:
# Lets build the above architecture for California housing data set

input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1,name="main_output")(concat)
aux_output = keras.layers.Dense(1,name="aux_output")(hidden2)
model5 = keras.Model(inputs=[input_A,input_B],outputs=[output,aux_output]) # Passing 2 inputs and 2 outputs

In [None]:
# While compiling the model, we will need 2 losses.
# Keras will add the losses together to compute the total loss.
# However, in most cases we care more about the main loss than the auxilary loss. This is done by assigning a weight to the loss


model5.compile(loss=["mse","mse"],optimizer="adam", loss_weights=[0.9,0.1])





In [None]:
# When fitting the model, we need to pass y_train twice because in this model main output and auxillary output are measuring the same thing

history = model5.fit([X_train_A,X_train_B],[y_train,y_train],epochs=30,validation_data=([X_valid_A,X_valid_B],[y_valid,y_valid]))
                     
                     


Train on 11610 samples, validate on 3870 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Use Evaluate method to calculate the total, main and auxilary loss

total_loss, main_loss, aux_loss = model5.evaluate([X_test_A,X_test_B],[y_test,y_test])



In [None]:
print (total_loss,main_loss,aux_loss)

0.34872430664624354 0.34225875 0.41587195


In [None]:
# Predict using the model built

y_pred_main, y_pred_aux = model5.predict([X_new_A,X_new_B])

# Callbacks 

The fit() method has a callbacks argument which is a list of operations you can call at the following points:

1) Start and end of training

2) Start and end of each epoch

3) Before or after processesing each mini-batch


In [None]:
# For example: ModelCheckpoint callback saves weights of your best model on the validation data

checkpoint_cb = keras.callbacks.ModelCheckpoint("best_model5.h5",save_best_only=True)

history = model5.fit([X_train_A,X_train_B],[y_train,y_train],epochs=30,
                     validation_data=([X_valid_A,X_valid_B],[y_valid,y_valid]),
                     callbacks=[checkpoint_cb])



Train on 11610 samples, validate on 3870 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Load the model that performed best on the validation data

model6 = keras.models.load_model("best_model5.h5")

In [None]:
# Early Stopping can be implemented by using EarlyStopping callback
# This callback will interrupt training when it measures no progress on the validation set for a number of epochs 
# This is defined by the patient argument


early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,restore_best_weights=True)


# You can now train the model for a large number of epochs without worrying about wasting time

# Training will stop automatically when there is improvement for 10 epochs and the best model will be saved

history = model5.fit([X_train_A,X_train_B],[y_train,y_train],epochs=100,
                     validation_data=([X_valid_A,X_valid_B],[y_valid,y_valid]),
                     callbacks=[early_stopping_cb])









Train on 11610 samples, validate on 3870 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


As you can see above the training actually stopped at the Epoch 14 automatically
