In [7]:
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Reset the global state that Keras keeps between models (for example the
# counters behind auto-generated names such as "dense_1"), so the models
# built below start from a clean slate.
tf.keras.backend.clear_session()


## Getting started with the Sequential Model

The Sequential model is a linear stack of layers.

You can create a Sequential model by passing a list of layer instances to the constructor:

In [8]:
# Build the whole network in one call: the constructor receives the layers
# in the order the data flows through them.
model = Sequential(layers=[
    # Hidden layer; as the first layer it also declares the 784-feature input.
    Dense(units=32, activation='relu', input_shape=(784,)),
    # Output layer: softmax over 10 units.
    Dense(units=10, activation='softmax'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                25120     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________


You can also simply add layers via the `.add()` method:

In [9]:
# Start from an empty model and append the layers one at a time.
# Use the `Sequential` class imported in the first cell: the imports cell
# does not bring in a `models` name, so `models.Sequential()` raises a
# NameError when the notebook is run on a fresh kernel.
model = Sequential()

# Only the first layer needs `input_shape`; later layers infer their input.
model.add(Dense(32, activation='relu', input_shape=(784,)))
model.add(Dense(10, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 32)                25120     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________


## Specifying the input shape

The model needs to know what input shape it should expect. For this reason, the first layer in a Sequential model (and only the first, because following layers can do automatic shape inference) needs to receive information about its input shape. There are several possible ways to do this:


* Pass an input_shape argument to the first layer. This is a shape tuple (a tuple of integers or None entries, where None indicates that any positive integer may be expected). In input_shape, the batch dimension is not included.
* Some 2D layers, such as Dense, support the specification of their input shape via the argument input_dim, and some 3D temporal layers support the arguments input_dim and input_length.
* If you ever need to specify a fixed batch size for your inputs (this is useful for stateful recurrent networks), you can pass a batch_size argument to a layer. If you pass both batch_size=32 and input_shape=(6, 8) to a layer, it will then expect every batch of inputs to have the batch shape (32, 6, 8).

As such, the following snippets are strictly equivalent:

In [10]:
# Input described as a shape tuple (the batch dimension is left out).
model = Sequential()
model.add(Dense(units=32, input_shape=(784,)))

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 32)                25120     
Total params: 25,120
Trainable params: 25,120
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Input described with the `input_dim` shorthand instead of a shape tuple.
model = Sequential()
model.add(Dense(units=32, input_dim=784))

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 32)                25120     
Total params: 25,120
Trainable params: 25,120
Non-trainable params: 0
_________________________________________________________________


## Compilation

Before training a model, you need to configure the learning process, which is done via the compile method. It receives three arguments:

* An optimizer. This could be the string identifier of an existing optimizer (such as rmsprop or adagrad), or an instance of the Optimizer class. See: optimizers.
* A loss function. This is the objective that the model will try to minimize. It can be the string identifier of an existing loss function (such as categorical_crossentropy or mse), or it can be an objective function. See: losses.
* A list of metrics. For any classification problem you will want to set this to `metrics=['accuracy']`. A metric could be the string identifier of an existing metric or a custom metric function.

We will separately go over optimizers, loss functions, and metrics in a later lesson.

In [12]:
# Each call below re-configures the same model; they are listed together
# only to compare typical settings for different kinds of problems.

# Multi-class classification: categorical cross-entropy, tracking accuracy.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Binary classification: binary cross-entropy, tracking accuracy.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Regression: mean squared error, no additional metrics.
model.compile(optimizer='rmsprop', loss='mse')

# For custom metrics
import tensorflow.keras.backend as K

def mean_pred(y_true, y_pred):
    """Custom metric: the mean of the predicted values.

    Args:
        y_true: Ground-truth targets. Not used; present because metric
            functions are called with ``(y_true, y_pred)``.
        y_pred: Model predictions.

    Returns:
        The mean over all elements of ``y_pred``.
    """
    average_prediction = K.mean(y_pred)
    return average_prediction

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    # A built-in metric (string identifier) next to the custom callable.
    metrics=['accuracy', mean_pred],
)

## Training

Keras models are trained on NumPy arrays of input data and labels. To train a model we can use one of three functions:

* The `fit` function. This is the most basic option.
* The `fit_generator` function. This is a bit more complicated, as it takes a generator instead of a NumPy array. It is often used for large datasets. (In TensorFlow 2.1 and later, `fit` accepts generators directly and `fit_generator` is deprecated.)
* The `train_on_batch` function, which performs a single gradient update over one batch of samples.

In [14]:

import numpy as np

# Seed NumPy (dummy data) and TensorFlow's global generator (used, among
# other things, for weight initialisation) so that a Restart & Run All
# produces repeatable results instead of different numbers on every run.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Binary classifier: 100 input features -> 32 ReLU units -> 1 sigmoid unit.
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=100))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Generate dummy data: 1000 samples with 100 uniform features in [0, 1),
# each paired with a random 0/1 label. The labels are independent of the
# features, so there is nothing real for the model to learn.
data = np.random.random((1000, 100))
labels = np.random.randint(2, size=(1000, 1))

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 32)                3232      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 33        
Total params: 3,265
Trainable params: 3,265
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Train on the dummy data. Several arguments are spelled out explicitly
# (even where nothing special is requested) to show what can be tuned.
model.fit(
    x=data,
    y=labels,
    batch_size=32,          # samples per gradient update
    epochs=10,              # full passes over the training data
    verbose=2,              # one log line per epoch
    callbacks=None,
    validation_split=0.2,   # hold back 20% of the samples for validation
    validation_data=None,
    shuffle=True,
    class_weight=None,
    sample_weight=None,
    initial_epoch=0,
)

Train on 800 samples, validate on 200 samples
Epoch 1/10
800/800 - 1s - loss: 0.7022 - accuracy: 0.5138 - val_loss: 0.7026 - val_accuracy: 0.5200
Epoch 2/10
800/800 - 0s - loss: 0.6914 - accuracy: 0.5387 - val_loss: 0.7100 - val_accuracy: 0.4750
Epoch 3/10
800/800 - 0s - loss: 0.6888 - accuracy: 0.5525 - val_loss: 0.7041 - val_accuracy: 0.5350
Epoch 4/10
800/800 - 0s - loss: 0.6849 - accuracy: 0.5475 - val_loss: 0.7014 - val_accuracy: 0.5550
Epoch 5/10
800/800 - 0s - loss: 0.6824 - accuracy: 0.5300 - val_loss: 0.7048 - val_accuracy: 0.4800
Epoch 6/10
800/800 - 0s - loss: 0.6798 - accuracy: 0.5663 - val_loss: 0.7039 - val_accuracy: 0.5300
Epoch 7/10
800/800 - 0s - loss: 0.6761 - accuracy: 0.5850 - val_loss: 0.7034 - val_accuracy: 0.5400
Epoch 8/10
800/800 - 0s - loss: 0.6726 - accuracy: 0.5900 - val_loss: 0.7031 - val_accuracy: 0.5500
Epoch 9/10
800/800 - 0s - loss: 0.6702 - accuracy: 0.5763 - val_loss: 0.7047 - val_accuracy: 0.5200
Epoch 10/10
800/800 - 0s - loss: 0.6656 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x646e00050>

In [16]:
# `Sequential.predict_classes` was deprecated in TensorFlow 2.5 and removed
# in 2.6. For a model with a single sigmoid output, the equivalent is to
# threshold the predicted probability at 0.5, which gives the same
# (n_samples, 1) int32 array of 0/1 class labels.
(model.predict(
    data,
    batch_size=32,
    verbose=1) > 0.5).astype("int32")



array([[0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
    

In [17]:
# `Sequential.predict_proba` was deprecated in TensorFlow 2.5 and removed in
# 2.6. `predict` returns the same values: for this model, the sigmoid
# output, i.e. the predicted probability of class 1 for every sample.
model.predict(
    data,
    batch_size=32,
    verbose=1)



array([[0.45500144],
       [0.505467  ],
       [0.5620613 ],
       [0.6372395 ],
       [0.6209452 ],
       [0.5443704 ],
       [0.55046934],
       [0.5954305 ],
       [0.49542913],
       [0.4983981 ],
       [0.44334888],
       [0.48487353],
       [0.4659502 ],
       [0.45865682],
       [0.56216395],
       [0.5787022 ],
       [0.5084622 ],
       [0.4709742 ],
       [0.47352293],
       [0.43648085],
       [0.46334404],
       [0.45033148],
       [0.5986371 ],
       [0.51056063],
       [0.5430888 ],
       [0.5161677 ],
       [0.46177158],
       [0.42863292],
       [0.600273  ],
       [0.48337123],
       [0.50654316],
       [0.5382304 ],
       [0.5400594 ],
       [0.5177635 ],
       [0.45067853],
       [0.48514456],
       [0.4906109 ],
       [0.48663774],
       [0.40215474],
       [0.46747234],
       [0.5832549 ],
       [0.502511  ],
       [0.39669335],
       [0.4738682 ],
       [0.5878903 ],
       [0.5206717 ],
       [0.45169097],
       [0.520