In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
import tensorflow as tf
tf.__version__

'2.0.0'

In [4]:
# Import the Sequential model and Dense layer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

### 1. simplest flow

In [None]:
# Assemble the network by handing Sequential its full layer stack up front:
#   - a hidden Dense layer of 10 ReLU units that also declares the 2-feature input
#   - a Dense output layer with a single unit (no activation specified)
model = Sequential([
    Dense(10, input_shape=(2,), activation="relu"),
    Dense(1),
])
# Print the layer-by-layer overview of the model
model.summary()

In [None]:
# Compile the model with the Adam optimizer and mean-squared-error loss
model.compile(optimizer='adam', loss='mse')
print("Training started..., this can take a while:")
# Fit the model on the training data for 30 epochs
model.fit(X_train, y_train, epochs=30)
# Report the loss on the same data the model was trained on
# (message typo fixed: "lost" -> "loss")
print("Final loss value:", model.evaluate(X_train, y_train))

### 2. Binary classification

- Use a sigmoid activation on the output layer with loss = "binary_crossentropy"; the output layer has a single unit.

In [None]:
# One-layer binary classifier: 4 input features feed a single sigmoid unit
model = Sequential([Dense(1, input_shape=(4,), activation="sigmoid")])
# Binary cross-entropy loss, SGD optimizer, and accuracy tracked as a metric
model.compile(loss='binary_crossentropy', optimizer="sgd", metrics=['accuracy'])
model.summary()
# 20 passes over the training data
model.fit(X_train, y_train, epochs=20)
# evaluate() gives back the loss followed by the compiled metrics;
# index 1 is the accuracy on the test set
test_scores = model.evaluate(X_test, y_test)
accuracy = test_scores[1]
print('Accuracy:', accuracy)

### 3. Multi-class 

- Use softmax, and loss = "categorical_crossentropy"

In [None]:
# Multi-class classifier declared as a single layer list:
# three ReLU hidden layers (128 -> 64 -> 32) on a 2-feature input,
# then a 4-unit softmax output (one unit per competitor)
model = Sequential([
    Dense(128, input_shape=(2,), activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(4, activation="softmax"),
])
# Categorical cross-entropy loss with Adam, tracking accuracy
model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
# Train for 200 epochs on the training data
model.fit(X_train, y_train, epochs=200)
# evaluate() gives back the loss followed by the compiled metrics;
# index 1 is the accuracy on the test data
test_scores = model.evaluate(X_test, y_test)
accuracy = test_scores[1]
print('Accuracy:', accuracy)

In [None]:
# Raw model outputs for the new samples
preds = model.predict(X_new)

# NOTE(review): the right-hand column is headed 'True labels' but prints X_new[i],
# i.e. rows of the same array that was passed to predict(). Confirm whether a
# separate label array was intended here.
print("{:45} | {}".format('Raw Model Predictions','True labels'))
for i in range(len(preds)):
    print("{} | {}".format(preds[i], X_new[i]))

# Collapse each output row to the index of its largest entry
preds = list(map(np.argmax, preds))

# Same table again, now with the arg-max index in the left column
print("{:10} | {}".format('Rounded Model Predictions','True labels'))
for i in range(len(preds)):
    print("{:25} | {}".format(preds[i], X_new[i]))

### 4. Multi label

- We still use "sigmoid" for the output and "binary_crossentropy" for the loss, just as in binary classification, but the number of units in the output layer equals the number of labels we have.
- E.g. if our multi-label problem has 3 labels, the output layer should have 3 units.

In [None]:
# Multi-label network given as one layer list:
# a 64-unit ReLU hidden layer on a 20-feature input, then 3 sigmoid output units
model = Sequential([
    Dense(64, input_shape=(20,), activation="relu"),
    Dense(3, activation="sigmoid"),
])

# Adam optimizer, binary cross-entropy loss, accuracy tracked as a metric
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

model.summary()

### 5. Callback

- The History callback is returned by default every time you train a model with the .fit() method. To read the recorded metrics, access the `history` dictionary inside the returned object and look up the corresponding keys.
- Depending on the loss and metrics we define, the history tracks those values after each epoch.

In [None]:
# Create the two layers first, then stack them in order
hidden_layer = Dense(64, input_shape=(20,), activation="relu")  # 20 inputs -> 64 ReLU units
output_layer = Dense(3, activation="sigmoid")                   # 3 sigmoid outputs
model = Sequential()
for layer in (hidden_layer, output_layer):
    model.add(layer)
# Adam optimizer with binary cross-entropy loss; accuracy is tracked as a metric
model.compile(metrics=['accuracy'], loss="binary_crossentropy", optimizer="adam")
model.summary()

In [None]:
# Train the model and keep the History object that fit() returns
history = model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_loss(history.history["loss"], history.history["val_loss"])
# The models in this notebook are compiled with metrics=['accuracy']; tf.keras 2.x
# logs that metric as "accuracy"/"val_accuracy", whereas older Keras releases used
# "acc"/"val_acc". Use whichever key is present so the lookup cannot KeyError.
acc_key = "accuracy" if "accuracy" in history.history else "acc"
# Plot train vs test accuracy during training
plot_accuracy(history.history[acc_key], history.history["val_" + acc_key])

#### 5.1 EarlyStopping

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Stop training once validation accuracy has failed to improve for 5 epochs in a row.
# With metrics=['accuracy'], tf.keras 2.x logs the validation metric as "val_accuracy";
# monitoring the old "val_acc" name finds no such metric, so training never stops early.
monitor_val_acc = EarlyStopping(monitor="val_accuracy", patience=5)

# Train with a generous epoch budget and let the callback end training
model.fit(X_train, y_train, epochs=1000, validation_data=(X_test, y_test),
          callbacks=[monitor_val_acc])

#### 5.2 ModelCheckpoint

In [None]:
# Import the EarlyStopping and ModelCheckpoint callbacks
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint

# Early stop on validation accuracy. With metrics=['accuracy'], tf.keras 2.x logs
# this metric as "val_accuracy"; the old "val_acc" name is not available there.
monitor_val_acc = EarlyStopping(monitor="val_accuracy", patience=3)
# Save the model to best_banknote_model.hdf5, keeping only the best one seen so far
# (no `monitor` is passed, so ModelCheckpoint's default monitored quantity applies)
modelCheckpoint = ModelCheckpoint("best_banknote_model.hdf5", save_best_only=True)
# Use a very large epoch budget; early stopping is expected to end training
history = model.fit(X_train, y_train,
                    epochs=10000000,
                    callbacks=[monitor_val_acc, modelCheckpoint],
                    validation_data=(X_test, y_test))

### 6. Learning curves

In [None]:
def _plot_train_vs_test(train_values, test_values, title, ylabel, legend_loc):
  """Plot a train curve and a test curve against epoch on a new figure and show it."""
  plt.figure()
  plt.plot(train_values)
  plt.plot(test_values)
  plt.title(title)
  plt.ylabel(ylabel)
  plt.xlabel('Epoch')
  plt.legend(['Train', 'Test'], loc=legend_loc)
  plt.show()


def plot_loss(loss,val_loss):
  """Plot training loss against validation/test loss, one point per epoch.

  Args:
    loss: sequence of training-loss values.
    val_loss: sequence of validation-loss values.
  """
  _plot_train_vs_test(loss, val_loss, 'Model loss', 'Loss', 'upper right')


# This cell previously defined plot_loss twice with identical bodies, while the
# notebook calls a plot_accuracy() that was never defined. The duplicate is
# replaced by that missing accuracy plot.
def plot_accuracy(acc,val_acc):
  """Plot training accuracy against validation/test accuracy, one point per epoch.

  Args:
    acc: sequence of training-accuracy values.
    val_acc: sequence of validation-accuracy values.
  """
  _plot_train_vs_test(acc, val_acc, 'Model accuracy', 'Accuracy', 'upper left')

In [None]:
# Digit classifier: a 16-unit ReLU hidden layer on a 64-feature input,
# followed by a 10-unit softmax output (one unit per digit)
model = Sequential([
    Dense(16, input_shape=(64,), activation="relu"),
    Dense(10, activation="softmax"),
])

# Adam optimizer, categorical cross-entropy loss, accuracy tracked as a metric
model.compile(loss="categorical_crossentropy", metrics=["accuracy"], optimizer="adam")

# Smoke test: run the still-untrained model on the training inputs and print the outputs
untrained_outputs = model.predict(X_train)
print(untrained_outputs)

#### 6.1 View loss after epochs

In [None]:
# Train silently (verbose=0) for 60 epochs, validating on the test split each epoch
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=60,
    verbose=0,
)

# Draw the learning curve from the recorded train and validation losses
recorded = history.history
plot_loss(recorded["loss"], recorded["val_loss"])

<img src="files/model_loss.png" style="height:400px"/>

#### 6.2. # of training examples w.r.t accuracy

In [5]:
def plot_results(train_accs, test_accs, sizes=None):
  """Plot training and test accuracy against the number of training samples.

  Args:
    train_accs: training accuracies, one per training-set size.
    test_accs: test accuracies, one per training-set size.
    sizes: x-axis values (training-set sizes). When omitted, the notebook-level
      `training_sizes` variable is used, which keeps existing two-argument
      calls working.
  """
  if sizes is None:
    # Backward-compatible fallback: `training_sizes` must exist as a global at call time
    sizes = training_sizes
  plt.plot(sizes, train_accs, 'o-', label="Training Accuracy")
  plt.plot(sizes, test_accs, 'o-', label="Test Accuracy")
  plt.title('Accuracy vs Number of training samples')
  plt.xlabel('# of training samples')
  plt.ylabel('Accuracy')
  plt.legend(loc="best")
  plt.show()

In [None]:
initial_weights = model.get_weights() # the models above
train_accs = []
test_accs = []
training_sizes = [125, 502, 879]
# Early stopping on validation accuracy needs validation data at fit() time; the
# previous call passed a validation-monitoring callback without any validation_data.
# A callback local to this cell keeps the loop independent of callbacks defined in
# other cells. "val_accuracy" is the key tf.keras 2.x logs for metrics=['accuracy'].
early_stop = EarlyStopping(monitor="val_accuracy", patience=5)
for size in training_sizes:
    # Get a fraction of training data (we only care about the training data)
    X_train_frac, y_train_frac = X_train[:size], y_train[:size]
    # Reset the model to the initial weights and train it on the new data fraction
    model.set_weights(initial_weights)
    model.fit(X_train_frac, y_train_frac, epochs=50,
              validation_data=(X_test, y_test), callbacks=[early_stop])

    # Evaluate on the training fraction the model was actually fitted on
    # (previously the full training set was used) and on the complete test set
    train_accs.append(model.evaluate(X_train_frac, y_train_frac)[1])
    test_accs.append(model.evaluate(X_test, y_test)[1])

# Plot train vs test accuracies
plot_results(train_accs, test_accs)

<img src="files/num_training.png" style="height:400px" />

#### 6.3 How the model changes if we change the activation function

- The function get_model() returns a copy of the model we already built and applies the activation function, passed in as a parameter, to its hidden layer. **TODO:** `ModelWrapper` is not defined in the cells shown here, and it is not yet clear how it is meant to be used.

In [3]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)), applied element-wise via NumPy."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
def tanh(x):
    """Return the hyperbolic tangent of x (thin wrapper around np.tanh)."""
    squashed = np.tanh(x)
    return squashed
def ReLU(x):
    """Return the element-wise maximum of 0.0 and x (rectified linear unit)."""
    floor = 0.0
    return np.maximum(floor, x)
def leaky_ReLU(x,alpha=0.01):
    """Return the element-wise maximum of alpha*x and x.

    For alpha <= 1 this passes non-negative x through unchanged and scales
    negative x by alpha.
    """
    scaled = alpha * x
    return np.maximum(scaled, x)

In [None]:
def get_model(act_function):
  """Validate an activation name and hand it to ModelWrapper.

  Args:
    act_function: one of 'relu', 'leaky_relu', 'sigmoid' or 'tanh'.

  Returns:
    Whatever ModelWrapper(act_function) returns.
    NOTE(review): ModelWrapper is not defined in the cells shown here; confirm where it comes from.

  Raises:
    ValueError: if act_function is not one of the four accepted names.
  """
  accepted = ('relu', 'leaky_relu', 'sigmoid', 'tanh')
  if act_function in accepted:
    print("Finishing with",act_function,"...")
    return ModelWrapper(act_function)
  raise ValueError('Make sure your activation functions are named correctly!')

In [None]:
# Activation functions to try
activations = ["relu", "leaky_relu", "sigmoid", "tanh"]

# Per-activation results, keyed by activation name
activation_results = {}      # full History object
val_loss_per_function = {}   # validation loss per epoch
val_acc_per_function = {}    # validation accuracy per epoch

for act in activations:
  # Get a new model with the current activation
  model = get_model(act_function=act)
  # Fit the model silently for 100 epochs, validating on the test split
  history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, verbose=0)
  activation_results[act] = history
  val_loss_per_function[act] = history.history["val_loss"]
  # tf.keras 2.x logs metrics=['accuracy'] as "val_accuracy"; older Keras used "val_acc".
  # Use whichever key is present so the lookup cannot KeyError.
  val_acc_key = "val_accuracy" if "val_accuracy" in history.history else "val_acc"
  val_acc_per_function[act] = history.history[val_acc_key]

In [None]:
# Build both frames first: one column per activation function, one row per epoch
val_loss = pd.DataFrame(data=val_loss_per_function)
val_acc = pd.DataFrame(data=val_acc_per_function)

# Validation loss curves, one line per activation
val_loss.plot()
plt.show()

# Validation accuracy curves, one line per activation
val_acc.plot()
plt.show()

<img src="files/var_loss_with_different_activations.png" style="height:400px"/>

<img src="files/var_acc_with_different_activations.png" style="height:400px"/>

#### 6.4 BatchNormalization or not

- Build one model with batch normalization and one without, then train both and compare.

In [6]:
# Import batch normalization from keras layers
from tensorflow.keras.layers import BatchNormalization

In [9]:
def compare_histories_acc(h1,h2):
  """Plot train and validation accuracy of two training runs on one chart.

  Args:
    h1: History object of the first run (labelled 'Train' / 'Test').
    h2: History object of the second run (labelled '... with Batch Normalization').
  """
  for run in (h1, h2):
    # tf.keras 2.x logs metrics=['accuracy'] as "accuracy"/"val_accuracy"; older
    # Keras used "acc"/"val_acc". Use whichever is present so the lookup cannot KeyError.
    acc_key = 'accuracy' if 'accuracy' in run.history else 'acc'
    plt.plot(run.history[acc_key])
    plt.plot(run.history['val_' + acc_key])
  plt.title("Batch Normalization Effects")
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  # Legend order matches plotting order: h1 train, h1 test, h2 train, h2 test
  plt.legend(['Train', 'Test', 'Train with Batch Normalization', 'Test with Batch Normalization'], loc='best')
  plt.show()

In [None]:
# Three identical (Dense 50 ReLU -> BatchNormalization) stages; only the first
# Dense layer declares the 64-feature input
batchnorm_model = Sequential()
for stage in range(3):
    dense_kwargs = dict(activation='relu', kernel_initializer='normal')
    if stage == 0:
        dense_kwargs['input_shape'] = (64,)
    batchnorm_model.add(Dense(50, **dense_kwargs))
    batchnorm_model.add(BatchNormalization())
# 10-unit softmax output layer
batchnorm_model.add(Dense(10, activation='softmax', kernel_initializer='normal'))
# SGD optimizer, categorical cross-entropy loss, accuracy tracked as a metric
batchnorm_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer="sgd")

In [None]:
# Baseline without batch normalization: the same Dense stack as the batch-norm
# model, declared as a single layer list
standard_model = Sequential([
    Dense(50, input_shape=(64,), activation='relu', kernel_initializer='normal'),
    Dense(50, activation='relu', kernel_initializer='normal'),
    Dense(50, activation='relu', kernel_initializer='normal'),
    Dense(10, activation='softmax', kernel_initializer='normal'),
])
# SGD optimizer, categorical cross-entropy loss, accuracy tracked as a metric
standard_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer="sgd")

In [None]:
# Both models are trained with identical settings: the same validation split,
# 10 epochs, and no progress output
fit_settings = dict(validation_data=(X_test, y_test), epochs=10, verbose=0)

# History of the plain model, then of the batch-normalized one
history1 = standard_model.fit(X_train, y_train, **fit_settings)
history2 = batchnorm_model.fit(X_train, y_train, **fit_settings)

# Overlay both accuracy histories with compare_histories_acc
compare_histories_acc(history1, history2)

<img src="files/batch_normalization.png" style="height:400px" />

### 7. Tuning with keras

In [11]:
from tensorflow.keras.optimizers import Adam

In [10]:
# Creates a model given an activation and learning rate
def create_model(learning_rate=0.01, activation='relu'):
  """Build and compile a binary classifier on 30 input features.

  Args:
    learning_rate: learning rate passed to the Adam optimizer.
    activation: activation used by both hidden layers.

  Returns:
    A compiled Sequential model: Dense(128) -> Dense(256) -> Dense(1, sigmoid),
    with binary cross-entropy loss and accuracy as a metric.
  """
  # `learning_rate` is the documented keyword of tf.keras Adam;
  # `lr` is only a deprecated backward-compatibility alias.
  opt = Adam(learning_rate=learning_rate)

  # Create the binary classification model
  model = Sequential()
  model.add(Dense(128, input_shape=(30,), activation=activation))
  model.add(Dense(256, activation=activation))
  model.add(Dense(1, activation='sigmoid'))

  # Compile the model with the optimizer, loss, and metrics
  model.compile(optimizer=opt, loss="binary_crossentropy", metrics=['accuracy'])
  return model

In [None]:
# Variant for tuning the number of extra hidden layers (nl) and the units in each (nn)
def create_model2(learning_rate=0.01, activation='relu', nl=1, nn=256):
  """Build and compile a binary classifier with a tunable hidden-layer stack.

  Args:
    learning_rate: learning rate passed to the Adam optimizer.
    activation: activation used by every hidden layer.
    nl: number of hidden layers added after the fixed 128-unit first layer.
    nn: number of units in each of those `nl` layers.

  Returns:
    A compiled Sequential model ending in a single sigmoid unit, with binary
    cross-entropy loss and accuracy as a metric.
  """
  model = Sequential()
  # The first layer is fixed: 128 units on a 30-feature input
  model.add(Dense(128, input_shape=(30,), activation=activation))
  for _ in range(nl):
    model.add(Dense(nn, activation=activation))
  model.add(Dense(1, activation='sigmoid'))

  # `learning_rate` is the documented keyword of tf.keras Adam;
  # `lr` is only a deprecated backward-compatibility alias.
  model.compile(optimizer=Adam(learning_rate=learning_rate), loss="binary_crossentropy", metrics=['accuracy'])
  return model

In [12]:
# Import KerasClassifier from keras wrappers. Using this wrapper, keras model 
# now can be used as an sklearn estimator, eg. we do cross_val_score
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [None]:
# RandomizedSearchCV and KFold are used below but are not imported by any cell
# shown in this notebook, so bring them into scope here.
from sklearn.model_selection import KFold, RandomizedSearchCV

# Create a KerasClassifier
model = KerasClassifier(build_fn=create_model)

# Define the parameters to try out. We can also tune number of layers and number of nodes
# in hidden layers.
params = {'activation': ["relu", "tanh"],
          'batch_size': [32, 128, 256],
          'epochs': [50, 100, 200], 'learning_rate': [0.1, 0.01, 0.001]}

# Create a randomized search CV object over the parameters, using 3-fold cross-validation
random_search = RandomizedSearchCV(model, param_distributions=params, cv=KFold(3))
random_search_results = random_search.fit(X, y)
print(random_search_results.best_score_, random_search_results.best_params_)

- After running the search, select the best parameters: Best: 0.975395 using {learning_rate: 0.001, epochs: 50, batch_size: 128, activation: relu} 

In [None]:
# Import KerasClassifier from the tf.keras wrappers. Every model in this notebook is
# built with tensorflow.keras, so the wrapper comes from the same package instead of
# the standalone `keras` distribution.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

In [None]:
# Wrap create_model as a scikit-learn estimator with fixed training settings
model = KerasClassifier(
    build_fn=create_model,
    epochs=50,
    batch_size=128,
    verbose=0,
)

# One score per fold from 3-fold cross-validation
kfolds = cross_val_score(model, X, y, cv=3)

# Report the mean of the fold scores, then their standard deviation
mean_accuracy, accuracy_std = kfolds.mean(), kfolds.std()
print('The mean accuracy was:', mean_accuracy)
print('With a standard deviation of:', accuracy_std)