In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import imdb

# Report the TensorFlow and Keras versions this notebook runs against.
print(tf.version.VERSION)
print(tf.keras.__version__)

In [None]:
#configuring optimizers
from tensorflow.keras import optimizers
# custom losses and metrics
from tensorflow.keras import losses
from tensorflow.keras import metrics

from tensorflow.keras import regularizers

In [None]:
def plot_history(history_dict, what_to_plot, title, xlabel='Epochs', ylabel='Accuracy'):
    """Plot two per-epoch series from a training-history dict on one figure.

    Args:
        history_dict: Mapping from metric name to a sequence of per-epoch values.
        what_to_plot: Sequence whose first two items are ``(key, label)``
            pairs, i.e. ``[(k1, l1), (k2, l2)]``. The first series is drawn
            with the ``'bo'`` format, the second with ``'b'``.
        title: Figure title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
    """
    first_key, first_label = what_to_plot[0]
    second_key, second_label = what_to_plot[1]

    first_series = history_dict[first_key]
    # Epochs are numbered from 1; the x range is sized by the first series.
    epoch_numbers = range(1, len(first_series) + 1)

    plt.clf()  # start from an empty figure
    plt.plot(epoch_numbers, first_series, 'bo', label=first_label)
    plt.plot(epoch_numbers, history_dict[second_key], 'b', label=second_label)

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()

    plt.show()

In [None]:
#def plot_loss_accuracy():
#    fig, (ax1, ax2) = plt.subplots(1, 2)
#    fig.suptitle('Horizontally stacked subplots')
#    ax1.plot(x, y)
#    ax2.plot(x, -y)

In [None]:
from tensorflow.keras.datasets import reuters

# Load the Reuters newswire dataset. Per the Keras dataset docs, num_words=10000
# keeps only the 10,000 most frequent words in the encoded sequences.
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

# Number of training and test samples.
print(len(train_data), len(test_data))

In [None]:
# Peek at one raw training sample exactly as returned by load_data.
train_data[10]

In [None]:
# Decode the first training newswire back into words.
word_index = reuters.get_word_index()
# Invert the word -> index mapping so indices can be looked up.
reverse_word_index = {index: word for word, index in word_index.items()}
# Sample indices are shifted by 3 relative to word_index (see load_data's
# index_from in the Keras docs); indices with no matching word become '?'.
decoded_newswire = ' '.join(
    reverse_word_index.get(token - 3, '?') for token in train_data[0]
)

In [None]:
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode index sequences into a 2-D float array.

    Args:
        sequences: Sized iterable of index collections (e.g. lists of ints).
        dimension: Width of each output row; every index must be a valid
            column position for that width.

    Returns:
        Array of shape ``(len(sequences), dimension)`` holding 1.0 at every
        index that occurs in the corresponding sequence and 0.0 elsewhere.
    """
    n_rows = len(sequences)
    encoded = np.zeros((n_rows, dimension))
    for row, indices in enumerate(sequences):
        # Fancy indexing flips every listed column of this row at once;
        # repeated indices simply set the same cell again.
        encoded[row, indices] = 1.
    return encoded

# Multi-hot encode the training and test newswires.
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels with the built-in Keras utility.
# NOTE(review): both names are reassigned further down in this cell by the
# hand-written to_one_hot() helper, so these two results are not used afterwards.
one_hot_train_labels = to_categorical(train_labels)
one_hot_test_labels = to_categorical(test_labels)

def to_one_hot(labels, dimension=46):
    """One-hot encode integer class labels.

    Args:
        labels: Sized iterable of integer class indices.
        dimension: Number of classes, i.e. the width of each output row.

    Returns:
        Float array of shape ``(len(labels), dimension)`` with a single 1.0
        per row at the column given by that row's label.
    """
    shape = (len(labels), dimension)
    encoded = np.zeros(shape)
    for position, class_id in enumerate(labels):
        encoded[position, class_id] = 1.
    return encoded

# Encode the labels with the hand-written helper; this replaces the values
# assigned to the same names earlier in this cell.
one_hot_train_labels = to_one_hot(train_labels)
one_hot_test_labels = to_one_hot(test_labels)

#one_hot_train_labels = to_categorical(train_labels)
#one_hot_test_labels = to_categorical(test_labels)


In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers

In [None]:
# Baseline classifier: two 64-unit ReLU layers feeding a 46-way softmax.
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])

# categorical_crossentropy is the best loss function for this case: it
# measures the distance between two probability distributions, here the
# distribution output by the network and the true distribution of the labels.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)



In [None]:
# Hold out the first 1000 training samples for validation; train on the rest.
x_val, partial_x_train = x_train[:1000], x_train[1000:]
y_val, partial_y_train = one_hot_train_labels[:1000], one_hot_train_labels[1000:]

In [None]:
# Train for 20 epochs, tracking metrics on the held-out validation split.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=20,
    batch_size=512,
    validation_data=(x_val, y_val),
)

In [None]:
# Per-epoch values recorded by fit(), keyed by metric name.
history_dict = history.history
# Show which metric names were recorded (displayed as the cell output).
history_dict.keys()

In [None]:
# Training vs. validation loss per epoch.
plot_history(history_dict, what_to_plot=[('loss', 'Training loss'),('val_loss', 'Validation loss')], 
              title='Training and validation loss', xlabel='Epochs', ylabel='Loss')

In [None]:
# Training vs. validation accuracy per epoch.
plot_history(history_dict, what_to_plot=[('accuracy', 'Training acc'),('val_accuracy', 'Validation acc')], 
              title='Training and validation accuracy', xlabel='Epochs', ylabel='accuracy')

In [None]:
def plot_loss_and_accuracy(history_dict, what_to_plot_loss=[('loss', 'Training loss'),('val_loss', 'Validation loss')],
                          what_to_plot_acc=[('accuracy', 'Training acc'),('val_accuracy', 'Validation acc')]):
    """Draw the loss figure and then the accuracy figure for one training run.

    Args:
        history_dict: Mapping from metric name to per-epoch values.
        what_to_plot_loss: Two ``(key, label)`` pairs for the loss figure.
        what_to_plot_acc: Two ``(key, label)`` pairs for the accuracy figure.

    The list defaults are shared between calls; they are only read here,
    never modified.
    """
    figures = (
        (what_to_plot_loss, 'Training and validation loss', 'Loss'),
        (what_to_plot_acc, 'Training and validation accuracy', 'accuracy'),
    )
    for series, figure_title, y_axis_label in figures:
        plot_history(history_dict, what_to_plot=series,
                     title=figure_title, xlabel='Epochs', ylabel=y_axis_label)

In [None]:
# model overfits after 9 epochs, so train a fresh copy of the same
# architecture for 9 epochs only and score it on the test set.
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=9,
    batch_size=512,
    validation_data=(x_val, y_val),
)

# evaluate() returns the loss followed by the compiled metrics.
results = model.evaluate(x_test, one_hot_test_labels, verbose=0)
print(results)

In [None]:
# Predict an output vector for every test sample with the trained model.
predictions = model.predict(x_test)
print(predictions[0].shape)

# A bare expression in the middle of a cell is not displayed (only the last
# expression of a cell is), so print the first prediction vector explicitly.
print(predictions[0])
# Index of the highest-scoring class for the first test sample.
print(np.argmax(predictions[0]))

We mentioned earlier that another way to encode the labels would be to cast them as
an integer tensor, like this:

y_train = np.array(train_labels)
y_test = np.array(test_labels)

Then we would have to use `sparse_categorical_crossentropy` instead of `categorical_crossentropy` as the loss function.

In [None]:
# create informational bottleneck :
# the 4-unit middle layer is much narrower than the 46-way output it feeds.
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(4, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=20,
    batch_size=128,
    validation_data=(x_val, y_val),
)
history_dict = history.history

plot_loss_and_accuracy(history_dict)

In [None]:
def test_hyperparameters(parameters, partial_x_train=partial_x_train, partial_y_train=partial_y_train,
                        x_val=x_val, y_val=y_val, x_test=x_test, y_test=one_hot_test_labels):
    """Train and evaluate a dense softmax classifier for one hyperparameter set.

    Builds ``dense_layer_count`` hidden Dense layers of ``hidden_units`` units
    each, followed by a 46-way softmax output, trains for 20 epochs, prints
    the test-set evaluation result and plots the loss/accuracy curves.

    Args:
        parameters: Dict of hyperparameters.
            Required keys: ``'loss_function'``, ``'dense_layer_count'``,
            ``'hidden_units'``, ``'activation_function'``.
            Optional keys: ``'batch_size'`` (falls back to 512), ``'lr'``
            (falls back to 0.001), ``'kernel_regularizer'`` (a dict with
            optional ``'l1'`` / ``'l2'`` factors, each falling back to 0).
        partial_x_train, partial_y_train: Training inputs and targets.
        x_val, y_val: Validation inputs and targets passed to ``fit``.
        x_test, y_test: Inputs and targets used for the final evaluation.

    The data defaults are bound once, when this function is defined, to the
    notebook globals of the same purpose.

    Raises:
        KeyError: If a required key is missing from ``parameters``.
    """
    loss_function = parameters['loss_function']
    dense_layer_count = parameters['dense_layer_count']
    hidden_units = parameters['hidden_units']
    activation_function = parameters['activation_function']
    batch_size = parameters.get('batch_size') or 512
    lr = parameters.get('lr') or 0.001

    # Expected form: {'l1': 0.001, 'l2': 0.0}; a missing or falsy factor counts as 0.
    kernel_regularizer_param = parameters.get('kernel_regularizer') or None
    kernel_regularizer = None
    if kernel_regularizer_param is not None:
        kernel_regularizer = regularizers.l1_l2(
            l1=kernel_regularizer_param.get('l1') or 0.,
            l2=kernel_regularizer_param.get('l2') or 0.)

    model = models.Sequential()
    # The first hidden layer also declares the input shape.
    model.add(layers.Dense(hidden_units, kernel_regularizer=kernel_regularizer,
                           activation=activation_function, input_shape=(10000,)))
    for _ in range(dense_layer_count - 1):
        model.add(layers.Dense(hidden_units, kernel_regularizer=kernel_regularizer,
                               activation=activation_function))

    # last output layer
    model.add(layers.Dense(46, activation='softmax'))

    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and rejected by newer Keras releases.
    model.compile(optimizer=optimizers.RMSprop(learning_rate=lr),
                  loss=loss_function, metrics=['accuracy'])

    history = model.fit(partial_x_train, partial_y_train,
                        epochs=20, batch_size=batch_size, validation_data=(x_val, y_val))

    history_dict = history.history
    # Evaluate against the y_test argument rather than the notebook global,
    # so that caller-supplied test targets are honoured.
    results = model.evaluate(x_test, y_test, verbose=0)

    print(results)
    plot_loss_and_accuracy(history_dict)

In [None]:
# Build an L1 regularizer with factor 0.001 and print its serialized form.
regularizer = regularizers.l1(0.001)
print(regularizers.serialize(regularizer))

In [None]:
# Experiment: 1 hidden layer of 16 units, no kernel regularization.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 1,
    'hidden_units': 16,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 16 units, no kernel regularization.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 16,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 32 units, no kernel regularization.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 32,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 64 units, no kernel regularization.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 64,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 3 hidden layers of 64 units, no kernel regularization.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 3,
    'hidden_units': 64,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 128 units, no kernel regularization.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 128,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 64 units with L1 kernel regularization (0.001).
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 64,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0.001, 'l2': 0.},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 64 units with L2 kernel regularization (0.001).
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 64,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0., 'l2': 0.001},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 128 units with L1 kernel regularization (0.001).
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 128,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0.001, 'l2': 0.},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 256 units with combined L1 (0.01) + L2 (0.01)
# kernel regularization.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 256,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0.01, 'l2': 0.01},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 256 units with L2 kernel regularization (0.001).
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 256,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0., 'l2': 0.001},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 256 units with L2 kernel regularization (0.001).
# NOTE(review): identical configuration to the cell directly before this one.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 256,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0., 'l2': 0.001},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 64 units with L2 kernel regularization (0.001).
# NOTE(review): repeats a configuration already run earlier in this notebook.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 64,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0., 'l2': 0.001},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 128 units with L1 kernel regularization (0.001).
# NOTE(review): repeats a configuration already run earlier in this notebook.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 128,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0.001, 'l2': 0.},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 256 units with combined L1 (0.01) + L2 (0.01)
# kernel regularization.
# NOTE(review): repeats a configuration already run earlier in this notebook.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 256,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0.01, 'l2': 0.01},
}
test_hyperparameters(parameters)

In [None]:
# Experiment: 2 hidden layers of 256 units with L2 kernel regularization (0.001).
# NOTE(review): repeats a configuration already run earlier in this notebook.
parameters = {
    'loss_function': 'categorical_crossentropy',
    'dense_layer_count': 2,
    'hidden_units': 256,
    'activation_function': 'relu',
    'batch_size': 128,
    'lr': 0.001,
    'kernel_regularizer': {'l1': 0., 'l2': 0.001},
}
test_hyperparameters(parameters)