In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
import tensorflow as tf

# Let TensorFlow grow GPU memory on demand (needed to avoid a tf error).
# Best-effort: the notebook must still run on machines without a usable GPU,
# so a failure is reported and then ignored instead of being silently hidden.
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    # memory growth has to be configured the same way on every visible GPU
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as err:
    print('GPU memory growth not enabled:', err)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

import tensorflow as tf
import numpy as np

# Boston Model

In [2]:
# loading the train/test vectors and labels that we prepared earlier
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# run arbitrary code -- only use it on files we produced ourselves.
boston_dir = os.path.join('data', 'boston')
X_train, X_test, y_train, y_test = (
    np.load(os.path.join(boston_dir, file_name), allow_pickle=True)
    for file_name in (
        'boston_train_vectors.npy',
        'boston_test_vectors.npy',
        'boston_train_labels.npy',
        'boston_test_labels.npy',
    )
)

In [3]:
X_train.shape, y_train.shape

((404, 13), (404,))

In [4]:
# seeding both NumPy and TensorFlow to get reproducible results
np.random.seed(42)
tf.random.set_seed(42)

# building a small model as an experiment
def build_model():
    """Build and compile the small experimental regression network.

    The network stacks three sigmoid Dense layers (13, 50 and 50 units) and a
    single linear output unit. It is compiled with MSE loss, the 'sgd'
    optimizer and MAE as an extra metric.

    Returns:
        The compiled Sequential model.
    """
    layer_stack = [
        Dense(13, activation='sigmoid'),
        Dense(50, activation='sigmoid'),
        Dense(50, activation='sigmoid'),
        Dense(1, activation='linear'),
    ]
    network = Sequential(layer_stack)
    network.compile(loss='mse', optimizer='sgd', metrics=['mae'])
    return network

In [5]:
# early stopping is not used by the current implementation yet;
# the callback list is prepared for the final model
early_stopping = EarlyStopping(
    monitor="val_mae",
    min_delta=0.01,
    patience=2,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=True,
)
cb = [early_stopping]

In [6]:
#model.fit(X_train, y_train, batch_size=1, epochs=20, validation_split=0.1, callbacks=cb)

# Using the custom training loop

In [7]:
# importing our custom loop
from loop import TrainingLoop
# importing our batch selection algorithms
from batch_selection import windowed_batch_selector, sorting_batch_selector

In [8]:
# training hyper-parameters
batch_size, epochs = 8, 20

# using the SGD optimizer for training
optimizer = tf.keras.optimizers.SGD()

# MSE loss function for this regression task
loss_function = tf.keras.losses.MeanSquaredError()

# using MAE as our secondary metric: one instance for training, one for validation
train_metrics, val_metrics = (tf.keras.metrics.MeanAbsoluteError() for _ in range(2))

In [9]:
def train(model, X_train, y_train, batch_selector, epochs):
    """Run the custom training loop on `model` with the given batch selector.

    The loss function, optimizer, metrics and batch size are read from the
    notebook-level globals of the same names.

    Args:
        model: model handed to `TrainingLoop`.
        X_train: training inputs handed to `TrainingLoop`.
        y_train: training targets handed to `TrainingLoop`.
        batch_selector: `windowed_batch_selector`, `sorting_batch_selector`
            or `None`; any other value raises `KeyError`.
        epochs: value forwarded to `TrainingLoop.train`.
    """
    # tag per selector: printed as a heading and used as the log sub-folder
    selector_tags = {
        windowed_batch_selector: 'windowed',
        sorting_batch_selector: 'sorting',
        None: 'original',
    }
    tag = selector_tags[batch_selector]
    print('\n\n' + tag + '\n')

    log_path = os.path.join('logs', tag, 'boston_houses.csv')
    loop = TrainingLoop(
        model, X_train, y_train,
        loss_function,
        optimizer,
        train_metrics,
        val_metrics,
        validation_split=0.1,
        batch_size=batch_size,
        batch_selection=batch_selector,
        log_file=log_path,
    )
    loop.train(epochs)

In [10]:
def evaluate(model, X_test, y_test):
    """Evaluate `model` on the test data and print two scores.

    Prints the square root of the first value returned by `model.evaluate`,
    followed by the second value. For a model compiled as in `build_model`
    (loss='mse', metrics=['mae']) these are the RMSE and the MAE.

    Args:
        model: object exposing `evaluate(X_test, y_test)`.
        X_test: test inputs.
        y_test: test targets.
    """
    scores = model.evaluate(X_test, y_test)
    root_of_loss = np.sqrt(scores[0])
    second_score = scores[1]
    print(root_of_loss, second_score)

In [13]:
@tf.function
def calc_loss(x_train, y_train, model, loss_function):
    """Return the loss of `model` on one batch, using a forward pass only.

    Args:
        x_train: batch inputs passed to `model`.
        y_train: batch targets, passed to `loss_function` as its first argument.
        model: callable invoked as `model(x_train, training=False)`.
        loss_function: callable invoked as `loss_function(y_train, predictions)`.

    Returns:
        The value returned by `loss_function` for this batch.
    """
    # No gradient is ever taken from this value, so no GradientTape is opened
    # around the forward pass.
    predictions = model(x_train, training=False)
    return loss_function(y_train, predictions)

length = 5  # number of consecutive batches scanned by windowed_batch_selector


# NOTE: this definition shadows the `windowed_batch_selector` imported from
# `batch_selection` earlier in the notebook.
def windowed_batch_selector(data, idx, model, loss_function):
    """Pick the batch with the largest loss among the next `length` batches.

    Starting at position `idx`, the loss of `length` consecutive batches is
    computed with `calc_loss` and the index of the batch with the largest loss
    is returned. Close to the end of `data` no scan is done and `idx` is
    returned unchanged.

    Args:
        data: indexable collection whose items hold the batch inputs at
            position 0 and the batch targets at position 1.
        idx: current position in `data`.
        model: model forwarded to `calc_loss`.
        loss_function: loss forwarded to `calc_loss`.

    Returns:
        Index into `data` of the selected batch; `idx` itself when no scanned
        batch has a loss above 0.
    """
    # NOTE(review): this boundary also skips the window that starts exactly at
    # len(data) - length, although it would still fit -- confirm it is intended.
    if idx >= len(data) - length:
        # The answer is `idx` regardless of the loss, so no forward pass is run.
        return idx

    largest_loss = 0
    largest_loss_idx = idx
    for i in range(idx, idx + length):
        batch_inputs, batch_targets = data[i][0], data[i][1]
        loss = calc_loss(batch_inputs, batch_targets, model, loss_function)
        if loss > largest_loss:
            largest_loss = loss
            largest_loss_idx = i
    return largest_loss_idx


# ranking of [batch index, loss] pairs shared between calls of sorting_batch_selector
losses = []


# NOTE: this definition shadows the `sorting_batch_selector` imported from
# `batch_selection` earlier in the notebook.
def sorting_batch_selector(data, idx, model, loss_function):
    """Return batch indices in order of decreasing loss.

    On the call with `idx == 0` the loss of every batch in `data` is computed
    with `calc_loss` and the ranking is stored in the module-level `losses`
    list. A call with position `idx` returns the index of the batch at rank
    `idx` of that ranking. After position `len(data) - 1` has been served the
    ranking is emptied.

    The ranking is rebuilt from scratch at `idx == 0`, so entries left behind
    by a pass that stopped early (e.g. an interrupted run) are never mixed
    into the next one.

    Args:
        data: indexable collection whose items hold the batch inputs at
            position 0 and the batch targets at position 1.
        idx: rank to serve; the ranking is (re)computed when it is 0.
        model: model forwarded to `calc_loss`.
        loss_function: loss forwarded to `calc_loss`.

    Returns:
        Index into `data` of the batch at rank `idx`.

    Raises:
        IndexError: if `idx` is outside the stored ranking, e.g. when the
            first call does not use `idx == 0`.
    """
    global losses
    if idx == 0:
        scored = [
            [i, float(calc_loss(data[i][0], data[i][1], model, loss_function))]
            for i in range(len(data))
        ]
        # sorted() is stable, so batches with equal loss keep their data order
        losses = sorted(scored, key=lambda entry: entry[1], reverse=True)

    return_idx = losses[idx][0]
    if idx == len(data) - 1:
        # last rank served: drop the ranking
        losses.clear()

    return return_idx

In [14]:
# re-seeding both generators right before the model is built
np.random.seed(42)
tf.random.set_seed(42)

# build a fresh model, train it with the sorting selector, then score it on the test set
model = build_model()
train(model, X_train, y_train, sorting_batch_selector, epochs)
evaluate(model, X_test, y_test)

	  0% | 0/45 [00:00<?, ?it/s]

sorting



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/20	Loss: 38.369	Metrics: 7.2289: 	Validation metrics: 6.7987: 	100% | 45/45 [00:00<00:00, 153.18it/s]
Epoch 2/20	Loss: 11.717	Metrics: 6.2145: 	Validation metrics: 5.1483: 	100% | 45/45 [00:00<00:00, 376.50it/s]
Epoch 3/20	Loss: 8.8421	Metrics: 4.7150: 	Validation metrics: 3.8345: 	100% | 45/45 [00:00<00:00, 346.62it/s]
Epoch 4/20	Loss: 2.0045	Metrics: 3.7056: 	Validation metrics: 3.3208: 	100% | 45/45 [00:00<00:00, 353.63it/s]
Epoch 5/20	Loss: 2.7182	Metrics: 3.6447: 	Validation metrics: 3.1772: 	100% | 45/45 [00:00<00:00, 309.90it/s]
Epoch 6/20	Loss: 1.9004	Metrics: 3.5040: 	Validation metrics: 3.1053: 	100% | 45/45 [00:00<00:00, 283.02it/s]