# Test of epoch training time for different dataset sizes and sample rates

### Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import time
from training_functions import *
import logging

In [3]:
# Emit INFO-level log records so the "time elapsed" messages logged by the
# helper calls below show up in the cell output.
logging.basicConfig(level=logging.INFO)

## Epoch Length Tests with a sample rate of 20

#### Get the data from ES and create a pandas dataframe

We take data points from the 9th to the 13th of June 2020 with a sample rate of 20.

In [4]:
# Query parameters shared by both tiers; only `tier` differs between the calls.
es_query = {
    "start_date": "2020-06-09",
    "end_date": "2020-06-13",
    "s_rate": 20,
    "host": 'localhost',
    "port": 9200,
}
q_one = es_to_df(tier="censhare", **es_query)
q_two = es_to_df(tier="pic", **es_query)

INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.391s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.344s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.451s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.166s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.387s]
INFO:root:ES to Df: 2.11 time elapsed
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.115s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.749s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.732s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.141s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.189s]
INFO:root:ES to Df: 2.56 time elapsed


#### Create the feature enriched dataset for each item

In [5]:
# Combine both queue dataframes into X, y and maxlen
# (maxlen is handed to pad_split further down).
X, y, maxlen = create_dataset_train(q_one, q_two)

INFO:root:3215 items in the first and second queue
INFO:root:11177 items in the second queue only
INFO:root:14392 items in the whole dataset
INFO:root:Create dataset: 19.75 time elapsed


#### Scaling

The scaling call contains dummy inputs that are only used for naming the scaler, which isn't used here for testing the MAE.

In [6]:
# Scale X and y. Everything after `y` is a placeholder value: per the note
# above, these arguments only name the scaler and do not describe the data.
X_scaled, y_scaled = scale(
    X,
    y,
    start_date="2020-11-03",
    end_date="2020-11-07",
    epochs='50',
    steps='720',
    s_rate='20',
    model_name='test',
)

INFO:root:Scale: 68.85 time elapsed


#### Padding

In [7]:
# test_size=0.02 keeps almost all items for training (about 14000, see the
# shape printed below). Presumably pads to maxlen before splitting - confirm.
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.02)

INFO:root:Pad Split: 0.26 time elapsed


In [8]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 0.06 time elapsed


Shape of the training samples (number of samples, timesteps, features)

In [9]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(14104, 720, 6)

### Epoch Length Test with approx. 14000 items

In [13]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the "best" checkpoint is chosen on held-out data rather than training error.
# With only 2 epochs, the patience of 10 can never trigger early stopping.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_14000items.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=2, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_14000items.h5')

Epoch 1/2
Epoch 2/2


### Epoch Length Test with approx. 11000 items

#### Padding

In [23]:
# test_size=0.25 leaves about 11000 items for training (see the shape below).
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.25)

INFO:root:Pad Split: 0.23 time elapsed


In [24]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 0.06 time elapsed


In [25]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(10794, 720, 6)

In [27]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the "best" checkpoint is chosen on held-out data rather than training error.
# With only 2 epochs, the patience of 10 can never trigger early stopping.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_11000items.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=2, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_11000items.h5')

Epoch 1/2
Epoch 2/2


### Epoch Length Test with approx. 7000 items

#### Padding

In [31]:
# test_size=0.5 leaves about 7000 items for training (see the shape below).
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.5)

INFO:root:Pad Split: 0.26 time elapsed


In [32]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 0.06 time elapsed


In [33]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(7196, 720, 6)

In [35]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the "best" checkpoint is chosen on held-out data rather than training error.
# With only 2 epochs, the patience of 10 can never trigger early stopping.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_7000items.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=2, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_7000items.h5')

Epoch 1/2
Epoch 2/2


### Epoch Length Test with approx. 3500 items

#### Padding

In [39]:
# test_size=0.75 leaves about 3500 items for training (see the shape below).
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.75)

INFO:root:Pad Split: 0.25 time elapsed


In [40]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 0.06 time elapsed


In [38]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(3598, 720, 6)

In [41]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the "best" checkpoint is chosen on held-out data rather than training error.
# With only 2 epochs, the patience of 10 can never trigger early stopping.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_3500items.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=2, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_3500items.h5')

Epoch 1/2
Epoch 2/2


## Sample Rate Test 

### Epoch Length Test Sample Rate 1 

In [4]:
# Query parameters shared by both tiers; only `tier` differs between the calls.
es_query = {
    "start_date": "2020-06-09",
    "end_date": "2020-06-13",
    "s_rate": 1,
    "host": 'localhost',
    "port": 9200,
}
q_one = es_to_df(tier="censhare", **es_query)
q_two = es_to_df(tier="pic", **es_query)

INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.092s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.080s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.091s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.128s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.210s]
INFO:root:ES to Df: 1.12 time elapsed
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.116s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.785s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.658s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.166s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.196s]
INFO:root:ES to Df: 3.75 time elapsed


#### Create the feature enriched dataset for each item

In [5]:
# Combine both queue dataframes into X, y and maxlen
# (maxlen is handed to pad_split further down).
X, y, maxlen = create_dataset_train(q_one, q_two)

INFO:root:5598 items in the first and second queue
INFO:root:11644 items in the second queue only
INFO:root:17242 items in the whole dataset
INFO:root:Create dataset: 56.96 time elapsed


#### Scaling

In [6]:
# Scale X and y. NOTE(review): the dates, steps='720' and s_rate='20' do not
# match the data loaded for this section (June 2020, s_rate=1); the note in the
# first "Scaling" section says they are dummies that only name the scaler.
X_scaled, y_scaled = scale(
    X,
    y,
    start_date="2020-11-03",
    end_date="2020-11-07",
    epochs='50',
    steps='720',
    s_rate='20',
    model_name='test',
)

INFO:root:Scale: 81.99 time elapsed


#### Padding

In [7]:
# test_size=0.0001 keeps practically every item in the training set
# (17240 of 17242 items, see the shape below).
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.0001)

INFO:root:Pad Split: 19.08 time elapsed


In [8]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 8.24 time elapsed


In [9]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(17240, 14400, 6)

In [10]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the checkpoint is chosen on held-out data rather than training error.
# With a single epoch, the patience of 10 can never trigger early stopping.
# NOTE(review): the file names say "2epochs" and "10000items" although this
# cell trains for 1 epoch; paths are kept unchanged - confirm the naming.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_10000items_1srate.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=1, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_10000items_1srate.h5')



### Epoch Length Test Sample Rate 5

In [11]:
# Query parameters shared by both tiers; only `tier` differs between the calls.
es_query = {
    "start_date": "2020-06-09",
    "end_date": "2020-06-13",
    "s_rate": 5,
    "host": 'localhost',
    "port": 9200,
}
q_one = es_to_df(tier="censhare", **es_query)
q_two = es_to_df(tier="pic", **es_query)

INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.135s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.363s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.387s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.229s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.306s]
INFO:root:ES to Df: 1.93 time elapsed
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.115s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.853s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.713s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.153s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.195s]
INFO:root:ES to Df: 2.82 time elapsed


#### Create the feature enriched dataset for each item

In [12]:
# Combine both queue dataframes into X, y and maxlen
# (maxlen is handed to pad_split further down).
X, y, maxlen = create_dataset_train(q_one, q_two)

INFO:root:4343 items in the first and second queue
INFO:root:10937 items in the second queue only
INFO:root:15280 items in the whole dataset
INFO:root:Create dataset: 26.72 time elapsed


#### Scaling

In [13]:
# Scale X and y. NOTE(review): the dates, steps='720' and s_rate='20' do not
# match the data loaded for this section (June 2020, s_rate=5); the note in the
# first "Scaling" section says they are dummies that only name the scaler.
X_scaled, y_scaled = scale(
    X,
    y,
    start_date="2020-11-03",
    end_date="2020-11-07",
    epochs='50',
    steps='720',
    s_rate='20',
    model_name='test',
)

INFO:root:Scale: 70.59 time elapsed


#### Padding

In [14]:
# test_size=0.0001 keeps practically every item in the training set
# (15278 of 15280 items, see the shape below).
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.0001)

INFO:root:Pad Split: 0.73 time elapsed


In [15]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 0.28 time elapsed


In [16]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(15278, 2880, 6)

In [17]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the checkpoint is chosen on held-out data rather than training error.
# With a single epoch, the patience of 10 can never trigger early stopping.
# NOTE(review): the file names say "2epochs" and "10000items" although this
# cell trains for 1 epoch; paths are kept unchanged - confirm the naming.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_10000items_5srate.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=1, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_10000items_5srate.h5')



### Epoch Length Test Sample Rate 10

In [18]:
# Query parameters shared by both tiers; only `tier` differs between the calls.
es_query = {
    "start_date": "2020-06-09",
    "end_date": "2020-06-13",
    "s_rate": 10,
    "host": 'localhost',
    "port": 9200,
}
q_one = es_to_df(tier="censhare", **es_query)
q_two = es_to_df(tier="pic", **es_query)

INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.094s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.094s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.089s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.121s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.208s]
INFO:root:ES to Df: 0.91 time elapsed
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.119s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:1.132s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.971s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.146s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.212s]
INFO:root:ES to Df: 3.25 time elapsed


#### Create the feature enriched dataset for each item

In [19]:
# Combine both queue dataframes into X, y and maxlen
# (maxlen is handed to pad_split further down).
X, y, maxlen = create_dataset_train(q_one, q_two)

INFO:root:3858 items in the first and second queue
INFO:root:10940 items in the second queue only
INFO:root:14798 items in the whole dataset
INFO:root:Create dataset: 22.15 time elapsed


#### Scaling

In [20]:
# Scale X and y. NOTE(review): the dates, steps='720' and s_rate='20' do not
# match the data loaded for this section (June 2020, s_rate=10); the note in the
# first "Scaling" section says they are dummies that only name the scaler.
X_scaled, y_scaled = scale(
    X,
    y,
    start_date="2020-11-03",
    end_date="2020-11-07",
    epochs='50',
    steps='720',
    s_rate='20',
    model_name='test',
)

INFO:root:Scale: 67.88 time elapsed


#### Padding

In [21]:
# test_size=0.0001 keeps practically every item in the training set
# (14796 of 14798 items, see the shape below).
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.0001)

INFO:root:Pad Split: 0.41 time elapsed


In [22]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 0.13 time elapsed


In [23]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(14796, 1440, 6)

In [24]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the checkpoint is chosen on held-out data rather than training error.
# With a single epoch, the patience of 10 can never trigger early stopping.
# NOTE(review): the file names say "2epochs" and "10000items" although this
# cell trains for 1 epoch; paths are kept unchanged - confirm the naming.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_10000items_10srate.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=1, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_10000items_10srate.h5')



### Epoch Length Test Sample Rate 20

In [25]:
# Query parameters shared by both tiers; only `tier` differs between the calls.
es_query = {
    "start_date": "2020-06-09",
    "end_date": "2020-06-13",
    "s_rate": 20,
    "host": 'localhost',
    "port": 9200,
}
q_one = es_to_df(tier="censhare", **es_query)
q_two = es_to_df(tier="pic", **es_query)

INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.098s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.087s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.082s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.108s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.215s]
INFO:root:ES to Df: 1.58 time elapsed
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.106s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.664s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.657s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.149s]
INFO:elasticsearch:POST http://localhost:9200/queues/_search [status:200 request:0.214s]
INFO:root:ES to Df: 2.37 time elapsed


#### Create the feature enriched dataset for each item

In [26]:
# Combine both queue dataframes into X, y and maxlen
# (maxlen is handed to pad_split further down).
X, y, maxlen = create_dataset_train(q_one, q_two)

INFO:root:3215 items in the first and second queue
INFO:root:11177 items in the second queue only
INFO:root:14392 items in the whole dataset
INFO:root:Create dataset: 20.00 time elapsed


#### Scaling

In [27]:
# Scale X and y. NOTE(review): the dates passed here do not match the June 2020
# data loaded for this section; the note in the first "Scaling" section says
# these arguments are dummies that only name the scaler.
X_scaled, y_scaled = scale(
    X,
    y,
    start_date="2020-11-03",
    end_date="2020-11-07",
    epochs='50',
    steps='720',
    s_rate='20',
    model_name='test',
)

INFO:root:Scale: 66.02 time elapsed


#### Padding

In [28]:
# test_size=0.0001 keeps practically every item in the training set
# (14390 of 14392 items, see the shape below).
X_train, X_test, y_train, y_test = pad_split(X_scaled, y_scaled, maxlen, test_size=0.0001)

INFO:root:Pad Split: 0.26 time elapsed


In [29]:
# rate=1: presumably a no-op downsampling that keeps every timestep - confirm.
X_train_sampled, X_test_sampled, y_train_sampled, y_test_sampled = downsample(X_train, X_test, y_train, y_test, rate=1)

INFO:root:Downsample: 0.07 time elapsed


In [30]:
# Display the training tensor shape: (number of samples, timesteps, features).
X_train_sampled.shape

(14390, 720, 6)

In [31]:
# Build the model: input dimensions are read from the padded training tensor,
# whose shape is (samples, timesteps, features).
n_steps = X_train_sampled.shape[1]  # number of timesteps per sequence
n_features = X_train_sampled.shape[2]  # number of features per timestep

# Two stacked LSTM layers that emit one value per timestep.
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(n_steps, n_features)))  # Masking layer for the zero padding
model.add(LSTM(20, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(20, return_sequences=True))
# Dense acts on the last axis of the LSTM output; its input size is inferred,
# so no `input_dim` is passed on this non-first layer.
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=['mae'])

# Callbacks watch the validation MAE produced by `validation_split` below, so
# the checkpoint is chosen on held-out data rather than training error.
# With a single epoch, the patience of 10 can never trigger early stopping.
# NOTE(review): the file names say "2epochs" and "10000items" although this
# cell trains for 1 epoch; paths are kept unchanged - confirm the naming.
early_stop = EarlyStopping(monitor='val_mae', mode='min', patience=10)
mcp_save = ModelCheckpoint('models/mcp_lstm_2epochs_20neurons_10000items_20srate.h5',
                           save_best_only=True, monitor='val_mae', mode='min')

# Train; the last 20% of the training data is held out for validation.
model.fit(X_train_sampled, y_train_sampled, epochs=1, validation_split=0.2,
          callbacks=[early_stop, mcp_save])

# Save the final model (state after the last epoch) in the models directory.
model.save('models/lstm_lstm_2epochs_20neurons_10000items_20srate.h5')

