In [1]:
from crontab import CronTab
import datetime as dt
import numpy as np
import os
import pandas as pd
from pathlib import Path
from sqlalchemy import select, text
from sqlalchemy.orm import sessionmaker
import sys
import tensorflow as tf
from time import strftime
import timeit

# Add path of subdirectory containing own modules
modules_path = os.path.join(os.getcwd(), 'data_collect_app')
if modules_path not in sys.path:
    sys.path.append(modules_path)

import finrail_db

# Load tensorboard
%load_ext tensorboard

2024-03-09 16:06:41.819424: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-09 16:06:41.936284: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-09 16:06:41.937908: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Build the directory path used for the TensorBoard log files of one run
def dir_logs(parent_dir='tf_log'):
    '''Return a time-stamped sub-directory path below parent_dir.
    Parameters:
        parent_dir <str or Path> directory that collects the log directories of all runs

    Return:
        <pathlib.Path> parent_dir / "<year>_<month>_<day>_<hour>_<minute>_<second>" based on
        the local time of the call. The directory itself is not created here.
    '''
    run_stamp = strftime('%Y_%m_%d_%H_%M_%S')
    return Path(parent_dir, run_stamp)


In [3]:
def tweak_train(df_):
    '''Reshape the long-format result of the SQL query into one row per date.
    Transformations:
        - DataType: "date" to datetime64[ns], "train_cat" to category, "total_length" to float32
        - Group by date and train category, take the maximum of each group and unstack the
          train category into columns
        - Move the date information from the index back into its own column
        - Flatten the nested column labels to "date", "commuter" and "long_distance"

    Parameters:
        df_ <pd.DataFrame> expected to hold exactly the columns "date", "train_cat" and
        "total_length"

    Return:
        <pd.DataFrame> with columns "date", "commuter" and "long_distance"; a date that has no
        row for one of the categories gets NaN in that column
    '''
    return (df_
    .astype({
        # An explicit unit is required: pandas >= 2.0 rejects the unit-less 'datetime64'
        'date': 'datetime64[ns]',
        'train_cat': 'category',
        'total_length': np.float32
    })
    # observed=False keeps every category as a group (the historical default), so the number
    # of unstacked columns does not depend on which categories happen to occur in the data
    .groupby(['date', 'train_cat'], observed=False)
    .max().unstack()
    .reset_index()
    # NOTE(review): assumes exactly two train categories whose sorted order is
    # (commuter, long-distance) -- verify against the SQL query
    .set_axis(['date', 'commuter', 'long_distance'], axis=1)
           )
# Creates tables in finrail db, returns database engine.
# The connection string is read from the FINRAIL_DB_URL environment variable when it is set, so
# credentials do not have to live in the notebook; the literal is only the local fallback.
db_url = os.environ.get(
    'FINRAIL_DB_URL', 'mysql+mysqlconnector://root:admin123@localhost:5000/finrail')
engine = finrail_db.create_tables(db_str=db_url)

# Open file and read the stored SQL query into a variable
with open('sql_query.txt', 'r') as query_file:
    sql_query_str = query_file.read()
    
# Open SQL connection and send query. As described for the stored query, it will:
# 1. Sum length of all wagons in a journey section
# 2. Choose maximum length of all wagons among journey sections for each train
# 3. Sum length of wagons for all trains per day, grouped by train category (Commuter, Long-distance)
with engine.connect() as connection:
    df_raw = pd.read_sql_query(text(sql_query_str), connection)

# Apply tweak_train to output of SQL query to obtain desired time series.
# The raw query result keeps its own name, so re-running this line alone stays valid.
df = tweak_train(df_raw)

In [5]:
def timeseries_window(data, seq_length, shift=1, stride=1):
    '''Cut a dataset into fixed-length windows and return them as a dataset of batches.
    Parameters:
        data <tf.data.Dataset> input dataset
        seq_length <int> number of elements in every output window
        shift <int> number of input elements the window start moves between two consecutive
        windows
        stride <int> step between two consecutive elements inside one window
        
    Return:
        <tf.data.Dataset> every element is one window of seq_length consecutive (strided)
        elements of data; incomplete windows at the end are dropped
    '''
    return (
        data
        .window(size=seq_length, shift=shift, stride=stride, drop_remainder=True)
        # .window() yields a dataset of datasets; flatten it back into a stream of elements
        .flat_map(lambda window: window)
        # every window holds exactly seq_length elements, so re-batching restores the windows
        .batch(seq_length)
    )

def timeseries_dataset_seq2seq(data, forecast_length=1, seq_length=7):
    '''Build (input, target) windows for training a sequence to sequence RNN.
    Parameters:
        data <tf.data.Dataset> input dataset
        forecast_length <int> number of time steps to be forecasted into the future
        seq_length <int> length of sequences fed to RNN (number of consecutive time steps 
        in one training instance)

    Return:
        <tf.data.Dataset> of tuples (inputs, targets): windowing is applied twice, so every
        element has seq_length rows of forecast_length+1 consecutive values; the first value
        of each row becomes the input, the remaining forecast_length values the target
    '''
    def split_inputs_targets(window):
        # column 0 is the current time step, columns 1.. are the steps to be forecasted
        return window[:, 0], window[:, 1:]

    # Inner windows are one step longer than forecast_length, as they also hold the input value
    horizon_windows = timeseries_window(data, forecast_length + 1)
    # Outer windows stack seq_length consecutive inner windows
    sequence_windows = timeseries_window(horizon_windows, seq_length)
    return sequence_windows.map(split_inputs_targets)

In [6]:
def prepare_training_dataset(df_, column, row_split, forecast_length=14, seq_length=30, 
                             batch_size=32, seed=42, reshuffle_each_iteration=True):
    '''Function takes Dataframe and returns tf.data.Dataset with specs:
    Parameters:
        df_ <pd.Dataframe> Dataframe with time series data (np.float32) in columns
        column <string> name of column in DataFrame to use
        row_split <tuple of two int> (start, stop) row positions; rows start..stop-1 of df_
        are used
        forecast_length <int> number of time steps to be forecasted into the future
        seq_length <int> length of sequences fed to RNN (number of consecutive time steps
        in one training instance)
        batch_size <int> batch_size of returned Dataset
        seed <int> random seed for shuffling data (unused while shuffling is disabled)
        reshuffle_each_iteration <boolean> Defines whether Dataset is to be reshuffled after
        each training epoch (unused while shuffling is disabled)
    Return:
        <tf.data.Dataset> ready to feed to .fit() of a sequence to sequence RNN; values are
        divided by 1E5
    '''
    start, stop = row_split
    # Slice the frame that was passed in, by position, so that every split gets its own rows
    series = df_[column].iloc[start:stop].values / 1E5
    data = tf.data.Dataset.from_tensor_slices(series)
    data = timeseries_dataset_seq2seq(data, forecast_length, seq_length)
    data = data.cache() # cache, so that previous transformations are only performed once
    # Shuffling is currently disabled; seed and reshuffle_each_iteration stay in the signature
    # so that callers keep working once the following line is re-enabled.
    #data = data.shuffle(500, seed=seed, reshuffle_each_iteration=reshuffle_each_iteration)
    return data.batch(batch_size=batch_size).prefetch(1)

# Training split: rows 0..1846, data up to and including 2020
commuter_train = prepare_training_dataset(df_=df, column='commuter', row_split=(0, 1847))

# Validation split: rows 1847..2576, data from 2021 up to and including 2022
commuter_val = prepare_training_dataset(
    df_=df, column='commuter', row_split=(1847, 2577),
    batch_size=200, reshuffle_each_iteration=False,
)

# Test split: rows 2577..3007, data from 2023 up to 2024-03-06
commuter_test = prepare_training_dataset(
    df_=df, column='commuter', row_split=(2577, 3008),
    batch_size=200, reshuffle_each_iteration=False,
)

In [7]:
# Input layer stack that defines the input shape (any number of time steps, one feature) and
# scales inputs down by a factor of 1E5: with mean 0 and variance 1E10 the Normalization layer
# divides by sqrt(1E10) = 1E5
input_processing = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(None, 1)),
    tf.keras.layers.Normalization(mean=0, variance=1E10)
])

# Output layer that scales predictions up by a factor of 1E5 (division by sqrt(1E-10) = 1E-5)
output_processing = tf.keras.Sequential([
    tf.keras.layers.Normalization(mean=0, variance=1E-10)
])

# RNN layer stack for a sequence to sequence model for univariate time series.
# The Dense layer emits 14 values per time step, matching the default forecast_length=14 of
# prepare_training_dataset.
rnn_seq2seq = tf.keras.Sequential([
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dense(14, activation='linear')  
])

# Complete model including Input, Output and RNN layer stacks (takes and returns unscaled values)
rnn_seq2seq_complete = tf.keras.Sequential([
    input_processing,
    rnn_seq2seq,
    output_processing
])

# Model used during training, to avoid calculating scaling on every iteration. It wraps the
# same rnn_seq2seq object as rnn_seq2seq_complete, so both models share their weights.
rnn_seq2seq_training = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(None, 1)),
    rnn_seq2seq
])


In [8]:
# Show TensorBoard inline, reading from tf_log (the default parent directory of dir_logs)
%tensorboard --logdir tf_log

In [18]:
# Define callback for Tensorboard update; every run logs into its own time-stamped directory
# NOTE(review): Custom_Metric is defined in a later cell of this notebook. That cell has to be
# executed before this one, so a top-to-bottom run on a fresh kernel fails here with NameError.
current_dir = dir_logs()
callback_tensorboard = tf.keras.callbacks.TensorBoard(current_dir, histogram_freq=5)

# Train the unscaled-input variant of the model; the loss covers all time steps of a sequence,
# the custom metric is reported in addition
optimizer = tf.keras.optimizers.Adam(learning_rate=5E-3)
rnn_seq2seq_training.compile(optimizer=optimizer, loss='mse', metrics=[Custom_Metric()])
rnn_seq2seq_training.fit(commuter_train, validation_data=commuter_val, 
                         epochs = 20, callbacks=[callback_tensorboard])#, verbose=0)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
 2/57 [>.............................] - ETA: 3s - loss: 0.0224

KeyboardInterrupt: 

In [16]:
class Custom_Metric(tf.keras.metrics.Metric):
    '''Metric calculating the root mean squared error (RMSE) for a sequence to sequence recurrent
    neural network (RNN) exclusively based on the last predicted vector of each sequence. 
    This is useful in situations where a sequence to sequence RNN is trained, but for production 
    only the last predicted vector matters. This occurs for example in time series prediction.
    This metric allows evaluating the model performance exclusively on the part of the output
    that matters for production, whereas the training loss of a sequence to sequence model
    takes all predicted vectors along a sequence into account.

    y_true and y_pred are expected batch-first, with the time steps on axis 1.
    '''
    def __init__(self, **kwarg):
        '''Function hands over kwargs to parent class and initiates two weights, which will 
        hold the sum of squares and the total count of summed numbers.
        '''
        super().__init__(**kwarg)
        # name is passed by keyword, as the positional order of add_weight differs between
        # Keras versions
        self.sum_of_squares = self.add_weight(name='sum_of_squares', initializer='zeros')
        self.sample_count = self.add_weight(name='sample_count', initializer='zeros')
    
    def update_state(self, y_true, y_pred, sample_weight=None):
        '''Function will add to sum_of_squares and sample_count every batch.
        Only the last time step of every sequence in the batch is taken into account.
        sample_weight is accepted for API compatibility and ignored.
        '''
        # Axis 0 is the batch and axis 1 the time step: [:, -1] keeps the last time step of
        # every sequence ([-1, :] would instead keep all time steps of the last sequence)
        last_pred = y_pred[:, -1]
        last_true = tf.cast(y_true[:, -1], last_pred.dtype)
        squared_error = tf.math.square(last_true - last_pred)
        self.sample_count.assign_add(
            tf.cast(tf.size(last_pred), self.sample_count.dtype))
        self.sum_of_squares.assign_add(
            tf.cast(tf.reduce_sum(squared_error), self.sum_of_squares.dtype))
    
    def result(self):
        '''Function will calculate the RMSE from the accumulated state.
        Returns 0 instead of NaN as long as no sample has been counted.
        '''
        return tf.math.sqrt(tf.math.divide_no_nan(self.sum_of_squares, self.sample_count))
                                    
    def reset_state(self):
        '''Function will reset all stateful variables to zero'''
        self.sample_count.assign(0.0)
        self.sum_of_squares.assign(0.0)

In [137]:
# Debug check: count the non-zero target values at the last time step of every training batch
for _, targets in commuter_train:
    last_step_targets = targets[:, -1, :]
    print(tf.math.count_nonzero(last_step_targets))
    print('new iteration')

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=int64)
new iteration
tf.Tensor(14, shape=(), dtype=i

In [111]:
# Preview the first 44 values of the commuter series, scaled down by 1E5 as for training
df['commuter'].iloc[:44].to_numpy() / 1E5

array([0.443208  , 0.34981802, 0.80536795, 0.804724  , 0.802554  ,
       0.764166  , 0.777658  , 0.486704  , 0.363614  , 0.79706   ,
       0.7955    , 0.793966  , 0.344518  , 0.288142  , 0.35942   ,
       0.362026  , 0.80396396, 0.802948  , 0.79773396, 0.792576  ,
       0.382896  , 0.49272   , 0.361444  , 0.780898  , 0.78344   ,
       0.357708  , 0.737806  , 0.714888  , 0.49691802, 0.362318  ,
       0.77778   , 0.603158  , 0.61511487, 0.76327   , 0.74922603,
       0.4958    , 0.35984   , 0.74884   , 0.75616604, 0.740184  ,
       0.76503396, 0.677056  , 0.475842  , 0.360298  ], dtype=float32)

In [65]:
# Rows 1847..2576 of the commuter series, scaled down by 1E5
# NOTE(review): the dataset-preparation cell labels rows 1847-2577 as the validation period
# (2021 to 2022) and rows 2577-3008 as the test period, although this is called testing_data
# -- confirm which split is intended here.
testing_data = tf.data.Dataset.from_tensor_slices(df['commuter'][1847: 2577] / 1E5)


def eval_seq2seq_model(model, data, forecast_length=1, seq_length=7, batch_size=1):
    '''Function evaluates a sequence to sequence model on non-overlapping windows of a series.
    The series is cut into consecutive windows of seq_length + forecast_length time steps. The
    first seq_length steps of a window are fed to the model, the prediction emitted at the last
    time step is compared with the remaining forecast_length steps.
    Parameters:
        model <tf.keras.Model> sequence to sequence model that returns forecast_length values
        per time step
        data <tf.data.Dataset> dataset of single time steps (scaled like the training data)
        forecast_length <int> number of time steps forecasted into the future
        seq_length <int> number of time steps fed to the model per window
        batch_size <int> number of windows per prediction batch
    Return:
        <np.float32> mean squared error over all windows and all forecasted time steps
    Raises:
        ValueError if data is too short for a single window or the model output does not
        provide forecast_length values per time step
    '''
    window_length = forecast_length + seq_length
    # shift defaults to the window size, so consecutive windows do not overlap
    windows = data.window(window_length, drop_remainder=True)
    windows = windows.flat_map(lambda x: x)
    windows = windows.batch(window_length)
    windows = windows.map(lambda x: (x[:seq_length], x[seq_length:]))
    windows = windows.batch(batch_size)

    # Collect the targets of all batches; concatenating copes with a smaller last batch
    target_batches = [target for _, target in windows.as_numpy_iterator()]
    if not target_batches:
        raise ValueError(
            f'data holds fewer than {window_length} time steps, no window can be evaluated')
    targets = np.concatenate(target_batches, axis=0)

    # Predict with the model handed in and keep only the prediction at the last time step
    predictions = model.predict(windows)[:, -1, :]
    if predictions.shape != targets.shape:
        # guard against silent broadcasting, e.g. a forecast_length that differs from the
        # number of values the model predicts per time step
        raise ValueError(
            f'prediction shape {predictions.shape} does not match target shape {targets.shape}')

    return np.mean(np.square(predictions - targets))

# Evaluate the training model (which works on scaled values) on testing_data, using the same
# window sizes as the prepare_training_dataset defaults: 30 input steps, 14 forecasted steps
mse = eval_seq2seq_model(rnn_seq2seq_training, testing_data, forecast_length=14, seq_length=30)

0.00041084472
5.5993965e-10
3.1179075e-05
0.0019652548
0.0007950299
0.0008659619
0.00012773777
0.00023350689
0.0016602537
0.00010652963
0.00034446447
2.7973712e-05
0.0005844618
0.0020749806
8.9359746e-05
0.0002938011


0.0005876022


In [66]:
# Display the value returned by eval_seq2seq_model in the previous cell
mse

array([5.8760220e-04, 4.5989486e-04, 3.4316052e-03, 1.1808785e-02,
       2.2307457e-03, 4.8437752e-04, 1.2193810e-04, 8.2763209e-04,
       2.0214634e-06, 2.8416878e-04, 3.1612022e-03, 5.8806813e-03,
       1.3787013e-03, 1.1862561e-03], dtype=float32)

In [5]:
# Scratch experiment: minimal sequence-returning LSTM trained on random data
test_rnn = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(None, 1)),
    tf.keras.layers.LSTM(3, return_sequences=True)
])
test_rnn.compile(loss='mse', optimizer='adam')
# No random seed is set, so the data differ on every run
x_training_data = np.random.rand(500, 1)
x_train = tf.data.Dataset.from_tensor_slices(x_training_data)
# With the defaults (forecast_length=1, seq_length=7) this yields (input, target) tuples that
# are not batched any further
x_train = timeseries_dataset_seq2seq(x_train)
# NOTE(review): y_training_data / y_train are created but never passed to fit(); the targets
# used for training are the ones contained in x_train.
y_training_data = np.random.rand(500, 3)
y_train = tf.data.Dataset.from_tensor_slices(y_training_data)


test_rnn.fit(x=x_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f83c8841f60>

In [47]:
# Scratch check: run a prediction on one random sequence of 10 time steps (result discarded)
test_rnn.predict(np.random.rand(1, 10, 1))
# Only this last expression is displayed: the shape of a random (1, 60, 1) array
np.random.rand(1, 60, 1).shape



(1, 60, 1)

In [182]:
# Scratch check: accumulate 22 random vectors of length 14 in place in a float32 array
a = np.zeros((14,), dtype='float32')

for _ in range(22):
    sample = np.random.rand(14)
    print(sample)
    # in-place addition, identical to "a += sample" (the float64 sample is cast to float32)
    np.add(a, sample, out=a)
a

[0.67388077 0.18984745 0.61122116 0.31137044 0.50503867 0.18929037
 0.61636287 0.97836272 0.70692446 0.79188471 0.11336993 0.01475838
 0.83786145 0.50861334]
[0.81867838 0.79164872 0.11088158 0.29023907 0.57815831 0.0342671
 0.00434693 0.2741532  0.15625099 0.78368318 0.18649465 0.0013391
 0.85996041 0.29445972]
[0.01330934 0.24549398 0.08873156 0.71743076 0.21148537 0.72991601
 0.66889605 0.83391747 0.81458398 0.35312731 0.35237431 0.73738273
 0.96517253 0.52211691]
[0.08045153 0.92695746 0.30384688 0.22169256 0.85231981 0.36898274
 0.69632564 0.17785737 0.99774497 0.93419654 0.98740287 0.81108123
 0.90829649 0.51305405]
[0.23834562 0.90531048 0.00863958 0.08093868 0.22606262 0.85072973
 0.22908413 0.73069026 0.762691   0.32499139 0.65763599 0.64558172
 0.69650676 0.10202842]
[0.60028532 0.07613098 0.38184849 0.11085824 0.82751313 0.58460833
 0.13727838 0.87527711 0.51458954 0.46814201 0.28440379 0.12228945
 0.7698344  0.23207313]
[0.32205817 0.39806014 0.68102314 0.28397828 0.0140300

array([11.24642  , 13.268541 , 11.237595 ,  7.1986403,  8.0593405,
       10.96287  ,  9.327009 , 12.3163805, 11.230288 , 10.433632 ,
        8.914517 , 12.7504015, 11.564948 ,  8.877827 ], dtype=float32)

In [None]:
# This block collects all keys that occur in the nested dictionary "wagon" in the compositions
# of one day.
# NOTE(review): `k` is not defined in the visible part of the notebook; presumably it is the
# HTTP response holding the compositions -- it has to exist before this cell is run.

# Map every key to itself; keys keep the order of their first occurrence. No try/except is
# needed for the membership test, so real errors (e.g. a missing 'wagons' entry) are not hidden.
properties_dict = {
    prop: prop
    for train in k.json()
    for journey in train['journeySections']
    for wagon in journey['wagons']
    for prop in wagon.keys()
}
print(properties_dict.keys())

In [None]:
# NOTE(review): `r` is not defined in the visible part of the notebook; presumably an HTTP
# response left over from an earlier session -- this cell fails on a fresh kernel.
r.json()

In [26]:
# Persist one object through an ORM session bound to the finrail engine.
# NOTE(review): `bsp` is not defined in the visible part of the notebook -- it has to exist
# before this cell is run.
Session = sessionmaker(bind=engine)
session = Session()
try:
    session.add(bsp)
    session.commit()
except Exception:
    # a failed flush/commit leaves the session unusable until it is rolled back
    session.rollback()
    raise

In [5]:
# Scratch cell: (over)write test.txt in the working directory with a short marker string
with open('test.txt', mode='w') as out_file:
    out_file.write('haha')