In [1]:
import numpy as np
import pandas as pd

def load_timeseries(filename, params):
    """Load a univariate time series from CSV and build normalized train/test windows.

    The CSV is read with its first row as header and its first column as the
    index; the remaining single value column is the series. The series is cut
    into overlapping windows of ``window_size + 1`` consecutive values (the
    first ``window_size`` values are the model input, the last one is the
    target), and every window is normalized relative to its own first value
    via ``normalize_windows``.

    Args:
        filename: Path (or file-like object) of the CSV file to read.
        params: Mapping that provides
            ``'window_size'`` (number of input time steps per sample) and
            ``'train_test_split'`` (fraction of the windows used for training).

    Returns:
        A list ``[x_train, y_train, x_test, y_test, x_test_raw, y_test_raw,
        last_raw, last]`` where

        * ``x_train`` / ``x_test`` are normalized inputs reshaped to
          ``(samples, window_size, 1)``,
        * ``y_train`` / ``y_test`` are the matching normalized targets,
        * ``x_test_raw`` / ``y_test_raw`` are the un-normalized test inputs and
          targets,
        * ``last_raw`` is a one-element list holding the final ``window_size``
          raw values of the series, and
        * ``last`` is that window normalized and reshaped to
          ``(1, window_size, 1)``.

        The training rows are shuffled in place with ``np.random.shuffle``, so
        their order depends on NumPy's global random state.

    Raises:
        ValueError: If the series has fewer than ``window_size + 1`` values, so
            that not even one window can be built.
    """
    # Read the caller-supplied file; a hard-coded path here would make the
    # ``filename`` argument meaningless. ``DataFrame.squeeze("columns")`` is
    # the replacement for the ``squeeze=True`` keyword removed in pandas 2.0.
    series = pd.read_csv(filename, sep=',', header=0, index_col=0).squeeze("columns")
    data = series.values

    window_size = params['window_size']
    # Each window carries ``window_size`` inputs plus the value to predict.
    adjusted_window = window_size + 1
    if len(data) < adjusted_window:
        raise ValueError(
            "time series has {} observations but at least {} are required "
            "for window_size={}".format(len(data), adjusted_window, window_size))

    # Split data into overlapping windows. The ``+ 1`` keeps the final window,
    # whose target is the last observation of the series.
    raw = [data[start: start + adjusted_window]
           for start in range(len(data) - adjusted_window + 1)]

    # Normalize data
    result = np.array(normalize_windows(raw))
    raw = np.array(raw)

    # Split the input dataset into train and test
    split_index = int(round(params['train_test_split'] * result.shape[0]))
    train = result[:split_index, :]
    np.random.shuffle(train)

    # x_train and y_train, for training
    x_train = train[:, :-1]
    y_train = train[:, -1]

    # x_test and y_test, for testing
    x_test = result[split_index:, :-1]
    y_test = result[split_index:, -1]

    # Add the trailing feature axis: (samples, time steps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    # Raw counterparts of the test set, needed to undo the normalization.
    x_test_raw = raw[split_index:, :-1]
    y_test_raw = raw[split_index:, -1]

    # Last window, for next time stamp prediction
    last_raw = [data[-window_size:]]
    last = np.array(normalize_windows(last_raw))
    last = np.reshape(last, (last.shape[0], last.shape[1], 1))

    return [x_train, y_train, x_test, y_test, x_test_raw, y_test_raw, last_raw, last]

def normalize_windows(window_data):
    """Express every value of each window as a relative change from that window's first value.

    For a window ``w`` each element ``p`` becomes ``p / w[0] - 1``, so the
    first element of every non-empty window maps to ``0.0``.

    Args:
        window_data: Iterable of indexable, iterable windows of numbers.

    Returns:
        A list of lists of floats, one inner list per input window.
    """
    # The base value is looked up inside the inner loop on purpose: an empty
    # window then yields an empty list without ever being indexed.
    return [
        [float(value) / float(window[0]) - 1 for value in window]
        for window in window_data
    ]

In [2]:
import numpy as np
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

def rnn_lstm(layers, params):
    """Build and compile a stacked two-layer LSTM regression model with Keras.

    Args:
        layers: Four-element sequence read as
            ``[n_features, n_timesteps, hidden_units, n_outputs]``:
            ``layers[1]`` and ``layers[0]`` form the input shape
            ``(n_timesteps, n_features)``, ``layers[1]`` is also the size of
            the first LSTM layer, ``layers[2]`` the size of the second LSTM
            layer and ``layers[3]`` the size of the final Dense layer.
        params: Mapping that provides ``'dropout_keep_prob'``, the value handed
            to both ``Dropout`` layers.

    Returns:
        The compiled ``Sequential`` model (mean squared error loss, RMSprop
        optimizer).
    """
    # NOTE(review): Keras' ``Dropout`` argument is the fraction of units to
    # *drop*, while the config key is called ``dropout_keep_prob``. The value
    # is passed through unchanged to keep existing configurations behaving the
    # same -- confirm which meaning the configuration intends.
    dropout_rate = params['dropout_keep_prob']

    model = Sequential()
    # ``units`` is the Keras 2 name of the layer-size argument; the Keras 1
    # spelling ``output_dim`` is rejected by current Keras releases.
    model.add(LSTM(units=layers[1],
                   input_shape=(layers[1], layers[0]),
                   return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(units=layers[2], return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=layers[3]))
    # tanh bounds every prediction to the open interval (-1, 1).
    model.add(Activation("tanh"))

    model.compile(loss="mean_squared_error", optimizer="rmsprop")
    return model

def predict_next_timestamp(model, history):
    """Run ``model.predict`` on ``history`` and return the result flattened to 1-D.

    Args:
        model: Object exposing ``predict(history)`` whose result has a
            ``size`` attribute (e.g. a NumPy array).
        history: Input batch passed to ``model.predict`` unchanged.

    Returns:
        The prediction reshaped to a one-dimensional array with one entry per
        predicted value.
    """
    raw_output = model.predict(history)
    flat_shape = (raw_output.size,)
    return np.reshape(raw_output, flat_shape)

Using TensorFlow backend.


In [4]:
import sys
import json
from deeppavlov import configs, build_model
import data_helpers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def train_predict(train_file=None, parameter_file=None):
    """Train the LSTM on a time series, plot test-set predictions and forecast one step.

    Args:
        train_file: Path of the CSV file holding the time series. When omitted,
            ``sys.argv[1]`` is used.
        parameter_file: Path of the JSON file holding the training parameters.
            When omitted, ``sys.argv[2]`` is used. This function reads the keys
            ``'window_size'``, ``'hidden_unit'``, ``'batch_size'``, ``'epochs'``
            and ``'validation_split'``; the whole mapping is also handed to
            ``load_timeseries`` and ``rnn_lstm``.

    Raises:
        IndexError: If an argument is omitted and ``sys.argv`` does not contain
            the corresponding command line argument.

    Side effects:
        Shows a matplotlib figure comparing predicted and actual test values
        and prints the forecast for the next time stamp.
    """
    # Explicit arguments make the function callable from a notebook, where
    # sys.argv holds the kernel's arguments and not the user's files.
    if train_file is None:
        train_file = sys.argv[1]
    if parameter_file is None:
        parameter_file = sys.argv[2]

    # Load training parameters; the context manager closes the file handle.
    with open(parameter_file) as config_file:
        params = json.load(config_file)

    # Load time series dataset, and split it into train and test.
    # The helpers are the functions defined in this notebook: no `data_helpers`
    # module is installed, and the `build_model` name imported in this cell
    # comes from deeppavlov, not from the cell that defines `rnn_lstm`.
    (x_train, y_train, x_test, y_test, x_test_raw, y_test_raw,
     last_window_raw, last_window) = load_timeseries(train_file, params)

    # Build RNN (LSTM) model: 1 feature in, `window_size` steps, 1 value out
    lstm_layer = [1, params['window_size'], params['hidden_unit'], 1]
    model = rnn_lstm(lstm_layer, params)

    # Train RNN (LSTM) model with train set
    model.fit(
        x_train,
        y_train,
        batch_size=params['batch_size'],
        epochs=params['epochs'],
        validation_split=params['validation_split'])

    # Check the model against test set. Windows were normalized as
    # value / first_value - 1, so (prediction + 1) * first_value undoes it.
    predicted = predict_next_timestamp(model, x_test)
    predicted_raw = [(value + 1) * window[0]
                     for value, window in zip(predicted, x_test_raw)]

    # Plot graph: predicted VS actual
    plt.subplot(111)
    plt.plot(predicted_raw, label='Predicted')
    plt.plot(y_test_raw, label='Actual')
    plt.legend()
    plt.show()

    # Predict next time stamp
    next_timestamp = predict_next_timestamp(model, last_window)
    next_timestamp_raw = (next_timestamp[0] + 1) * last_window_raw[0][0]
    print('The next time stamp forecasting is: {}'.format(next_timestamp_raw))

if __name__ == '__main__':
    # Script usage: python3 train_predict.py ./data/sales.csv ./training_config.json
    # The two paths reach train_predict() through sys.argv[1] and sys.argv[2].
    train_predict()

ModuleNotFoundError: No module named 'data_helpers'

In [None]:
# Install into the environment the running kernel uses (%pip, not a bare pip).
# GitHub no longer serves the unauthenticated git:// protocol, so the
# repository is fetched over https.
# NOTE(review): the repository is named `py_data_helper`; confirm that it
# really provides the `data_helpers` module the training cell imports.
%pip install git+https://github.com/qevo/py_data_helper.git

In [None]:
# `pipenv` is a shell command, not Python or an IPython magic, so it needs the
# `!` shell escape to run from a notebook cell.
# NOTE(review): this installs into the Pipenv-managed environment, which may
# differ from the environment the kernel is running in -- confirm.
!pipenv install --dev