Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code for CNN-TS method #10

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added CNN-TS/M4-Method-Description-CNN-TS.pdf
Binary file not shown.
163 changes: 163 additions & 0 deletions CNN-TS/environment.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
absl-py==0.2.0
alabaster==0.7.10
anaconda-client==1.6.3
anaconda-navigator==1.6.2
anaconda-project==0.6.0
asn1crypto==0.22.0
astor==0.6.2
astroid==1.4.9
astropy==1.3.2
Babel==2.4.0
backports.shutil-get-terminal-size==1.0.0
beautifulsoup4==4.6.0
bitarray==0.8.1
blaze==0.10.1
bleach==1.5.0
bokeh==0.12.5
boto==2.46.1
Bottleneck==1.2.1
cffi==1.10.0
chardet==3.0.3
click==6.7
cloudpickle==0.2.2
clyent==1.2.2
colorama==0.3.9
comtypes==1.1.2
contextlib2==0.5.5
cryptography==1.8.1
cycler==0.10.0
Cython==0.25.2
cytoolz==0.8.2
dask==0.14.3
datashape==0.5.4
decorator==4.0.11
distributed==1.16.3
docutils==0.13.1
entrypoints==0.2.2
et-xmlfile==1.0.1
fastcache==1.0.2
Flask==0.12.2
Flask-Cors==3.0.2
gast==0.2.0
gevent==1.2.1
greenlet==0.4.12
grpcio==1.11.0
h5py==2.7.0
HeapDict==1.0.0
html5lib==0.9999999
idna==2.5
imagesize==0.7.1
ipykernel==4.6.1
ipython==5.3.0
ipython-genutils==0.2.0
ipywidgets==6.0.0
isort==4.2.5
itsdangerous==0.24
jdcal==1.3
jedi==0.10.2
Jinja2==2.9.6
jsonschema==2.6.0
jupyter==1.0.0
jupyter-client==5.0.1
jupyter-console==5.1.0
jupyter-core==4.3.0
Keras==2.1.6
lazy-object-proxy==1.2.2
lightgbm==2.1.1
llvmlite==0.18.0
locket==0.2.0
lxml==3.7.3
Markdown==2.6.11
MarkupSafe==0.23
matplotlib==2.0.2
menuinst==1.4.7
mistune==0.7.4
mpmath==0.19
msgpack-python==0.4.8
multipledispatch==0.4.9
navigator-updater==0.1.0
nbconvert==5.1.1
nbformat==4.3.0
networkx==1.11
nltk==3.2.3
nose==1.3.7
notebook==5.0.0
numba==0.33.0
numexpr==2.6.2
numpy==1.14.3
numpydoc==0.6.0
odo==0.5.0
olefile==0.44
openpyxl==2.4.7
packaging==16.8
pandas==0.20.1
pandocfilters==1.4.1
partd==0.3.8
path.py==10.3.1
pathlib2==2.2.1
patsy==0.4.1
pep8==1.7.0
pickleshare==0.7.4
Pillow==4.1.1
ply==3.10
prompt-toolkit==1.0.14
protobuf==3.5.2.post1
psutil==5.2.2
py==1.4.33
pycosat==0.6.2
pycparser==2.17
pycrypto==2.6.1
pycurl==7.43.0
pyflakes==1.5.0
Pygments==2.2.0
pylint==1.6.4
pyodbc==4.0.16
pyOpenSSL==17.0.0
pyparsing==2.1.4
pytest==3.0.7
python-dateutil==2.6.0
pytz==2017.2
PyWavelets==0.5.2
pywin32==220
PyYAML==3.12
pyzmq==16.0.2
QtAwesome==0.4.4
qtconsole==4.3.0
QtPy==1.2.1
requests==2.14.2
rope-py3k==0.9.4.post1
scikit-image==0.13.0
scikit-learn==0.18.1
scipy==0.19.0
seaborn==0.7.1
simplegeneric==0.8.1
singledispatch==3.4.0.3
six==1.11.0
snowballstemmer==1.2.1
sortedcollections==0.5.3
sortedcontainers==1.5.7
sphinx==1.5.6
spyder==3.1.4
SQLAlchemy==1.1.9
statsmodels==0.8.0
sympy==1.0
tables==3.2.2
tblib==1.3.2
tensorboard==1.8.0
tensorflow==1.8.0
termcolor==1.1.0
testpath==0.3
toolz==0.8.2
tornado==4.5.1
traitlets==4.3.2
unicodecsv==0.14.1
wcwidth==0.1.7
Werkzeug==0.14.1
widgetsnbextension==2.0.0
win-unicode-console==0.5
wrapt==1.10.10
xlrd==1.0.0
XlsxWriter==0.9.6
xlwings==0.10.4
xlwt==1.2.0
zict==0.1.2
38 changes: 38 additions & 0 deletions CNN-TS/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Evaluation metrics, taken from the competition Github repo
https://github.com/M4Competition/M4-methods
The factor of 2 in SMAPE has been changed to 200 to be consistent with the R code.
"""
import numpy as np



def smape(a, b):
    """Return the symmetric MAPE (in percent) between actuals and forecasts.

    Both inputs are flattened before comparison, so any matching shapes work.

    :param a: actual values
    :param b: predicted values
    :return: sMAPE as a Python float
    """
    actual = np.ravel(a)
    forecast = np.ravel(b)
    # Factor 200 (not 2) keeps the result on the percentage scale used by the
    # competition's R code.
    pointwise = np.abs(actual - forecast) / (np.abs(actual) + np.abs(forecast))
    return float(np.mean(200.0 * pointwise))


def mase(insample, y_test, y_hat_test, freq):
    """
    Calculates MASE (mean absolute scaled error).

    The out-of-sample absolute error is scaled by the in-sample mean absolute
    error of the seasonal naive forecast (lag = ``freq``).

    :param insample: insample data (1-D array-like, length > ``freq``)
    :param y_test: out of sample target values
    :param y_hat_test: predicted values
    :param freq: data frequency (seasonal period)
    :return: MASE as a float
    """
    insample = np.asarray(insample, dtype=float)
    # Seasonal naive in-sample scale: each point forecast by the value one
    # period earlier. Vectorized slice replaces the original Python loop.
    masep = np.mean(np.abs(insample[freq:] - insample[:-freq]))

    return np.mean(np.abs(np.asarray(y_test) - np.asarray(y_hat_test))) / masep
179 changes: 179 additions & 0 deletions CNN-TS/model_definitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@

import keras as ks
import pandas as pd
import numpy as np
import metrics
from collections import namedtuple
import tensorflow as tf
import datetime as dt
import os
from utilities import *


def yearly_model(series_length):
    """Build the MLP used for yearly series.

    A plain two-hidden-layer perceptron mapping the last ``series_length``
    observations to a single one-step-ahead forecast.

    :param series_length: number of past observations fed to the model
    :return: (compiled Keras model, epochs, batch_size)
    """
    input = ks.layers.Input((series_length,))
    # The original Reshape((series_length, 1)) -> Flatten() round-trip was a
    # no-op on the tensor values; Dense consumes the flat input directly.
    yearly_hidden1 = ks.layers.Dense(units=50, activation='relu')(input)
    yearly_hidden2 = ks.layers.Dense(units=50, activation='relu')(yearly_hidden1)
    yearly_output = ks.layers.Dense(units=1, activation='linear')(yearly_hidden2)
    est = ks.Model(inputs=input, outputs=yearly_output)
    est.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse', metrics=['mse'])
    epochs = 250
    batch_size = 1000
    return est, epochs, batch_size


def quarterly_model(series_length):
    """Build the trend + periodic model for quarterly series.

    A trend branch works on 4-quarter (yearly) averages, while a convolutional
    branch models the deviations of the series from that trend within each
    year. The two one-step forecasts are summed.

    :param series_length: number of past observations fed to the model
    :return: (compiled Keras model, epochs, batch_size)
    """
    inp = ks.layers.Input((series_length,))
    reshaped = ks.layers.Reshape((series_length, 1))(inp)

    # Trend branch: average every 4 quarters, then a small MLP.
    trend_avg = ks.layers.AveragePooling1D(pool_size=4, strides=4, padding='valid')(reshaped)
    trend_h1 = ks.layers.Dense(units=50, activation='relu')(ks.layers.Flatten()(trend_avg))
    trend_h2 = ks.layers.Dense(units=50, activation='relu')(trend_h1)
    trend_out = ks.layers.Dense(units=1, activation='linear')(trend_h2)

    # Periodic branch: subtract the upsampled trend, convolve year by year.
    trend_up = ks.layers.UpSampling1D(size=4)(trend_avg)
    seasonal_diff = ks.layers.Subtract()([inp, ks.layers.Flatten()(trend_up)])
    seasonal_in = ks.layers.Reshape((series_length // 4, 4, 1))(seasonal_diff)
    seasonal_conv = ks.layers.Conv2D(filters=3, kernel_size=(1, 4), strides=(1, 4),
                                     padding='valid')(seasonal_in)
    seasonal_h1 = ks.layers.Dense(units=50, activation='relu')(ks.layers.Flatten()(seasonal_conv))
    seasonal_h2 = ks.layers.Dense(units=50, activation='relu')(seasonal_h1)
    seasonal_out = ks.layers.Dense(units=1, activation='linear')(seasonal_h2)

    est = ks.Model(inputs=inp, outputs=ks.layers.Add()([trend_out, seasonal_out]))
    est.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse', metrics=['mse'])
    return est, 250, 1000


def monthly_model(series_length):
    """Build the trend + periodic model for monthly series.

    A trend branch works on 12-month (yearly) averages, while a convolutional
    branch models the deviations of the series from that trend within each
    year. The two one-step forecasts are summed.

    :param series_length: number of past observations fed to the model
    :return: (compiled Keras model, epochs, batch_size)
    """
    inp = ks.layers.Input((series_length,))
    reshaped = ks.layers.Reshape((series_length, 1))(inp)

    # Trend branch: average every 12 months, then a small MLP.
    trend_avg = ks.layers.AveragePooling1D(pool_size=12, strides=12, padding='valid')(reshaped)
    trend_h1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(trend_avg))
    trend_h2 = ks.layers.Dense(units=20, activation='relu')(trend_h1)
    trend_out = ks.layers.Dense(units=1, activation='linear')(trend_h2)

    # Periodic branch: subtract the upsampled trend, convolve year by year.
    trend_up = ks.layers.UpSampling1D(size=12)(trend_avg)
    seasonal_diff = ks.layers.Subtract()([inp, ks.layers.Flatten()(trend_up)])
    seasonal_in = ks.layers.Reshape((series_length // 12, 12, 1))(seasonal_diff)
    seasonal_conv = ks.layers.Conv2D(filters=3, kernel_size=(1, 12), strides=(1, 12),
                                     padding='valid')(seasonal_in)
    seasonal_h1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(seasonal_conv))
    seasonal_h2 = ks.layers.Dense(units=20, activation='relu')(seasonal_h1)
    seasonal_out = ks.layers.Dense(units=1, activation='linear')(seasonal_h2)

    est = ks.Model(inputs=inp, outputs=ks.layers.Add()([trend_out, seasonal_out]))
    est.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse', metrics=['mse'])
    return est, 250, 1000


def weekly_model(series_length):
    """Build the model for weekly series.

    For a one-year window (52 points) a plain two-hidden-layer MLP is used;
    longer windows get the trend + periodic architecture with a 52-week cycle.

    :param series_length: number of past observations fed to the model
    :return: (compiled Keras model, epochs, batch_size)
    """
    inp = ks.layers.Input((series_length,))
    if series_length == 52:
        # Window too short to pool over whole years: MLP on the raw window.
        h1 = ks.layers.Dense(units=20, activation='relu')(inp)
        h2 = ks.layers.Dense(units=20, activation='relu')(h1)
        final = ks.layers.Dense(units=1, activation='linear')(h2)
    else:
        reshaped = ks.layers.Reshape((series_length, 1))(inp)

        # Trend branch: average every 52 weeks, then a small MLP.
        trend_avg = ks.layers.AveragePooling1D(pool_size=52, strides=52, padding='valid')(reshaped)
        trend_h1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(trend_avg))
        trend_h2 = ks.layers.Dense(units=20, activation='relu')(trend_h1)
        trend_out = ks.layers.Dense(units=1, activation='linear')(trend_h2)

        # Periodic branch: subtract the upsampled trend, convolve year by year.
        trend_up = ks.layers.UpSampling1D(size=52)(trend_avg)
        seasonal_diff = ks.layers.Subtract()([inp, ks.layers.Flatten()(trend_up)])
        seasonal_in = ks.layers.Reshape((series_length // 52, 52, 1))(seasonal_diff)
        seasonal_conv = ks.layers.Conv2D(filters=3, kernel_size=(1, 52), strides=(1, 52),
                                         padding='valid')(seasonal_in)
        seasonal_h1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(seasonal_conv))
        seasonal_h2 = ks.layers.Dense(units=20, activation='relu')(seasonal_h1)
        seasonal_out = ks.layers.Dense(units=1, activation='linear')(seasonal_h2)
        final = ks.layers.Add()([trend_out, seasonal_out])
    est = ks.Model(inputs=inp, outputs=final)
    est.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse', metrics=['mse'])
    return est, 250, 1000


def daily_model(series_length):
    """Build the trend + periodic model for daily series.

    A trend branch works on 7-day (weekly) averages, while a convolutional
    branch models the deviations of the series from that trend within each
    week. The two one-step forecasts are summed.

    :param series_length: number of past observations fed to the model
    :return: (compiled Keras model, epochs, batch_size)
    """
    inp = ks.layers.Input((series_length,))
    reshaped = ks.layers.Reshape((series_length, 1))(inp)

    # Trend branch: average every 7 days, then a small MLP.
    trend_avg = ks.layers.AveragePooling1D(pool_size=7, strides=7, padding='valid')(reshaped)
    trend_h1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(trend_avg))
    trend_h2 = ks.layers.Dense(units=20, activation='relu')(trend_h1)
    trend_out = ks.layers.Dense(units=1, activation='linear')(trend_h2)

    # Periodic branch: subtract the upsampled trend, convolve week by week.
    trend_up = ks.layers.UpSampling1D(size=7)(trend_avg)
    seasonal_diff = ks.layers.Subtract()([inp, ks.layers.Flatten()(trend_up)])
    seasonal_in = ks.layers.Reshape((series_length // 7, 7, 1))(seasonal_diff)
    seasonal_conv = ks.layers.Conv2D(filters=3, kernel_size=(1, 7), strides=(1, 7),
                                     padding='valid')(seasonal_in)
    seasonal_h1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(seasonal_conv))
    seasonal_h2 = ks.layers.Dense(units=20, activation='relu')(seasonal_h1)
    seasonal_out = ks.layers.Dense(units=1, activation='linear')(seasonal_h2)

    est = ks.Model(inputs=inp, outputs=ks.layers.Add()([trend_out, seasonal_out]))
    est.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse', metrics=['mse'])
    return est, 250, 1000


def hourly_model(series_length):
    """Build the three-branch additive model for hourly series.

    The one-step forecast is the sum of:
    1. a weekly-trend branch built on 168-hour averages,
    2. a branch on the residual of the series vs. the weekly trend,
       convolved in groups of 7 values, and
    3. a branch on the residual vs. 24-hour daily averages, convolved
       over each day.

    :param series_length: number of past observations fed to the model
        (must be divisible by 168; get_model_params uses 672)
    :return: (compiled Keras model, epochs, batch_size)
    """
    input = ks.layers.Input((series_length,))
    weekly_input = ks.layers.Reshape((series_length, 1))(input)
    # Weekly trend: average each 168-hour week, then a small MLP.
    weekly_avg = ks.layers.AveragePooling1D(pool_size=168, strides=168, padding='valid')(weekly_input)
    weekly_hidden1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(weekly_avg))
    weekly_hidden2 = ks.layers.Dense(units=20, activation='relu')(weekly_hidden1)
    weekly_output = ks.layers.Dense(units=1, activation='linear')(weekly_hidden2)
    weekly_avg_up = ks.layers.UpSampling1D(size=168)(weekly_avg)

    # Residual of the raw series against the (upsampled) weekly trend.
    daily_diff = ks.layers.Subtract()([input, ks.layers.Flatten()(weekly_avg_up)])
    # NOTE(review): this groups the residual into chunks of 7 consecutive
    # *hours* (series_length // 7 rows of 7). For an hourly series a
    # day-of-week pattern would suggest grouping by 24 or 168 instead —
    # confirm against the method description PDF before changing.
    daily_diff_input = ks.layers.Reshape((series_length // 7, 7, 1))(daily_diff)
    daily_diff_conv = ks.layers.Conv2D(filters=3, kernel_size=(1, 7), strides=(1, 7),
                                       padding='valid')(daily_diff_input)
    daily_diff_hidden1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(daily_diff_conv))
    daily_diff_hidden2 = ks.layers.Dense(units=20, activation='relu')(daily_diff_hidden1)
    daily_diff_output = ks.layers.Dense(units=1, activation='linear')(daily_diff_hidden2)

    # Daily averages of the raw series; residual captures the within-day
    # (hour-of-day) pattern, convolved one 24-hour day at a time.
    daily_avg = ks.layers.AveragePooling1D(pool_size=24, strides=24, padding='valid')(weekly_input)
    daily_avg_up = ks.layers.UpSampling1D(size=24)(daily_avg)
    hourly_diff = ks.layers.Subtract()([input, ks.layers.Flatten()(daily_avg_up)])
    hourly_diff_input = ks.layers.Reshape((series_length // 24, 24, 1))(hourly_diff)
    hourly_diff_conv = ks.layers.Conv2D(filters=3, kernel_size=(1, 24), strides=(1, 24),
                                        padding='valid')(hourly_diff_input)
    hourly_diff_hidden1 = ks.layers.Dense(units=20, activation='relu')(ks.layers.Flatten()(hourly_diff_conv))
    hourly_diff_hidden2 = ks.layers.Dense(units=20, activation='relu')(hourly_diff_hidden1)
    hourly_diff_output = ks.layers.Dense(units=1, activation='linear')(hourly_diff_hidden2)

    # Final forecast: sum of the three branch outputs.
    output = ks.layers.Add()([weekly_output, daily_diff_output, hourly_diff_output])

    est = ks.Model(inputs=input, outputs=output)
    est.compile(optimizer=ks.optimizers.Adam(lr=0.001), loss='mse', metrics=['mse'])
    epochs = 250
    batch_size = 1000
    return est, epochs, batch_size


def get_model_params():
    """Return the per-frequency model configurations for the M4 data sets.

    Each entry bundles: the frequency name, forecast horizon, seasonal
    frequency, model-building function, candidate training-window lengths,
    cycle length, and whether data augmentation is applied.
    """
    Model = namedtuple('Model', ['freq_name', 'horizon', 'freq', 'model_constructor', 'training_lengths',
                                 'cycle_length', 'augment'])
    return [
        Model('Yearly', 6, 1, yearly_model, [10, 20, 30], 1, False),
        Model('Quarterly', 8, 4, quarterly_model, [20, 48, 100], 4, False),
        Model('Monthly', 18, 12, monthly_model, [48, 120, 240], 12, False),
        Model('Weekly', 13, 1, weekly_model, [52, 520, 1040], 52, True),
        Model('Daily', 14, 1, daily_model, [98], 7, True),
        Model('Hourly', 48, 24, hourly_model, [672], 7 * 24, True),
    ]

Loading