In [624]:
import pandas as pd
import numpy as np
from scipy.special import factorial
import torch as pt
import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima_model import ARIMA
import ujson as js
import mysql.connector as mdb

# Create new tensors as CPU float tensors by default.
pt.set_default_tensor_type('torch.FloatTensor')
# GPU alternative: uncomment to default to CUDA float tensors instead.
#pt.set_default_tensor_type('torch.cuda.FloatTensor')
# Let pandas show up to 2000 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 2000)

In [625]:
# Read the database connection settings from a JSON file in the working
# directory; the parsed object is later unpacked as keyword arguments.
with open('conn_params.json') as params_file:
    conn_params = js.load(params_file)

In [626]:
# Open the MySQL connection, passing every entry of conn_params as a keyword argument.
conn = mdb.connect(**conn_params)

In [627]:
# Query template for market records joined with type and region names.
# The two `%s` slots are filled with Python string formatting before the query
# is sent: first the comma-separated type ids, then the region id(s).
sql_code = """
SELECT MarketRecord.record_date,
	MarketRecord.type_id,
	Types.type_name,
	MarketRecord.region_id,
	MapRegions.region_name,
	MarketRecord.order_count,
	MarketRecord.volume,
	MarketRecord.low_price,
	MarketRecord.avg_price,
	MarketRecord.high_price
FROM MarketRecord
JOIN Types
	ON MarketRecord.type_id = Types.type_id
JOIN MapRegions
	ON MarketRecord.region_id = MapRegions.region_id
WHERE MarketRecord.type_id IN (%s)
	AND MarketRecord.region_id IN (%s)
;"""

In [628]:
# Ids substituted into the two IN (...) lists of sql_code.
type_ids = [34, 35, 36, 37, 38, 39, 40]
region_ids = [10000002]

# The ids are hard-coded integers, so plain string formatting is acceptable.
# NOTE(review): switch to driver-side query parameters if these ever come
# from user input.
query = sql_code % (
    ','.join(str(type_id) for type_id in type_ids),
    ','.join(str(region_id) for region_id in region_ids),
)

try:
    dataset = pd.read_sql(query, conn)
finally:
    # Release the connection even when the query fails, instead of leaking it.
    conn.close()

# Parse the date column so it can be used as a datetime index later on.
dataset['record_date'] = pd.to_datetime(dataset['record_date'])

In [629]:
# Column roles for the reshaping step: rows are split by `group_cols`,
# aligned on `idx_col`, and `data_cols` are the measurements kept per group.
idx_col = 'record_date'
group_cols = ['type_name']
data_cols = ['volume', 'avg_price']

# Column-less frame indexed by every date from the earliest to the latest
# record; `working_data` starts as a copy of it.
full_range = pd.date_range(start=dataset[idx_col].min(), end=dataset[idx_col].max())
root_data = pd.DataFrame(index=full_range)
working_data = root_data.copy()

In [630]:
# Rebuild from the bare date index so this cell can be re-run: inserting into
# a frame that already holds the columns would raise ValueError.
working_data = root_data.copy()

for group, data in dataset.groupby(group_cols):
    # groupby may yield a scalar or a tuple key; normalise to a tuple.
    group = group if isinstance(group, tuple) else (group,)
    group_label = '_'.join(str(part) for part in group)
    indexed = data.set_index(idx_col)
    for data_col in data_cols:
        # Left-join onto the full date range so days without a record become NaN.
        data_item = root_data.join(indexed[data_col], how='left')
        # Append one column per group/measure, named "<group>|<measure>".
        # Pass the Series explicitly rather than the one-column frame.
        working_data.insert(
            len(working_data.columns),
            '%s|%s' % (group_label, data_col),
            data_item[data_col],
        )

In [631]:
# Column groups, selected by the "|<measure>" suffix of the column name.
price_cols = [col for col in working_data.columns if col.endswith('|avg_price')]
volume_cols = [col for col in working_data.columns if col.endswith('|volume')]

# Prices: carry the last known value forward, then back-fill any leading gap.
# .ffill()/.bfill() replace the deprecated fillna(method=...) form.
working_data[price_cols] = working_data[price_cols].ffill().bfill()

# Volumes: a day without a record is treated as zero volume.
working_data[volume_cols] = working_data[volume_cols].fillna(0)

In [632]:
# Keep only the average-price columns and convert them to a tensor of the
# default tensor type (rows follow working_data's index, one column per series).
avg_price_columns = [name for name in working_data.columns if name.endswith('|avg_price')]
avg_price_values = working_data[avg_price_columns].values
data_tensor = pt.from_numpy(avg_price_values).type(pt.Tensor)

In [651]:
# Number of extra rows tracked in the filter state beyond row 0;
# the state built below has derivs + 1 rows per series.
derivs = 2

In [652]:
# One raw coefficient per state row, initialised to zero; they are passed
# through a sigmoid before being used as blending weights.
coefs = pt.zeros(derivs+1)

In [653]:
# Initial state per series: row 0 is the first observation, the remaining
# `derivs` rows start at zero. `state` and `state_actual` are separate
# one-element lists holding equal tensors.
zero_rows = [pt.zeros(data_tensor.shape[1]) for _ in range(derivs)]
state = [pt.stack([data_tensor[0, :]] + zero_rows, dim=0)]
state_actual = [pt.stack([data_tensor[0, :]] + zero_rows, dim=0)]

# Prediction matrix: the first row holds the weights 1/k! for k = 0..derivs;
# each following row is that row shifted right by one more position and
# zero-padded on the left, which gives an upper-triangular matrix.
updater = np.stack([1 / factorial(order) for order in range(derivs + 1)])
shifted_rows = [
    np.pad(updater[:-shift], (shift, 0), 'constant', constant_values=0)
    for shift in range(1, derivs + 1)
]
updater = pt.from_numpy(np.stack([updater, *shifted_rows], axis=0)).type(pt.Tensor)

# Track gradients on the raw coefficients and squash them into (0, 1).
coefs.requires_grad_(True)
coefs_sig = pt.sigmoid(coefs)

In [654]:
# Seed the history from the initial state. Indexing with [0] works both when
# `state` is still the one-element list and when it is already the stacked
# tensor left by a previous run of this cell, so the cell can be re-run
# without an AttributeError on `.append`. detach() keeps a re-run from
# hanging on to the previous run's autograd graph.
history = [state[0].detach()]

for new_vals in data_tensor[1:, :]:
    prev_state = history[-1]
    # One-step prediction of every state row from the previous state.
    predict_state = updater @ prev_state
    # Row 0: blend the new observation with its prediction.
    new_state = [(coefs_sig[0] * new_vals) + ((1 - coefs_sig[0]) * predict_state[0, :])]
    for d in range(1, derivs + 1):
        # Row d: blend the change just observed in row d-1 (new value minus
        # previous value) with the predicted row d.
        new_state.append((
                coefs_sig[d] * (new_state[-1] - prev_state[d - 1, :])
            ) + (
                (1 - coefs_sig[d]) * predict_state[d, :]
        ))
    new_state = pt.stack(new_state, dim=0)
    history.append(new_state)

# Stack all steps along a new leading dimension (one entry per row of data_tensor).
state = pt.stack(history, dim=0)

In [669]:
# NOTE(review): `error` is not assigned in any cell visible above this one
# (execution counts jump from 654 to 669) — confirm the cell that defines it
# is still in the notebook, otherwise this fails on a fresh kernel.
error

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [2.0213e-06, 1.1797e-02, 4.6587e-07,  ..., 2.4682e-06, 6.2760e-06,
         3.2342e-05],
        [1.0286e-04, 3.0239e-04, 2.9788e-05,  ..., 9.3546e-06, 1.4369e-06,
         8.4897e-04],
        ...,
        [1.8163e-04, 2.3261e-04, 2.3374e-04,  ..., 4.7611e-06, 1.7411e-05,
         2.8250e-06],
        [1.1644e-05, 1.6144e-04, 7.5846e-05,  ..., 3.3312e-05, 4.0938e-04,
         1.5098e-06],
        [6.4968e-06, 1.0625e-04, 2.3361e-04,  ..., 2.6374e-04, 6.0002e-05,
         6.0084e-05]], grad_fn=<DivBackward0>)