In [None]:
from DataLoader import (
    config,
    loader
)

from copy import deepcopy

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import (
    TruncatedSVD,
    PCA
)
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error
)

import pmdarima as pm
from statsmodels.tsa.arima.model import ARIMA

Read and transform

In [None]:
# Raw monthly acceleration export: ';'-separated, with the first
# config.COUNT_SKIP rows skipped on read.
data_path = '../data/raw/month_csv_acceleration.csv'
df = pd.read_csv(data_path, sep=';', skiprows=config.COUNT_SKIP)

# Project helpers from DataLoader, applied in the original order:
# transform_header first, then fill_empty on its result.
df = loader.transform_header(df)
df = loader.fill_empty(df)

# Every column except the first one is passed to loader.split; its result and
# the frame are then combined by loader.group.
splitted = loader.split(df.columns[1:])
group = loader.group(splitted, df)

TKEO: $\psi [x(n)] = x^{2} (n) - x(n + 1)x(n - 1)$

In [None]:
def tkeo_operator(data, k = 1):
    """
    Teager-Kaiser Energy operator.

    Applies psi[x(n)] = x(n)**2 - x(n - k) * x(n + k) to every signal.

    Parameters
    ----------
    data : sequence of sequences or 2-D array
        Laid out as (n_signals, n_points): ``data[i]`` is one signal.
    k : int, default 1
        Lag (in samples) between the centre sample and its two neighbours.
        ``k = 1`` is the classic operator.

    Returns
    -------
    Same container type as ``data`` (a deep copy). The first and last ``k``
    samples of every signal are left unchanged because the operator needs a
    neighbour on both sides; the input itself is never modified.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f'k must be a positive integer, got {k}')

    filt_data = deepcopy(data)
    for i in range(len(data)):
        # Read from the untouched input, write into the copy, so already
        # filtered samples never feed into later ones.
        signal = data[i]
        for n in range(k, len(signal) - k):
            filt_data[i][n] = signal[n]**2 - signal[n - k]*signal[n + k]
    return filt_data

def normilize(signal: np.ndarray):
    """
    MinMaxScaler + Teager-Kaiser Operator + Scaler

    Parameters
    ----------
    signal : np.ndarray
        2-D array in scikit-learn layout (n_samples, n_signals); every column
        is scaled independently. The callers in this notebook pass a single
        signal as ``x.reshape(-1, 1)``.

    Returns
    -------
    np.ndarray
        Array of the same shape: scaled to [0, 1], passed through the
        Teager-Kaiser operator along the sample axis, then scaled to [0, 1]
        again. Min/max after every stage are printed for inspection.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    signal = scaler.fit_transform(signal)
    print(f'norm1 max: {signal.max()}, min: {signal.min()}')
    # tkeo_operator iterates over rows as signals, whereas the scaler keeps
    # samples in rows. Without the transpose an (n_samples, 1) input is seen as
    # n_samples one-point signals and the operator changes nothing.
    signal = tkeo_operator(signal.T).T
    print(f'tkeo max: {signal.max()}, min: {signal.min()}')
    # fit_transform refits the scaler on the TKEO output.
    signal = scaler.fit_transform(signal)
    print(f'norm2 max: {signal.max()}, min: {signal.min()}')
    return signal


In [None]:
# Author's note (refers to `dict`, presumably the `group` mapping - TODO confirm):
#   len(dict['Н'][key]) // 2          - number of signals
#   [-len(dict['Н'][key]) // 2:]      - slice that selects the signals
# Here only the last entry is taken and reshaped into a single column.
last_signal_column = group['Н']['ПЗН'][-1].reshape(-1, 1)
sns.lineplot(normilize(last_signal_column))
plt.title('Normilized signal')

In [None]:
# Normalise the last 'ПЗН' signal and place it next to the timestamps; the
# normalised values end up in the column named 0.
normalized_column = normilize(group['Н']['ПЗН'][-1].reshape(-1, 1))
tmp = pd.concat([df['date'], pd.DataFrame(normalized_column)], axis=1)

# Average the normalised signal per calendar day and plot it.
day_bucket = tmp.date.dt.floor('d')
sns.lineplot(tmp.groupby(day_bucket)[0].mean())

# Plot params
plt.xticks(rotation=90)
plt.legend(labels=['acceleration_day'])
plt.title('Compressed by day')
# Only 28 data points

In [None]:
# One row per hour: gather every sample of the hour into a list, build a frame
# from those lists (hours with fewer samples get missing values on the right)
# and carry the last available value forward along each row.
samples_per_hour = tmp.groupby(tmp.date.dt.floor('h'))[0].apply(list)
expanded = pd.DataFrame(samples_per_hour.tolist()).ffill(axis=1)

Compare the series at the start, middle and end

In [None]:
# Row positions are derived from the number of hourly rows, so 'mid' and 'end'
# really are the middle and the last row, and the cell also works on frames
# with fewer than 601 rows (the former hard-coded 300 / 600 did not).
n_hours = len(expanded)
for row_label, row_pos in (('start', 0), ('mid', n_hours // 2), ('end', n_hours - 1)):
    plt.plot(expanded.iloc[row_pos, :], label=row_label)
plt.legend()

Compare hourly mean and truncated-SVD compression

In [None]:
# Fit the SVD on the hourly sample columns only. Dropping a previously computed
# 'compressed_tsvd' column makes the cell safe to re-run: otherwise the old
# component would be fed back into the decomposition (and the mix of integer
# and string column names can be rejected by scikit-learn).
hourly_samples = expanded.drop(columns='compressed_tsvd', errors='ignore').ffill(axis=1)

svd = TruncatedSVD(n_components=1, algorithm='arpack', random_state=42)
# fit_transform returns shape (n_rows, 1); take the single component as 1-D.
expanded = hourly_samples.assign(compressed_tsvd=svd.fit_transform(hourly_samples)[:, 0])

In [None]:
# Hourly mean of the normalised signal, re-indexed 0..n-1 so it shares the
# x axis with the SVD component stored in `expanded`.
hourly_mean = tmp.groupby(tmp.date.dt.floor('h'))[0].mean().reset_index(drop=True)

plt.plot(hourly_mean, label='mean')
plt.plot(expanded['compressed_tsvd'], label='tsvd')

# Plot params
plt.xticks(rotation=90)
plt.legend()
plt.title('Compressed by hour')
# NOTE(review): a log axis cannot show non-positive values - confirm both
# series stay positive.
plt.yscale('log')

TRAIN

Split data

In [None]:
def split_series(data, test_size):
    """
    Cut a series into two consecutive parts without shuffling.

    Note that, despite its name, ``test_size`` is the fraction of ``data``
    that goes into the FIRST returned part: the cut index is
    ``int(test_size * len(data))``.

    Returns
    -------
    (train_set, test_set) : the part before the cut and the part from the cut
    onwards, as produced by ``np.split``.
    """
    cut_index = int(test_size * len(data))
    head, tail = np.split(data, [cut_index])
    return head, tail

In [None]:
# Hourly mean of the normalised signal as a plain array, then a chronological
# split: the first 80 % of the hours become y_train, the rest y_test.
hour_bucket = tmp.date.dt.floor('h')
compressed_signal = tmp.groupby(hour_bucket)[0].mean().to_numpy()
y_train, y_test = split_series(compressed_signal, 0.8)

Fit model

In [None]:
# Order search on the training part. stepwise=False together with random=True
# and n_fits=100 asks pmdarima for a random (non-stepwise) search; the seed
# keeps that search reproducible.
model = pm.auto_arima(
    y_train,
    start_p=1, max_p=6,
    start_q=1, max_q=6,
    start_P=0,
    stepwise=False,
    random=True,
    n_fits=100,
    random_state=42,
    n_jobs=4,  # depends on cpu
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)

model.summary()

In [None]:
# Forecast at most 89 steps, but never more than the test part contains, so
# the forecast and the reference slice always have the same length.
horizon = min(89, len(y_test))
pred = model.predict(horizon)

mae = mean_absolute_error(y_test[:horizon], pred)
# RMSE via an explicit square root: the `squared=False` argument of
# mean_squared_error is deprecated and absent from recent scikit-learn.
rmse = np.sqrt(mean_squared_error(y_test[:horizon], pred))
print(f'MAE: {mae}, RMSE: {rmse}')

In [None]:
# Actual values of the first 89 test steps, overlaid with the forecast.
actual_window = y_test[:89]
sns.lineplot(actual_window)
sns.lineplot(pred)

Fit model. Walk-forward: forecast each step from the history up to $F_{t-1}$

In [None]:
# Walk-forward one-step-ahead evaluation.
# predictions[i] is the forecast of y_test[i], made from the training data
# plus y_test[:i] only. The observation is appended AFTER its forecast, so the
# model never sees the value it is scored against. The last test point is not
# forecast, which keeps `predictions` aligned with y_test[:-1].
history = list(y_train)
predictions = []

for i in range(len(y_test) - 1):
    # Refit on the 50 most recent observations only.
    model = ARIMA(history[-50:], order=(0, 2, 2))  # holt's smoothing
    model = model.fit()

    yhat = model.forecast()[0]
    predictions.append(yhat)

    # Reveal the true value only once its forecast has been stored.
    history.append(y_test[i])

In [None]:
# Test observations against the walk-forward forecasts on one axis.
for _series, _label in ((y_test, 'test'), (predictions, 'pred')):
    plt.plot(_series, label=_label)
plt.legend()

In [None]:
# `predictions` has one element fewer than y_test, hence the [:-1] slice.
# RMSE is taken as the square root of the MSE explicitly because the
# `squared=False` argument is deprecated and absent from recent scikit-learn.
print(f'MAE: {mean_absolute_error(y_test[:-1], predictions)}, RMSE: {np.sqrt(mean_squared_error(y_test[:-1], predictions))}')