#### Proof of concept

Идея:

* Создадим разные секретные модели. Даже не обязательно, чтобы они показывали высокую точность.
* Создадим одну модель для остатков.
* Убедимся в том, что локальные модели для остатков повышают скор соответствующих секретных моделей.
* Убедимся в том, что модель для остатков получает буст от федеративного обучения.



In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.display import Markdown as md
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import datetime
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

import warnings

warnings.filterwarnings("ignore")


# Dataset reference (presumably where the CSV was downloaded from):
# https://www.kaggle.com/code/avikumart/timeseries-stock-price-analysis-forecasting/input
df = pd.read_csv("Datasets/all_stocks_5yr.csv")
df["date"] = pd.to_datetime(df["date"])

# Keep only the AAPL rows.  Take an explicit copy: new columns are assigned
# to `df` further down, and writing into a filtered selection is what pandas
# reports as SettingWithCopyWarning.  The copy makes `df` an independent
# frame, so those assignments are unambiguous.
df = df[df.Name == "AAPL"].copy()

In [16]:
# Closing prices as a standalone Series.
y = df["close"]

In [42]:
def return_metric(y_true, y_pred):
    """Sum the realised changes captured by following the forecast direction.

    For every step ``t`` the forecast ``y_pred[t + 1]`` is compared with the
    current observation ``y_true[t]``.  The sign of that difference
    (+1, -1 or 0) is multiplied by the realised change
    ``y_true[t + 1] - y_true[t]``, and the products are summed.

    Args:
        y_true: 1-D sequence of observed values (list, ndarray or Series).
        y_pred: 1-D sequence of forecasts, aligned with ``y_true`` by
            position.  Its first element is never used.

    Returns:
        The summed signed change as a ``float``; ``0.0`` when fewer than two
        observations are given.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_pred), "y_true and y_pred must have the same length"

    # Work on plain arrays so the computation is purely positional and does
    # not depend on the inputs being pandas objects or on their index labels.
    actual = np.asarray(y_true, dtype=float)
    forecast = np.asarray(y_pred, dtype=float)

    current = actual[:-1]
    realised_change = actual[1:] - current
    position = np.sign(forecast[1:] - current)

    # Plain left-to-right summation, so scores stay reproducible with values
    # obtained from an element-by-element sum.
    return float(sum(realised_change * position))

In [2]:
# Line chart of the closing price for every row of `df`.
close_trace = go.Scatter(x=df["date"], y=df["close"], name="close")
fig = go.Figure()
# `add_trace` returns the figure, so as the last expression of the cell it
# also renders the chart.
fig.add_trace(close_trace)

In [3]:
# Chronological split: rows dated before `d` form the training set, rows
# dated in [d, test_finish) form the test set; later rows belong to neither.
d = datetime.date(2015, 9, 27)
test_finish = datetime.date(2017, 3, 27)

row_day = df.date.dt.date
in_train = row_day < d
in_test = (row_day >= d) & (row_day < test_finish)

df_train = df[in_train]
df_test = df[in_test]

In [4]:
# Closing prices of the train and test windows on one chart.
fig = go.Figure()
for split, label in ((df_train, "Train"), (df_test, "Test")):
    fig.add_trace(go.Scatter(x=split.date, y=split.close, name=label))
fig  # last expression of the cell, so the notebook renders the figure

In [5]:
# Neighbouring closes for each row: "next" is the close one row later,
# "prev" the close one row earlier (NaN at the respective end of the frame).
close = df["close"]
df["next"] = close.shift(-1)
df["prev"] = close.shift(1)

In [6]:
from sklearn.metrics import mean_absolute_error as mae

### Prev day

In [45]:
# Score the "previous close" baseline on the test rows.  The rows are looked
# up in `df` because `df_test` was sliced before the "prev" column existed.
test_rows = df.loc[df_test.index]
(
    mae(test_rows.close, test_rows.prev),
    return_metric(test_rows.close, test_rows.prev),
)

(1.0673138297872335, 0.0)

In [8]:
# Exponentially weighted means of the close, one column per smoothing factor.
for column, alpha in (("ema_0.05", 0.05), ("ema_0.1", 0.1)):
    df[column] = df["close"].ewm(alpha=alpha).mean()

In [46]:
# Score the alpha=0.05 EMA on the test rows (looked up in `df`, where the
# EMA column lives).
test_rows = df.loc[df_test.index]
ema_forecast = test_rows["ema_0.05"]
(
    mae(test_rows.close, ema_forecast),
    return_metric(test_rows.close, ema_forecast),
)

(4.43656863979577, -39.499999999999915)

In [47]:
# Score the alpha=0.1 EMA on the test rows (looked up in `df`, where the
# EMA column lives).
test_rows = df.loc[df_test.index]
ema_forecast = test_rows["ema_0.1"]
(
    mae(test_rows.close, ema_forecast),
    return_metric(test_rows.close, ema_forecast),
)

(2.9081286494513785, -46.239999999999895)

In [11]:
# Train/test closes together with the previous-day and EMA(0.1) baselines
# over the test window.
test_rows = df.loc[df_test.index]
fig = go.Figure()
for xs, ys, label in (
    (df_train.date, df_train.close, "Train"),
    (df_test.date, df_test.close, "Test"),
    (df_test.date, test_rows.prev, "prev day"),
    (df_test.date, test_rows["ema_0.1"], "ema 0.1"),
):
    fig.add_trace(go.Scatter(x=xs, y=ys, name=label))
fig  # last expression of the cell, so the notebook renders the figure

### Holt

In [12]:
def seq_fit_predict():
    """Return one-step-ahead Holt forecasts, one per row of ``df_test``.

    For each position ``n`` from ``len(df_train)`` up to
    ``len(df_train) + len(df_test) - 1``, a damped-trend Holt model is fitted
    to the first ``n`` closing prices of the module-level ``df`` and asked
    for a single-step forecast.

    NOTE(review): this assumes the rows of ``df_train`` are the first
    ``len(df_train)`` rows of ``df`` and that the ``df_test`` rows follow
    them directly - confirm against how the split is built.

    Returns:
        A list with one forecast per test position, in order.
    """
    first = len(df_train)
    stop = first + len(df_test)

    def _forecast_next(n_obs):
        # Fit on everything observed so far, then predict the next value.
        model = Holt(
            df.close.iloc[:n_obs],
            damped_trend=True,
            initialization_method="estimated",
        )
        fitted = model.fit(smoothing_level=0.8, smoothing_trend=0.2)
        return fitted.forecast(1).iloc[-1]

    return [_forecast_next(n_obs) for n_obs in range(first, stop)]


holt_pred = seq_fit_predict()
# `df_test` was produced by filtering `df`, so adding a column to it in place
# is the pattern pandas reports as SettingWithCopyWarning.  `assign` returns
# a new frame carrying the extra "holt" column, which makes it explicit that
# only `df_test` is affected.
df_test = df_test.assign(holt=holt_pred)

In [48]:
# MAE and directional-return score of the Holt forecasts on the test window.
holt_mae = mae(df_test.close, df_test.holt)
holt_return = return_metric(df_test.close, df_test.holt)
(holt_mae, holt_return)

(1.078452891452233, 11.379999999999939)

In [14]:
# Train/test closes with all three forecasts (previous day, EMA 0.1, Holt)
# drawn over the test window.
test_rows = df.loc[df_test.index]
test_dates = df_test.date
traces = [
    go.Scatter(x=df_train.date, y=df_train.close, name="Train"),
    go.Scatter(x=test_dates, y=df_test.close, name="Test"),
    go.Scatter(x=test_dates, y=test_rows.prev, name="prev day"),
    go.Scatter(x=test_dates, y=test_rows["ema_0.1"], name="ema 0.1"),
    go.Scatter(x=test_dates, y=df_test.holt, name="Holt"),
]
fig = go.Figure(data=traces)
fig  # last expression of the cell, so the notebook renders the figure