In [None]:
from IPython.core.display import display
import pandas as pd
from sklearn.linear_model import LinearRegression

from modules.datasets import add_bias_noise, make_time_series_data
from modules.performance import draw_figures

# Generate sample data.
n_samples = 1000
n_samples_train = 800
n_features = 10
y_bias = 5
y_noise = 0.1

x = make_time_series_data(
    n_samples=n_samples,
    n_features=n_features,
    random_state=0,
    start="2021-01-01 00:00:00",
)
# The target is a fixed linear combination of the first three feature columns;
# the other columns of x do not appear in this expression.
y = pd.DataFrame(
    0.5 * x.iloc[:, 0] + 0.4 * x.iloc[:, 1] - 0.3 * x.iloc[:, 2],
    columns=["target"],
)
# NOTE(review): presumably adds a constant offset and random noise to the
# target -- confirm against modules.datasets.add_bias_noise.
y = add_bias_noise(y, bias=y_bias, noise=y_noise, random_state=1)

# Preview the first rows of the features and of the target.
display(x.head(), y.head())

# Split by row position (no shuffling): the first n_samples_train rows form the
# train set and the remaining rows form the test set. The "y_pred" entry starts
# empty and is filled in once the model has been fitted.
data = {
    "x": {"train": x.iloc[:n_samples_train], "test": x.iloc[n_samples_train:]},
    "y": {"train": y.iloc[:n_samples_train], "test": y.iloc[n_samples_train:]},
    "y_pred": {},
}

# Fit a linear regression on the train set; the target values are flattened
# to a 1-D array before being handed to the estimator.
reg = LinearRegression()
reg.fit(data["x"]["train"], data["y"]["train"].values.ravel())

# Show the fitted parameters as one-row tables labelled with the feature
# column names and the target column name respectively.
display(
    pd.DataFrame(
        [reg.coef_],
        index=["Coefficient"],
        columns=data["x"]["train"].columns,
    ),
    pd.DataFrame(
        [reg.intercept_],
        index=["Intercept"],
        columns=data["y"]["train"].columns,
    ),
)

# Predict y for every split of x; each prediction is stored as a single column.
for type_data in data["x"]:
    data["y_pred"][type_data] = reg.predict(data["x"][type_data]).reshape(-1, 1)

# Visualize performance: one entry per split, pairing actual and estimated
# values, then save the resulting figure next to the notebook.
data_list = [
    {
        "name": label,
        "actual": data["y"][split].values,
        "estimated": data["y_pred"][split],
    }
    for label, split in (("Training", "train"), ("Test", "test"))
]
fig, axes = draw_figures(data_list, "Training - Test")
fig.savefig("./sample_fig.png")
