In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA

In [None]:
# Read the raw case records from the CSV export; the aggregation cell below
# relies on its "Bundesland", "Meldedatum" and "AnzahlFall" columns.
data = pd.read_csv(filepath_or_buffer="../data/coviddata.csv")
# Bare expression so the notebook renders the frame as this cell's output.
data

In [None]:
# Case counts per state and reporting date: rows are "Meldedatum" values,
# columns are "Bundesland" values.
# "AnzahlFall" is selected BEFORE aggregating so that only this column is
# summed; summing the whole table first would also aggregate every unrelated
# (possibly non-numeric) column just to throw the result away.
case_numbers = (
    data.groupby(["Bundesland", "Meldedatum"])["AnzahlFall"]
    .sum()
    .unstack(level=0)
    .fillna(0)  # state/date combinations without any record count as zero cases
)
case_numbers

In [None]:
# Split the Baden-Württemberg case series into consecutive, non-overlapping
# segments of SEGMENT_SIZE rows each. The result `values` has shape
# (SEGMENT_SIZE, n_segments); column j holds segment j.
# NOTE(review): rows are the reporting dates present in `case_numbers`; a
# segment spans SEGMENT_SIZE calendar days only if no date is missing — confirm.
SEGMENT_SIZE = 14
data_size = case_numbers.shape[0]
n_segments = data_size // SEGMENT_SIZE
# Trailing rows that do not fill a complete segment are dropped.
trim_data_size = SEGMENT_SIZE * n_segments
# Reshape row-major to (n_segments, SEGMENT_SIZE) so every row is one
# contiguous run of observations, then transpose so segments become columns.
# Reshaping straight to (SEGMENT_SIZE, n_segments) would instead place every
# n_segments-th observation into a column, i.e. interleave the time series.
values = (
    case_numbers["Baden-Württemberg"]
    .values[:trim_data_size]
    .reshape((n_segments, SEGMENT_SIZE))
    .T
)

In [None]:
# Sanity check on a single segment: fit an ARIMA(2, 0, 0) model without a
# trend term and plot its predictions against the observations.
# Both curves must come from the same segment, otherwise the comparison is
# meaningless, so the segment index is defined once and reused.
# (An AutoReg fit with lags=4, trend="n" was tried as an alternative.)
TEST_SEGMENT = 1
test_series = values[:, TEST_SEGMENT]
test_model = ARIMA(test_series,
                   order=(2, 0, 0), trend="n").fit()
# `end` is inclusive: this yields the in-sample predictions for the segment
# plus one out-of-sample step, so the prediction curve is one point longer.
pred = test_model.predict(start=0, end=SEGMENT_SIZE)
plt.plot(pred, label="prediction")
plt.plot(test_series, label="reality")
plt.legend()

In [None]:
# Inspect the fitted parameter vector of the single-segment test model.
# NOTE(review): for order=(2, 0, 0) with trend="n" this is presumably the two
# AR coefficients followed by the innovation variance — confirm via
# test_model.param_names.
test_model.params

In [None]:
# Fit one ARIMA(2, 0, 0) model without trend term per column of `values`
# and stack the fitted parameter vectors into one row per segment.
model_params = np.array([
    ARIMA(values[:, seg_idx], order=(2, 0, 0), trend="n").fit().params
    for seg_idx in range(values.shape[1])
])

# Organisation and PCA
N_BINS = 3
# Total cases per segment, cut into N_BINS equal-width intervals; used as the
# colour grouping ("cases") in both frames.
segment_totals = values.sum(axis=0)
case_bins = pd.cut(segment_totals, N_BINS)

# One row per segment, one (integer-labelled) column per model parameter.
param_df = pd.DataFrame(model_params).rename_axis("Segment ID")

# PCA must run on the purely numeric frame, i.e. before "cases" is attached.
pca = PCA(n_components=3)
transformed_params = pd.DataFrame(pca.fit_transform(param_df)).assign(cases=case_bins)

param_df["cases"] = case_bins

In [None]:
# Pairwise scatter matrix of the raw per-segment model parameters, coloured
# by the "cases" bin of each segment.
sns.pairplot(data=param_df, hue="cases")

In [None]:
# Same scatter matrix for the PCA-transformed parameters, coloured by the
# "cases" bin of each segment.
sns.pairplot(data=transformed_params, hue="cases")