In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from scipy.optimize import minimize
from matplotlib import cm

In [None]:
def exp_model(x, u):
    """Evaluate the exponential model ``exp(u[0] + u[1] * x)``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the model is evaluated.
    u : sequence
        Model parameters; only ``u[0]`` (additive term inside the exponent)
        and ``u[1]`` (coefficient of ``x``) are read.

    Returns
    -------
    The value(s) of ``np.exp(u[0] + u[1] * x)``.
    """
    offset, slope = u[0], u[1]
    exponent = offset + slope * x
    return np.exp(exponent)

def opt_func(u, x, y):
    """Sum-of-squared-errors objective for fitting ``exp_model``.

    Parameters
    ----------
    u : sequence
        Candidate model parameters, passed through to ``exp_model``.
    x : array-like
        Points at which the model is evaluated.
    y : array-like
        Observed values compared against the model output.

    Returns
    -------
    The sum over all points of ``(exp_model(x, u) - y) ** 2``.
    """
    residuals = exp_model(x, u) - y
    return np.sum(residuals ** 2)

In [None]:
def generate_models(values, segment_size, init_values=(-2, 1)):
    """Fit one ``exp_model`` parameter vector to every row of ``values``.

    Each row is treated as one segment sampled at x = 0, 1, ..., segment_size - 1
    and fitted by minimising ``opt_func`` with SciPy's Nelder-Mead method.

    Parameters
    ----------
    values : numpy.ndarray
        2-D array; each row holds the observations of one segment.
    segment_size : int
        Number of samples per segment; must equal ``values.shape[1]``.
    init_values : sequence of numbers, optional
        Starting point handed to the optimizer for every segment.

    Returns
    -------
    list of numpy.ndarray
        The optimizer's solution (``res.x``) for each row, in row order.

    Raises
    ------
    ValueError
        If ``segment_size`` does not match the width of ``values``.
    """
    # A mismatch would otherwise either fail deep inside the optimizer or,
    # for a width of 1, broadcast silently and fit the wrong thing.
    if values.shape[1] != segment_size:
        raise ValueError(
            "segment_size ({}) does not match values.shape[1] ({})".format(
                segment_size, values.shape[1]
            )
        )

    # The x grid is identical for every segment, so build it once.
    x_grid = np.arange(segment_size, dtype=float)
    # Fresh array per call: the default start point is never shared or mutated.
    start = np.array(init_values)

    model_params = []
    for row in values:
        res = minimize(opt_func, start, args=(x_grid, row), method="nelder-mead")
        model_params.append(res.x)

    return model_params

In [None]:
def model_vis_plot(segment_size, model_func, model_params, values):
    """Plot the fitted per-segment models above the data they were fitted to.

    Parameters
    ----------
    segment_size : int
        Number of samples per segment; sets both the local x grid passed to
        ``model_func`` and the horizontal offset of each segment.
    model_func : callable
        Called as ``model_func(x, params)`` and expected to return the model
        values for the local grid ``x``.
    model_params : sequence
        One parameter vector per segment, indexed by segment number.
    values : numpy.ndarray
        2-D array with one row per segment; drawn flattened in the lower panel.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(2, 1, figsize=(15, 9))

    # Use the segment_size argument (not a notebook-level global) so the
    # function works for any segmentation it is called with.
    local_x = np.linspace(0, segment_size - 1, segment_size)
    for seg in range(values.shape[0]):
        # Shift the local grid so consecutive segments sit side by side.
        ax[0].plot(local_x + segment_size * seg,
                   model_func(local_x, model_params[seg]))

    ax[1].plot(values.reshape(values.size))
    ax[0].set_title("Models")
    ax[1].set_title("Ground Truth")
    plt.tight_layout()
    plt.show()

In [None]:
# Load the raw case table from CSV; the bare `data` expression on the last
# line renders the frame as this cell's output.
data = pd.read_csv("../data/coviddata.csv")
data

In [None]:
# Case counts ("AnzahlFall") summed per state ("Bundesland") and reporting
# date ("Meldedatum"), pivoted so rows are dates and columns are states.
# The column is selected *before* aggregating so only it is summed; summing
# every column first is wasteful and can fail on non-numeric columns.
# Dates on which any state has no entry are dropped by .dropna().
case_numbers = (
    data.groupby(["Bundesland", "Meldedatum"])["AnzahlFall"]
    .sum()
    .unstack(level=0)
    .dropna()
)

# Parse and sort the dates before smoothing so the rolling window is
# guaranteed to run over rows in chronological order.
case_numbers.index = pd.to_datetime(case_numbers.index)
case_numbers = case_numbers.sort_index()

# First date present before smoothing; used later as the origin for day offsets.
first_day = case_numbers.index[0]

# 7-row rolling mean; the first 6 rows have no full window and are dropped.
case_numbers = case_numbers.rolling(7).mean().dropna()
case_numbers

In [None]:
# Read the text file into a one-column frame; `names=["Dates"]` supplies the
# column label (presumably one date per line -- confirm against the file).
dates = pd.read_table("../data/massnahmensdaten_bw.txt", names=["Dates"])

In [None]:
# Convert each date into a whole-day offset from `first_day`, using the
# vectorized `.dt.days` accessor instead of a per-element Python lambda.
# NOTE: this rebinds `dates` from the loaded frame to a Series of offsets, so
# re-running this cell on its own fails; re-run the loading cell first.
dates = (pd.to_datetime(dates["Dates"]) - first_day).dt.days

In [None]:
# Cut the Berlin series into consecutive, non-overlapping segments of
# SEGMENT_SIZE samples; trailing samples that do not fill a whole segment
# are discarded.
SEGMENT_SIZE = 4
data_size = len(case_numbers)
n_segments = data_size // SEGMENT_SIZE
trim_data_size = n_segments * SEGMENT_SIZE
berlin_series = case_numbers["Berlin"].values
# One row per segment, one column per sample within the segment.
values = berlin_series[:trim_data_size].reshape((n_segments, SEGMENT_SIZE))

In [None]:
# Fit one parameter vector per row (segment) of `values`; the result is a
# list in row order.
model_params = generate_models(values, SEGMENT_SIZE)

In [None]:
# Top panel: each segment's fitted curve; bottom panel: the flattened
# `values` series for visual comparison.
model_vis_plot(SEGMENT_SIZE, exp_model, model_params, values)

In [None]:
# Tabulate the fitted parameters: one row per segment, one column per
# parameter (column 0 -> u[0], column 1 -> u[1]).
model_params = np.array(model_params)
param_df = pd.DataFrame(model_params).rename_axis("Segment ID")
means = param_df.mean()
stds = param_df.std()

# Per-parameter test: is the value less than one standard deviation away
# from that parameter's mean?
within_one_std = (param_df[[0, 1]] - means).abs().lt(stds)

# NOTE(review): `.any(axis=1)` keeps a segment when at least ONE parameter is
# within one std, so a row can survive with the other parameter far off.
# If a segment should be dropped whenever either parameter is an outlier,
# this wants `.all(axis=1)` -- confirm the intent.
no_outliers = param_df[within_one_std.any(axis=1)]

In [None]:
# Fitted slope parameter u[1] of exp(u[0] + u[1] * x) per retained segment
# (left axis), overlaid on the flattened case series (right axis).
fig, ax1 = plt.subplots(figsize=(16, 9))
ax2 = ax1.twinx()

# Segment index * SEGMENT_SIZE is the position of the segment's first sample
# in the flattened series, which puts both curves on the same x scale.
segment_start = no_outliers.index * SEGMENT_SIZE
growth_rate = no_outliers[1]

ax1.plot(segment_start, growth_rate.rolling(4).mean(), color="tab:orange")
ax1.scatter(segment_start, growth_rate, marker="x", s=10, color="tab:orange")
ax1.set_xlabel("time step (segment start)")
# Column 1 is plotted, i.e. u[1]; the previous label named the wrong parameter.
ax1.set_ylabel("u[1] (exponential growth rate)")
# Zero line separates growing (u[1] > 0) from shrinking (u[1] < 0) segments.
ax1.axhline(0, color="black", ls="-")
#ax1.vlines(dates, ymin=-1, ymax=1)

ax2.plot(values.reshape(values.size))
ax2.set_ylabel("cases")
plt.show()

In [None]:
# Fitted offset parameter u[0] of exp(u[0] + u[1] * x) per retained segment
# (left axis), overlaid on the flattened case series (right axis).
fig, ax1 = plt.subplots(figsize=(16, 9))
ax2 = ax1.twinx()

# Segment index * SEGMENT_SIZE is the position of the segment's first sample
# in the flattened series, which puts both curves on the same x scale.
segment_start = no_outliers.index * SEGMENT_SIZE
log_intercept = no_outliers[0]

ax1.plot(segment_start, log_intercept.rolling(3).mean(), color="tab:orange")
ax1.scatter(segment_start, log_intercept, marker="x", s=10, color="tab:orange")
ax1.set_xlabel("time step (segment start)")
# Column 0 is plotted, i.e. u[0]; the previous label named the wrong parameter.
ax1.set_ylabel("u[0] (log-scale intercept)")

ax2.plot(values.reshape(values.size))
ax2.set_ylabel("cases")
plt.show()