# PO法
$ K = P^f H^T (H P^f H^T + R)^{-1} $に対して

$ P^a = (I - KH) P^f (I - KH)^T + K R K^T = (I - KH) P^f $ となるように

$X^a = X^f + K(y^o + \epsilon - H X^f)$, $\epsilon \sim N(0, R)$ と観測に摂動を与えて更新.



In [None]:
%matplotlib inline
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Make the notebook's own directory importable before loading the local modules
sys.path.append('./')
from utils import make_lorenz96, rk4, error_series_kf, plot_error_KF, estimate_error_with_params

from kalman_filters import EnsembleKalmanFilter as ExKF
# The cells below refer to the filter as EnKF, so bind that alias as well
from kalman_filters import EnsembleKalmanFilter as EnKF

In [None]:
# Lorenz96 settings
# J is used below as the size of the identity matrix (state/observation
# dimension); F is the argument given to make_lorenz96 (presumably the
# forcing term -- confirm in utils).
J = 40
F = 8
lorenz = make_lorenz96(F)

# Assimilation step
# The model itself is advanced in increments of 0.01 (see M below)
dt = 0.05

# Model transition function (nonlinear)
# Advances the state in increments of 0.01
# dt is the assimilation step
def M(x, dt):
    """Advance the model state ``x`` over one assimilation interval ``dt``.

    The interval is covered by repeated ``rk4`` steps of fixed size 0.01
    applied to the ``lorenz`` right-hand side built by ``make_lorenz96``.

    Args:
        x: Model state handed to ``rk4``.
        dt: Length of the interval to integrate over. It is expected to be a
            multiple of 0.01; other values are rounded to the nearest whole
            number of 0.01 steps.

    Returns:
        The state after the integration steps (``x`` itself when the step
        count is 0).
    """
    # int() alone truncates, and float division can land just below the
    # intended count (0.29 / 0.01 == 28.999999999999996), which would
    # silently drop one integration step. Round to the nearest count first.
    n_steps = int(round(dt / 0.01))
    for _ in range(n_steps):
        x = rk4(0, x, 0.01, lorenz)
    return x

# Identity matrix of size J
I = np.identity(J)

# Observation operator: the identity.
# Built as its own array instead of an alias of I, so that an in-place edit of
# one matrix can never leak into the others.
H = np.identity(J)

# Model error covariance; a perfect model is assumed at first
Q = np.zeros((J, J))

# Observation error covariance; it is rescaled by constant factors later,
# which is why it must not share storage with I or H.
R = np.identity(J)

# Observations and true values
# NOTE(review): end_step is not referenced by any other visible cell.
end_step = 500 # for development
y = np.load('data/obs_atr.npy')
true = np.load('data/true_atr.npy')

# Initial values for the KF
# The seed is fixed so the randomly picked initial state is reproducible.
np.random.seed(1)
# Initial state: the entry of `true` at a random index in [0, len(true)-1).
x_0 = true[np.random.randint(len(true)-1)]
# Initial covariance: 25 times the identity.
P_0 = 25*I


localization なし
inflation なし


In [None]:
# 実行済
# po = EnKF(M, H, Q, R, y, x_0, P_0, dim_x=J, dim_y=J, N=40, alpha=1, localization=False)
# po.forward_estimation()
# np.save('data/po/po.npy', po.x)

In [None]:
# Load the stored PO result for this section (data/po/po.npy) and hand it to
# plot_error_KF together with the truth and the observations; index 0 of every
# series is skipped.
# plot_error_KF comes from utils -- presumably it plots the estimation error
# over time; confirm there.
po_result = np.load('data/po/po.npy')
plot_error_KF(true[1:], y[1:], [po_result[1:]])

localization あり
inflation なし


In [None]:
# 実行済
# po = EnKF(M, H, Q, R, y, x_0, P_0, dim_x=J, dim_y=J, N=40, alpha=1, localization=True)
# po.forward_estimation()
# np.save('data/po/po_loc.npy', po.x)

In [None]:
# Load the stored PO result for the localization-only section
# (data/po/po_loc.npy) and pass it to plot_error_KF with the truth and the
# observations; index 0 of every series is skipped.
po_loc_result = np.load('data/po/po_loc.npy')
plot_error_KF(true[1:], y[1:], [po_loc_result[1:]])

localization なし
inflation あり


In [None]:
# 実行済
# po_inf = EnKF(M, H, Q, R, y, x_0, P_0, dim_x=J, dim_y=J, N=40, alpha=1.1, localization=False)
# po_inf.forward_estimation()
# np.save('data/po/po_inf.npy', po_inf.x)

In [None]:
# Load the stored PO result for the inflation-only section
# (data/po/po_inf.npy) and pass it to plot_error_KF with the truth and the
# observations; index 0 of every series is skipped.
po_inf_results = np.load('data/po/po_inf.npy')
plot_error_KF(true[1:], y[1:], [po_inf_results[1:]])

localizationあり, inflationあり

In [None]:
# 実行済
# po_loc_inf = EnKF(M, H, Q, R, y, x_0, P_0, dim_x=J, dim_y=J, N=40, alpha=1.1, localization=True)
# po_loc_inf.forward_estimation()
# np.save('data/po/po_loc_inf.npy', po_loc_inf.x)

In [None]:
# Load the stored PO result for the localization + inflation section
# (data/po/po_loc_inf.npy) and pass it to plot_error_KF with the truth and the
# observations; index 0 of every series is skipped.
po_loc_inf_results = np.load('data/po/po_loc_inf.npy')
# Plot the array loaded on the line above (same name as the assignment).
plot_error_KF(true[1:], y[1:], [po_loc_inf_results[1:]])

##  KFとの比較


In [None]:
# Load the stored ExKF result (data/exkf/exkf_opti.npy -- presumably the tuned
# ExKF run; confirm where it is produced) and the stored PO result with
# localization + inflation, then plot both in one figure. Index 0 of every
# series is skipped; the legend labels follow the order of the result list.
exkf_opti_result = np.load('data/exkf/exkf_opti.npy')
po_loc_inf_result = np.load('data/po/po_loc_inf.npy')
plot_error_KF(true[1:], y[1:], [exkf_opti_result[1:], po_loc_inf_result[1:]], legends=['ExKF', 'EnKF(PO)'])

## パラメータチューニング
$ N = 10, 20, 30, 40, 50 $でそれぞれ最適な$\alpha$を求める

-> 56min26s

In [None]:
%%time
# params_N = np.arange(1, 6)*10
# optimal_alpha_by_N = []
# for N in params_N:
#     results_po_inf = []
#     params_alpha = []
#     for k in range(20):
#         inf = 0.05*k
#         params_alpha.append(1+inf)
#         po_inf = EnKF(M, H, Q, R, y, x_0, P_0, N=N, alpha=1 + inf)
#         po_inf.forward_estimation()
#         results_po_inf.append(po_inf.x)
#     optimal_alpha, optimal_idx = estimate_error_with_params(true, results_po_inf, params_alpha, 'alpha', plot=False)
#     optimal_alpha_by_N.append(optimal_alpha)
#     optimal_result = results_po_inf[optimal_idx]
#     np.save('data/po/po_{}ensembles_inflation_parametrized.npy'.format(N), np.array(results_po_inf))
#     np.save('data/po/po_{}ensembles_optimal.npy'.format(N), optimal_result)
# np.save('data/po/po_optimal_alpha_for_10to50members.npy', optimal_alpha_by_N)

In [None]:
# Ensemble sizes 10, 20, 30, 40, 50
params_N = np.arange(10, 60, 10)
# One stored "optimal" result per ensemble size, in the same order as params_N
optimal_results_by_N = [
    np.load('data/po/po_{}ensembles_optimal.npy'.format(n_members))
    for n_members in params_N
]
# Evaluate the stored results against the truth, labelled by 'N'
_ = estimate_error_with_params(true, optimal_results_by_N[:], params_N[:], 'N')
# _ = estimate_error_with_params(true, optimal_results_by_N[2:], params_N[2:], 'N')

## 観察
- 20memberまではdivergenceは起きていない．
- 50memberでもrmseが0.27程度なので精度が低い．

In [None]:
# params_N = np.arange(1,6)*10
# results = np.array([np.load('data/po/po_{}ensembles_inflation_parametrized.npy'.format(N)) for N in params_N])
# params_alpha = [1 + np.round(0.05*k, 3) for k in range(20)]

# T = true.shape[0] - 200
# true = np.load('data/true_atr.npy')
# errors = np.zeros((20, 5))
# for n in range(5):
#     for k in range(20):
#         errors[k, n] = error_series_kf(true, results[n,k], np.arange(T)+200).mean()
# df = pd.DataFrame(errors, index=params_alpha, columns=params_N)
# df.to_csv('data/po/po_N_alpha.csv')

In [None]:
%%time
# Single PO run with ensemble size N=8, alpha=1.1 and sigma=6, assimilating
# only the first 300 observations.
N=8
po = EnKF(M, H, Q, R, y[:300], x_0, P_0, N=N, alpha=1.1, sigma=6)
po.forward_estimation()
# NOTE(review): the filter only receives y[:300] while the full `true` array
# is passed here -- confirm that estimate_error_with_params copes with the
# differing lengths.
_ = estimate_error_with_params(true, [po.x], [N], 'N')

In [None]:
%%time
params_sigma = np.arange(1,11)
params_alpha = []
errors = np.zeros((3, len(params_sigma)))
T = 100
for n in range(len(params_sigma)):
    sigma = params_sigma[n]
    results_po = []
    for k in range(3):
        inf = 0.2 + 0.01*k
        if n==0:
            params_alpha.append(1+inf)
        po = EnKF(M, H, Q, R, y[:300], x_0, P_0, N=10, alpha=1 + inf, sigma=sigma)
        po.forward_estimation()
        results_po.append(po.x)
        error = error_series_kf(true, po.x, np.arange(T)+200).mean()
        errors[k, n] = error
        print('complete: {}, {}, error: {}'.format(sigma,1+inf, error))
#     optimal_alpha, optimal_idx, optimal_error = estimate_error_with_params(true, results_po, params_alpha, 'alpha', plot=False)
#     optimal_alpha_by_N.append(optimal_alpha)
    # optimal_result = results_po[optimal_idx]
    # np.save('data/srf/srf_{}ensembles_inflation_parametrized.npy'.format(N), np.array(results_po))
    # np.save('data/srf/srf_{}ensembles_optimal.npy'.format(N), optimal_result)
df = pd.DataFrame(errors, index=params_alpha, columns=params_sigma)


## POのまとめ
### 最適結果:
- $ N=40 $(要検討)
- $ \alpha: 1.05$
- localization: 2
- 実行時間: 51.9s
- rmse: 0.2705
- memo: 
  - $N=20$では24s
  - $N=200$では4minかかる

### 問題
- 新たにノイズを導入している.
- アンサンブル数$N$回ノイズを加える計算がある．