In [3]:
# ===== 設定（ここだけ変更すればOK） =====
CSV_PATH = "monthly_1950-01_2020-07.csv"  # ファイルパス
P = 12                    # ラグ数
TEST_RATIO = 0.2
EXPLAIN_LAST = True       # Trueならテスト末尾を説明
EXPLAIN_DATE = None       # "2020-07-01" のように指定も可
LIME_SAMPLES = 8000
LIME_ALPHA = 0.5
SEED = 0
# ========================================

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# --- データ読み込み ---
df = pd.read_csv(CSV_PATH)
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date").set_index("Date")
y = df["Price"].astype(float)

# --- ラグ生成 ---
def make_lag_df(series, p):
    d = pd.DataFrame({"y": series})
    for k in range(1, p + 1):
        d[f"lag_{k}"] = series.shift(k)
    return d.dropna()

lagdf = make_lag_df(y, P)

X = lagdf.drop(columns=["y"]).values
target = lagdf["y"].values
dates = lagdf.index
feature_names = list(lagdf.drop(columns=["y"]).columns)

# --- 時系列分割 ---
n = len(lagdf)
split = int(n * (1 - TEST_RATIO))
X_train, X_test = X[:split], X[split:]
y_train, y_test = target[:split], target[split:]
dates_test = dates[split:]

# --- ARモデル学習 ---
model = LinearRegression()
model.fit(X_train, y_train)

pred = model.predict(X_test)

print("=== AR({}) テスト性能 ===".format(P))
print("MAE :", mean_absolute_error(y_test, pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, pred)))
print("R2  :", r2_score(y_test, pred))

# --- LIME風実装 ---
rng = np.random.default_rng(SEED)
sigma = X_train.std(axis=0)
sigma = np.where(sigma == 0, 1.0, sigma)

def lime_like(instance):
    d = len(instance)
    kernel_width = np.sqrt(d) * 0.75

    Z = rng.normal(loc=instance, scale=sigma, size=(LIME_SAMPLES, d))
    yZ = model.predict(Z)

    dist = np.sqrt(np.sum(((Z - instance)/sigma)**2, axis=1))
    w = np.exp(-(dist**2)/(kernel_width**2))

    scaler = StandardScaler()
    Zs = scaler.fit_transform(Z)
    inst_s = scaler.transform(instance.reshape(1,-1))

    sur = Ridge(alpha=LIME_ALPHA)
    sur.fit(Zs, yZ, sample_weight=w)

    contrib = sur.coef_ * inst_s.flatten()

    df_exp = pd.DataFrame({
        "feature": feature_names,
        "value": instance,
        "contribution": contrib
    }).sort_values("contribution", key=lambda s: s.abs(), ascending=False)

    return df_exp

# --- 説明対象選択 ---
if EXPLAIN_LAST:
    instance = X_test[-1]
    explain_date = dates_test[-1]
    true_y = y_test[-1]
else:
    explain_date = pd.to_datetime(EXPLAIN_DATE)
    idx = np.where(dates_test == explain_date)[0][0]
    instance = X_test[idx]
    true_y = y_test[idx]

exp = lime_like(instance)

print("\n=== LIME風説明 ===")
print("対象:", explain_date.date())
print("真値:", true_y)
print("予測:", model.predict(instance.reshape(1,-1))[0])
print("\n寄与Top10:")
print(exp.head(10))


=== AR(12) テスト性能 ===
MAE : 35.34606663508952
RMSE: 47.0090982563451
R2  : 0.9741882046846961

=== LIME風説明 ===
対象: 2020-07-01
真値: 1846.51
予測: 1725.1429498625455

寄与Top10:
   feature    value  contribution
0    lag_1  1732.22     -0.933206
2    lag_3  1683.17     -0.503474
1    lag_2  1715.91     -0.464066
5    lag_6  1560.67     -0.418438
9   lag_10  1510.58      0.098103
6    lag_7  1479.13      0.051297
3    lag_4  1591.93      0.050281
10  lag_11  1500.41     -0.048209
7    lag_8  1470.79      0.046676
11  lag_12  1412.89     -0.043645
