In [5]:
# -*- coding: utf-8 -*-
"""
Shadow Planet Test - Multi-Model Loop (M8+2, M8+3, M0+3, M0+2)
Key Updates:
1. Iterates through 4 specific models with different Split Dates.
2. Uses Buffer Smoothing logic for all.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib
import os
import sys
from tqdm import tqdm

# --- Check Dependencies ---
try:
    from pybaselines.whittaker import asls
except ImportError:
    print("Error: pybaselines library not found. Please run: pip install pybaselines")
    sys.exit(1)

# --- 1. Configuration Area ---

# 基础路径
MODEL_DIR = '../../results/05_p_m_a_model/p_model_4'
SSN_FILE = '../../data/ready/ssn_daily_1849_2025.csv'
PLANET_POS = '../../data/ready/781_planets_dwarfs_asteroids_xyz.parquet'
PLANET_VEL = '../../data/ready/781_planets_dwarfs_asteroids_velocity.parquet'

# 全局时间配置
TRAIN_START = '1855-12-02'
TEST_END    = '2019-11-30'

# 模型列表与对应的文件名 (保持顺序对应)
MODEL_LABELS = ['M8+2', 'M8+3', 'M0+3', 'M0+2']
MODEL_FILES = [
    '12stars_Ridge_CV-R2_0.6880_OOT-SMOOTH-R2_0.4587_OOT-RAW-R2_0.3326_Params_alpha_0.1000.joblib',
    '15stars_Ridge_CV-R2_0.7291_OOT-SMOOTH-R2_0.4643_OOT-RAW-R2_0.3510_Params_alpha_0.0010.joblib',
    '21stars_Ridge_CV-R2_0.7281_OOT-SMOOTH-R2_0.5876_OOT-RAW-R2_0.4431_Params_alpha_0.1339.joblib',
    '25stars_Ridge_CV-R2_0.6885_OOT-SMOOTH-R2_0.6117_OOT-RAW-R2_0.4448_Params_alpha_0.3329.joblib'
]

# 每个模型的训练截止日期 (Split Date)
MODEL_SPLITS = {
    'M8+2': '1996-08-01', 
    'M8+3': '1986-08-01',
    'M0+3': '1986-09-01', 
    'M0+2': '1996-08-01'
}

# Experiment Parameters
N_TRIALS = 200          
LAMBDA_SMOOTH = 7e7     
MIN_SHIFT = 10          
MAX_SHIFT = 50         
RANDOM_SEED = 42        

# --- 2. Helper Functions ---

def get_smoothed_segment(df_ssn, start_date, end_date, lam, buffer_days=365*3):
    """
    带缓冲区的平滑函数 (Leakage-proof with buffer)
    """
    start_dt = pd.to_datetime(start_date)
    end_dt = pd.to_datetime(end_date)
    
    # 1. 扩展缓冲区
    buf_start = start_dt - pd.Timedelta(days=buffer_days)
    buf_end = end_dt + pd.Timedelta(days=buffer_days)
    
    # 2. 切片扩展数据
    segment_expanded = df_ssn.loc[buf_start:buf_end].copy()
    if len(segment_expanded) == 0:
        segment_expanded = df_ssn.loc[start_dt:buf_end].copy()
        
    # 3. 平滑
    smoothed_expanded, _ = asls(segment_expanded.values, lam=lam, p=0.5)
    s_expanded = pd.Series(smoothed_expanded, index=segment_expanded.index)
    
    # 4. 切回需要的精确时间段
    return s_expanded.loc[start_date:end_date].rename('SSN_Smooth_Segment')

def load_common_data():
    """
    加载所有模型通用的原始数据 (SSN 和 行星数据)
    """
    print("--- Loading Common Data (Planets & SSN) ---")
    
    # A. Load Planet Data
    df_pos = pd.read_parquet(PLANET_POS)
    df_vel = pd.read_parquet(PLANET_VEL)
    df_pos.index = pd.to_datetime(df_pos['date']) if 'date' in df_pos.columns else pd.to_datetime(df_pos.index)
    df_vel.index = pd.to_datetime(df_vel['date']) if 'date' in df_vel.columns else pd.to_datetime(df_vel.index)
    df_planets = df_pos.join(df_vel, how='inner', lsuffix='_pos', rsuffix='_vel').sort_index()
    
    # B. Load SSN Data
    df_ssn = pd.read_csv(SSN_FILE)
    df_ssn.columns = [c.lower().strip() for c in df_ssn.columns]
    date_col = next((c for c in df_ssn.columns if 'date' in c or 'day' in c), None)
    val_col = next((c for c in df_ssn.columns if 'ssn' in c or 'sunspot' in c), df_ssn.columns[1])
    df_ssn[date_col] = pd.to_datetime(df_ssn[date_col])
    df_ssn = df_ssn.set_index(date_col)[val_col].asfreq('D').fillna(0)
    
    return df_planets, df_ssn

def prepare_model_specific_data(model_path, split_date, df_planets, df_ssn):
    """
    为特定模型加载参数并生成特定的 Train/Test 目标变量
    """
    # 1. Load Model Params
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")
    
    saved_data = joblib.load(model_path)
    features = saved_data.get('features')
    best_params = saved_data.get('best_params')
    alpha = best_params.get('model__alpha', best_params.get('alpha', 1.0))
    
    # 2. Determine Dates based on Split
    train_end = split_date
    test_start = (pd.to_datetime(split_date) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
    
    print(f"   -> Split Date: {train_end}")
    print(f"   -> Test Start: {test_start}")

    # 3. Generate Ground Truth (With Buffer)
    y_train_target = get_smoothed_segment(df_ssn, TRAIN_START, train_end, LAMBDA_SMOOTH)
    y_test_target = get_smoothed_segment(df_ssn, test_start, TEST_END, LAMBDA_SMOOTH)
    
    return df_planets[features], y_train_target, y_test_target, alpha

# --- 3. Main Execution Loop ---
def run_multi_model_shadow_test():
    plt.style.use('seaborn-v0_8-whitegrid')
    
    # 1. 加载一次通用数据
    df_planets_all, df_ssn_all = load_common_data()
    
    # 2. 遍历模型
    for label, filename in zip(MODEL_LABELS, MODEL_FILES):
        print(f"\n{'='*60}")
        print(f"PROCESSING MODEL: {label}")
        print(f"File: {filename}")
        print(f"{'='*60}")
        
        # 设置随机种子 (确保每个模型的可复现性)
        np.random.seed(RANDOM_SEED) 
        
        full_model_path = os.path.join(MODEL_DIR, filename)
        split_date = MODEL_SPLITS[label]
        
        # A. 准备该模型的数据
        try:
            X_raw, y_train_gt, y_test_gt, alpha = prepare_model_specific_data(
                full_model_path, split_date, df_planets_all, df_ssn_all
            )
        except Exception as e:
            print(f"Skipping {label} due to error: {e}")
            continue

        # B. 复现 Baseline (Retrain on specific split)
        print(f"\n--- 1. Verifying Baseline ({label}) ---")
        
        X_train_orig = X_raw.loc[X_raw.index.intersection(y_train_gt.index)]
        y_train_final = y_train_gt.loc[X_train_orig.index]
        
        X_test_orig = X_raw.loc[X_raw.index.intersection(y_test_gt.index)]
        y_test_final = y_test_gt.loc[X_test_orig.index]
        
        pipe = Pipeline([('scaler', StandardScaler()), ('model', Ridge(alpha=alpha))])
        pipe.fit(X_train_orig, y_train_final)
        baseline_score = pipe.score(X_test_orig, y_test_final)
        print(f"Baseline Score: {baseline_score:.4f}")

        # C. 运行 Shadow Tests
        print(f"\n--- 2. Running {N_TRIALS} Shadow Tests ---")
        shadow_scores = []
        
        for _ in tqdm(range(N_TRIALS), desc=f"Shadow {label}"):
            # Random Shift
            sign = np.random.choice([-1, 1])
            shift_days = np.random.randint(MIN_SHIFT*365, MAX_SHIFT*365)
            
            # Shift X
            X_shadow = X_raw.copy()
            X_shadow.index = X_shadow.index + pd.Timedelta(days=sign * shift_days)
            
            # Slice Training
            idx_train = X_shadow.index.intersection(y_train_gt.index)
            if len(idx_train) < 365*20: continue
            X_train_s = X_shadow.loc[idx_train]
            y_train_s = y_train_gt.loc[idx_train]
            
            # Slice Test
            idx_test = X_shadow.index.intersection(y_test_gt.index)
            if len(idx_test) < 365*5: continue
            X_test_s = X_shadow.loc[idx_test]
            y_test_s = y_test_gt.loc[idx_test]
            
            # Train & Score
            pipe_s = Pipeline([('scaler', StandardScaler()), ('model', Ridge(alpha=alpha))])
            pipe_s.fit(X_train_s, y_train_s)
            score_s = pipe_s.score(X_test_s, y_test_s)
            shadow_scores.append(score_s)

        # D. 结果统计与绘图
        scores_arr = np.array(shadow_scores)
        if len(scores_arr) == 0:
            print("No valid shadow trials found.")
            continue
            
        mean_shadow = np.mean(scores_arr)
        std_shadow = np.std(scores_arr)
        # P-Value logic: Proportion of shadow scores >= baseline
        p_value = np.sum(scores_arr >= baseline_score) / len(scores_arr)
        
        print(f"\n--- Final Results ({label}) ---")
        print(f"Original R2: {baseline_score:.4f}")
        print(f"Shadow Mean: {mean_shadow:.4f} (+/- {std_shadow:.4f})")
        print(f"Z-Score:     {(baseline_score - mean_shadow) / (std_shadow + 1e-9):.4f}")
        print(f"P-Value:     {p_value:.5f}")
        
        # Plotting
        plt.figure(figsize=(10, 6))
        sns.histplot(scores_arr, bins=30, kde=True, color='gray', alpha=0.5, label='Shadow (Null)')
        plt.axvline(x=baseline_score, color='#e74c3c', linestyle='-', linewidth=3, label=f'Baseline ({baseline_score:.3f})')
        plt.axvline(x=mean_shadow, color='#2c3e50', linestyle='--', linewidth=2, label=f'Mean ({mean_shadow:.3f})')
        
        margin = 0.1
        min_x = min(scores_arr.min(), baseline_score) - margin
        max_x = max(scores_arr.max(), baseline_score) + margin
        plt.xlim(min_x, max_x)
        
        plt.title(f'Shadow Test: {label} (Split {split_date})', fontsize=14)
        plt.legend()
        
        res_text = "PASSED" if p_value < 0.05 else "FAILED"
        col = 'green' if p_value < 0.05 else 'red'
        plt.gca().text(0.05, 0.9, f"{res_text}\nP={p_value:.4f}", transform=plt.gca().transAxes, 
                       fontsize=14, color=col, bbox=dict(facecolor='white', alpha=0.9))
        
        # Save specific file
        safe_label = label.replace('+', '_plus_')
        save_path = f'../../results/05_p_m_a_model/shadow_test_{safe_label}.png'
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved Plot: {os.path.abspath(save_path)}")
        plt.close() # Close plot to free memory

if __name__ == "__main__":
    run_multi_model_shadow_test()

--- Loading Common Data (Planets & SSN) ---

PROCESSING MODEL: M8+2
File: 12stars_Ridge_CV-R2_0.6880_OOT-SMOOTH-R2_0.4587_OOT-RAW-R2_0.3326_Params_alpha_0.1000.joblib
   -> Split Date: 1996-08-01
   -> Test Start: 1996-08-02

--- 1. Verifying Baseline (M8+2) ---


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Baseline Score: 0.4579

--- 2. Running 200 Shadow Tests ---


Shadow M8+2: 100%|███████████████████████████████████████████████████████████████████| 200/200 [01:09<00:00,  2.89it/s]



--- Final Results (M8+2) ---
Original R2: 0.4579
Shadow Mean: -0.8620 (+/- 1.0189)
Z-Score:     1.2954
P-Value:     0.08586


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Saved Plot: C:\solar-planetary-ceos-pma\results\05_p_m_a_model\shadow_test_M8_plus_2.png

PROCESSING MODEL: M8+3
File: 15stars_Ridge_CV-R2_0.7291_OOT-SMOOTH-R2_0.4643_OOT-RAW-R2_0.3510_Params_alpha_0.0010.joblib
   -> Split Date: 1986-08-01
   -> Test Start: 1986-08-02

--- 1. Verifying Baseline (M8+3) ---
Baseline Score: 0.4593

--- 2. Running 200 Shadow Tests ---


Shadow M8+3: 100%|███████████████████████████████████████████████████████████████████| 200/200 [00:28<00:00,  6.92it/s]
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



--- Final Results (M8+3) ---
Original R2: 0.4593
Shadow Mean: -0.9551 (+/- 1.3968)
Z-Score:     1.0126
P-Value:     0.02500
Saved Plot: C:\solar-planetary-ceos-pma\results\05_p_m_a_model\shadow_test_M8_plus_3.png

PROCESSING MODEL: M0+3
File: 21stars_Ridge_CV-R2_0.7281_OOT-SMOOTH-R2_0.5876_OOT-RAW-R2_0.4431_Params_alpha_0.1339.joblib
   -> Split Date: 1986-09-01
   -> Test Start: 1986-09-02

--- 1. Verifying Baseline (M0+3) ---
Baseline Score: 0.5861

--- 2. Running 200 Shadow Tests ---


Shadow M0+3: 100%|███████████████████████████████████████████████████████████████████| 200/200 [00:50<00:00,  3.93it/s]
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



--- Final Results (M0+3) ---
Original R2: 0.5861
Shadow Mean: -0.5196 (+/- 0.8463)
Z-Score:     1.3066
P-Value:     0.01500
Saved Plot: C:\solar-planetary-ceos-pma\results\05_p_m_a_model\shadow_test_M0_plus_3.png

PROCESSING MODEL: M0+2
File: 25stars_Ridge_CV-R2_0.6885_OOT-SMOOTH-R2_0.6117_OOT-RAW-R2_0.4448_Params_alpha_0.3329.joblib
   -> Split Date: 1996-08-01
   -> Test Start: 1996-08-02

--- 1. Verifying Baseline (M0+2) ---
Baseline Score: 0.6117

--- 2. Running 200 Shadow Tests ---


Shadow M0+2: 100%|███████████████████████████████████████████████████████████████████| 200/200 [01:14<00:00,  2.69it/s]


--- Final Results (M0+2) ---
Original R2: 0.6117
Shadow Mean: 0.1698 (+/- 0.3334)
Z-Score:     1.3251
P-Value:     0.03030
Saved Plot: C:\solar-planetary-ceos-pma\results\05_p_m_a_model\shadow_test_M0_plus_2.png



