In [None]:
from pathlib import Path

import pandas as pd

# Read the complete dataset from disk, report its dimensions, and preview two rows.
df = pd.read_csv(filepath_or_buffer='data/data_full.csv')
print(df.shape)
df.head(n=2)

In [None]:
# Number of rows recorded for each distinct value of the 'date' column.
df.loc[:, 'date'].value_counts()

## Split data

In [None]:
# Training partition: every row whose 'date' differs from the held-out date 20251028.
df_train = df.loc[df['date'].ne(20251028)]
print(df_train.shape)
df_train.head(n=2)

In [None]:
# Test partition: only the rows whose 'date' equals the held-out date 20251028.
df_test = df.loc[df['date'].eq(20251028)]
print(df_test.shape)
df_test.head(n=2)

In [None]:
# Save split data
#
# Writes the train/test partitions to CSV files (without the index column).
# Set SAVE_SPLIT to False to skip writing when re-running the notebook.

SAVE_SPLIT = True
SPLIT_DIR = Path('data_split')

if SAVE_SPLIT:
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the output folder exists before writing into it.
    SPLIT_DIR.mkdir(parents=True, exist_ok=True)
    df_train.to_csv(SPLIT_DIR / 'data_train.csv', index=False)
    df_test.to_csv(SPLIT_DIR / 'data_test.csv', index=False)

## Meta-learning

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

In [None]:
# Split tasks and scale data
#
# Every distinct 'day_of_year' in the training frame is one task. The scalers
# are fitted on all training rows; each retained day is then stored twice:
# once with raw values (`tasks`) and once standardised (`tasks_scaled`).

X_cols = ['R', 'G', 'B', 'time_of_day', 'weather_index', 'cloud_cover_percent']
Y_cols = ['I_445', 'I_515', 'I_630', 'I_clear']

X_all = df_train[X_cols].to_numpy()
Y_all = df_train[Y_cols].to_numpy()

# Standardisation statistics come from the full training frame, including
# rows of days that are dropped below for having too few samples.
X_scaler = StandardScaler()
Y_scaler = StandardScaler()
X_all_scaled = X_scaler.fit_transform(X_all)
Y_all_scaled = Y_scaler.fit_transform(Y_all)

grouped = df_train.groupby('day_of_year')

tasks = []         # (day, raw X, raw Y)
tasks_scaled = []  # (day, standardised X, standardised Y)
for day, df_day in grouped:
    if len(df_day) < 10:
        continue  # day has fewer than 10 rows: not kept as a task
    X_raw = df_day[X_cols].to_numpy()
    Y_raw = df_day[Y_cols].to_numpy()
    tasks.append((day, X_raw, Y_raw))
    # Transforming the day's rows with the fitted scalers yields the same
    # values as slicing those rows out of X_all_scaled / Y_all_scaled.
    tasks_scaled.append((day, X_scaler.transform(X_raw), Y_scaler.transform(Y_raw)))

print(f"Total days (tasks): {len(tasks)}")

In [None]:
# Meta-learning config and loop
#
# Trains shared parameters (W_meta, b_meta) of a linear model
#     Y_pred = X @ W.T + b
# so that one gradient step on k_shot samples of a day adapts it to that day.
# Per iteration and per task:
#   1. draw disjoint support (train) and query (val) sets of k_shot rows each,
#   2. inner step: one MSE gradient step from the meta-parameters on the support set,
#   3. outer step: MSE gradient on the query set, evaluated at the adapted parameters.
# The query gradient is applied directly to the meta-parameters, i.e. this is a
# first-order scheme: nothing is differentiated through the inner step.

input_dim = len(X_cols)
output_dim = len(Y_cols)
k_shot = 10
meta_lr = 0.001
inner_lr = 0.05
meta_iters = 300
np.random.seed(0)

W_meta = np.random.randn(output_dim, input_dim) * 0.01
b_meta = np.zeros(output_dim)

# A task needs k_shot support rows plus k_shot query rows. Filtering once up
# front keeps the per-iteration loop free of skip logic and gives the exact
# number of tasks that contribute to every meta-gradient.
meta_tasks = [task for task in tasks_scaled if len(task[1]) >= k_shot * 2]
if not meta_tasks:
    raise ValueError(
        f"No task has at least {k_shot * 2} samples; cannot run meta-training."
    )

for meta_iter in range(meta_iters):
    dW_meta = np.zeros_like(W_meta)
    db_meta = np.zeros_like(b_meta)
    val_losses = []  # query-set MSE of every task in this iteration

    for day, X_day, Y_day in meta_tasks:
        n = len(X_day)

        # Randomly sample train/val split within this day
        idx = np.random.permutation(n)
        idx_train = idx[:k_shot]
        idx_val = idx[k_shot:2*k_shot]

        X_train, Y_train = X_day[idx_train], Y_day[idx_train]
        X_val, Y_val = X_day[idx_val], Y_day[idx_val]

        # Inner adaptation: one gradient step on the support set
        Y_pred = X_train @ W_meta.T + b_meta
        grad_W = (2 / k_shot) * (Y_pred - Y_train).T @ X_train
        grad_b = (2 / k_shot) * np.sum(Y_pred - Y_train, axis=0)
        W_task = W_meta - inner_lr * grad_W
        b_task = b_meta - inner_lr * grad_b

        # Outer evaluation: query-set gradient at the adapted parameters
        Y_val_pred = X_val @ W_task.T + b_task
        val_err = Y_val_pred - Y_val
        grad_W_meta = (2 / k_shot) * val_err.T @ X_val
        grad_b_meta = (2 / k_shot) * np.sum(val_err, axis=0)

        dW_meta += grad_W_meta
        db_meta += grad_b_meta
        val_losses.append(np.mean(val_err ** 2))

    # Meta-update: average over the tasks that actually contributed. Dividing
    # by len(tasks_scaled) would shrink the step whenever some days are too
    # small to be used.
    W_meta -= meta_lr * dW_meta / len(meta_tasks)
    b_meta -= meta_lr * db_meta / len(meta_tasks)

    if meta_iter % 50 == 0:
        # Mean query loss across all tasks of this iteration (computed with
        # the meta-parameters from before this iteration's update).
        loss_meta = np.mean(val_losses)
        print(f"Iter {meta_iter:03d} | Meta-loss: {loss_meta:.6f}")

## Prediction

In [None]:
# Few-shot evaluation on the test set.
#
# For every test day with at least 10 rows: standardise the data with the
# scalers fitted on the training set, hold out up to k_shot rows for
# adaptation, and score the remaining rows twice -- with the meta-parameters
# as they are ("before") and after one gradient step on the adaptation rows
# ("after"). Metrics are reported in the original (unscaled) target units.

k_shot = 10
inner_lr = 0.05
np.random.seed(42)

# --- Prepare columns ---
X_cols = ['R','G','B','time_of_day','weather_index','cloud_cover_percent']
Y_cols = ['I_445','I_515','I_630','I_clear']


def _channel_metrics(Y_true, Y_pred):
    """Return (list of per-column R², array of per-column RMSE) for two 2-D arrays of equal shape."""
    r2 = [r2_score(Y_true[:, i], Y_pred[:, i]) for i in range(Y_true.shape[1])]
    rmse = np.sqrt(np.mean((Y_true - Y_pred)**2, axis=0))
    return r2, rmse


# --- Group test set by day ---
test_days = sorted(df_test['day_of_year'].unique())

results = []   # to store per-day metrics

for day_target in test_days:
    df_target = df_test[df_test['day_of_year'] == day_target]
    if len(df_target) < 10:  # skip very small days
        continue

    # --- Extract and scale ---
    X_target = df_target[X_cols].to_numpy()
    Y_target = df_target[Y_cols].to_numpy()
    X_target_scaled = X_scaler.transform(X_target)
    Y_target_scaled = Y_scaler.transform(Y_target)

    # --- Split into few-shot and eval sets ---
    # At most half of the day's rows are used for adaptation, so the eval set
    # is never smaller than the adaptation set.
    nT = len(X_target_scaled)
    k = min(k_shot, nT // 2)
    idx = np.random.choice(nT, size=k, replace=False)
    mask = np.ones(nT, dtype=bool)
    mask[idx] = False
    X_adapt, Y_adapt = X_target_scaled[idx], Y_target_scaled[idx]
    X_eval,  Y_eval  = X_target_scaled[mask], Y_target_scaled[mask]

    Y_eval_orig = Y_scaler.inverse_transform(Y_eval)

    # --- Before adaptation: predict with the meta-parameters directly ---
    Y_pred_before_scaled = X_eval @ W_meta.T + b_meta
    Y_pred_before = Y_scaler.inverse_transform(Y_pred_before_scaled)
    r2_before, rmse_before = _channel_metrics(Y_eval_orig, Y_pred_before)

    # --- After adaptation: one MSE gradient step on the few-shot rows ---
    Y_pred_adapt = X_adapt @ W_meta.T + b_meta
    grad_W = (2 / len(X_adapt)) * (Y_pred_adapt - Y_adapt).T @ X_adapt
    grad_b = (2 / len(X_adapt)) * np.sum(Y_pred_adapt - Y_adapt, axis=0)
    W_adapt = W_meta - inner_lr * grad_W
    b_adapt = b_meta - inner_lr * grad_b

    Y_pred_after_scaled = X_eval @ W_adapt.T + b_adapt
    Y_pred_after = Y_scaler.inverse_transform(Y_pred_after_scaled)
    r2_after, rmse_after = _channel_metrics(Y_eval_orig, Y_pred_after)

    results.append({
        'day': day_target,
        'r2_before': r2_before,
        'r2_after': r2_after,
        'rmse_before': rmse_before,
        'rmse_after': rmse_after
    })

    print(f"\nDay {day_target} | samples={nT} | few-shot={k}")
    print("R² before:", dict(zip(Y_cols, np.round(r2_before, 3))))
    print("R² after :", dict(zip(Y_cols, np.round(r2_after, 3))))
    print("RMSE before:", dict(zip(Y_cols, np.round(rmse_before, 3))))
    print("RMSE after :", dict(zip(Y_cols, np.round(rmse_after, 3))))


# --- Aggregate average metrics across all test days (per channel) ---
if not results:
    # With no evaluated day the per-day arrays are empty: the means would be a
    # scalar NaN and pairing them with Y_cols would raise a TypeError.
    print("\nNo test day had at least 10 samples; skipping aggregate metrics.")
else:
    r2_before_all = np.array([r['r2_before'] for r in results])  # shape (n_days, 4)
    r2_after_all  = np.array([r['r2_after']  for r in results])  # shape (n_days, 4)
    rmse_before_all = np.array([r['rmse_before'] for r in results])
    rmse_after_all  = np.array([r['rmse_after']  for r in results])

    avg_r2_before = np.mean(r2_before_all, axis=0)  # mean across days
    avg_r2_after  = np.mean(r2_after_all, axis=0)
    avg_rmse_before = np.mean(rmse_before_all, axis=0)
    avg_rmse_after  = np.mean(rmse_after_all, axis=0)

    print("\n===========================================")
    print("Average per-channel R² before adaptation:")
    print(dict(zip(Y_cols, np.round(avg_r2_before, 3))))
    print("\nAverage per-channel R² after adaptation:")
    print(dict(zip(Y_cols, np.round(avg_r2_after, 3))))
    print("-------------------------------------------")
    print("Average per-channel RMSE before adaptation:")
    print(dict(zip(Y_cols, np.round(avg_rmse_before, 3))))
    print("\nAverage per-channel RMSE after adaptation:")
    print(dict(zip(Y_cols, np.round(avg_rmse_after, 3))))
    print("===========================================")