# Temporal Lag Feature Engineering

**What**: Use past SST values to predict current conditions
**Why**: El Niño develops over 3-6 months, so past SST values contain predictive signals
**How**: Create 1-week, 1-month, 3-month, 6-month, 12-month lag features

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the previously written CSV back in, then parse the 'date' column,
# promote it to the index, and order the rows chronologically.
raw = pd.read_csv(output_csv)
df = (
    raw.assign(date=pd.to_datetime(raw['date']))
    .set_index('date')
    .sort_index()
)

# Quick sanity check: overall dimensions followed by a preview of the data.
print("Original data shape:", df.shape)
print("\nFirst few rows:", df.head(), sep="\n")

In [None]:
# === VISUALIZE TEMPORAL LAG FEATURES ===
# Draws two stacked panels:
#   1. current SST overlaid with its 3-, 6- and 12-month lagged copies;
#   2. a bar chart of the correlation between each lag column and current SST.
# Then prints the same correlations as text and names the highest one.
#
# Relies on names created outside this cell: `df` (with 'sst' and the
# 'sst_lag_*' columns) and `lag_cols` (the list of lag column names).
import matplotlib.pyplot as plt

# Create subplot for current vs lag features
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

# Plot 1: Time series comparison
# Rows with a NaN in any column are dropped so every line covers the same dates.
df_plot = df.dropna()

ax1.plot(df_plot.index, df_plot['sst'], label='Current SST', color='black', linewidth=2)
ax1.plot(df_plot.index, df_plot['sst_lag_3m'], label='3 months ago', color='red', alpha=0.7)
ax1.plot(df_plot.index, df_plot['sst_lag_6m'], label='6 months ago', color='blue', alpha=0.7)
ax1.plot(df_plot.index, df_plot['sst_lag_12m'], label='12 months ago', color='green', alpha=0.7)

ax1.set_title('Niño 3.4 SST: Current vs Lag Features')
ax1.set_ylabel('SST (°C)')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Correlation analysis
# Computed on the full frame (not df_plot), one value per lag column.
correlations = [df['sst'].corr(df[col]) for col in lag_cols]

# NOTE(review): these labels are positional — this assumes lag_cols holds
# exactly five columns ordered 1w, 1m, 3m, 6m, 12m. Confirm against the cell
# that defines lag_cols.
lag_names = ['1 week', '1 month', '3 months', '6 months', '12 months']
ax2.bar(lag_names, correlations, color=['skyblue', 'lightgreen', 'orange', 'red', 'purple'])
ax2.set_title('Predictive Power of Lag Features (Correlation with Current SST)')
ax2.set_ylabel('Correlation')
# Keep the original 0..1 range when every correlation is non-negative, but
# extend the lower bound so a negative bar is not cut off by the axis.
ax2.set_ylim(min(0, min(correlations, default=0)), 1)

# Add correlation values on bars
for i, v in enumerate(correlations):
    ax2.text(i, v + 0.01, f'{v:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# "\n" (not "\\n") so a blank line is printed rather than a literal backslash-n.
print("\nCorrelation Summary:")
for lag_name, corr in zip(lag_names, correlations):
    print(f"{lag_name}: {corr:.3f}")

# Lag with the highest correlation; on ties the earliest entry wins.
best_name, _ = max(zip(lag_names, correlations), key=lambda pair: pair[1])
print(f"\nBest predictor: {best_name}")