In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Rolling z-score anomaly detection on the cleaned California solar series.
#
# Each row is compared against the mean/std of the trailing window that ends
# at that row; rows whose |z| exceeds the threshold are marked anomalous, then
# the series is plotted with the flagged rows highlighted.

# Tunable detector parameters.
# NOTE(review): 24 rows presumably equals one day of hourly samples -- confirm
# the sampling rate of the CSV.
ROLLING_WINDOW = 24
Z_THRESHOLD = 3

df = pd.read_csv('../data/processed/solar_california_cleaned.csv', parse_dates=['timestamp'])

# Rolling stats
# The window is row-based, so this assumes rows are already in chronological
# order -- TODO confirm against the cleaning step.
# With the default min_periods, the first ROLLING_WINDOW - 1 rows are NaN.
generation_window = df['generation'].rolling(window=ROLLING_WINDOW)
df['rolling_mean'] = generation_window.mean()
df['rolling_std'] = generation_window.std()

# Z-score
# A window of constant values has std == 0; dividing by it yields NaN or
# +/-inf, and inf would be flagged as an anomaly. Mask those windows to NaN so
# the z-score is simply undefined there (NaN never exceeds the threshold).
nonzero_std = df['rolling_std'].where(df['rolling_std'] > 0)
df['z_score'] = (df['generation'] - df['rolling_mean']) / nonzero_std
df['anomaly'] = df['z_score'].abs() > Z_THRESHOLD

# Visualize
# Filter the flagged rows once instead of re-masking the frame per column.
anomalies = df[df['anomaly']]

plt.figure(figsize=(15,6))
plt.plot(df['timestamp'], df['generation'], label='Generation')
# zorder=3 keeps the markers on top of the line; by default matplotlib draws
# scatter collections underneath Line2D artists.
plt.scatter(anomalies['timestamp'], anomalies['generation'], color='red', label='Anomalies', zorder=3)
plt.title("Solar Generation with Anomalies (California)")
plt.xlabel("Time")
plt.ylabel("Generation (MW)")
plt.legend()
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
output_path = Path('../output/solar_anomalies_zscore.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path)
plt.show()