# 01. Exploratory Data Analysis (EDA)

This notebook explores the processed GridPulse dataset (Load, Wind, Solar).

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's "whitegrid" theme to the plots created after this call.
sns.set_theme(style="whitegrid")

In [None]:
# Load the processed feature table into `df`, which the later cells read.
DATA_PATH = "../data/processed/features.parquet"

try:
    df = pd.read_parquet(DATA_PATH)
except FileNotFoundError as err:
    # Fail loudly instead of just printing: without `df` the following cells
    # would die with a NameError on a fresh kernel, or silently reuse a stale
    # `df` left over from an earlier run.
    raise FileNotFoundError(
        f"File not found: {DATA_PATH}. Please run the data pipeline first."
    ) from err

# Only the read itself sits inside the `try`; the summary runs once it succeeded.
print(f"Loaded data with shape: {df.shape}")
display(df.head())

## 1. Time Series Visualization

In [None]:
# Plot the first rows of the dataset as a short energy profile.
# NOTE(review): the row count assumes hourly rows ordered by time, so that
# 168 rows span one week -- confirm against the data pipeline.
HOURS_PER_WEEK = 168
SAMPLE_WEEKS = 2
# Column name -> legend label for each series drawn on the chart.
SERIES_LABELS = {'load_mw': 'Load', 'wind_mw': 'Wind', 'solar_mw': 'Solar'}

if 'timestamp' in df.columns:
    sample = df.iloc[:HOURS_PER_WEEK * SAMPLE_WEEKS]
    # Draw only the series that actually exist, mirroring the column filter
    # used for the correlation matrix, instead of raising a KeyError.
    available = {col: label for col, label in SERIES_LABELS.items() if col in sample.columns}

    fig, ax = plt.subplots(figsize=(15, 6))
    for col, label in available.items():
        ax.plot(sample['timestamp'], sample[col], label=label)
    ax.set_title(f"{SAMPLE_WEEKS}-Week Energy Profile (Sample)")
    ax.set_xlabel("Timestamp")
    ax.set_ylabel("MW")
    if available:
        # Skip the legend when nothing was drawn; it would only emit a warning.
        ax.legend()
    plt.show()
else:
    # Tell the reader why no figure appears instead of skipping silently.
    print("No 'timestamp' column found; skipping the time series plot.")

## 2. Feature Correlations

In [None]:
# Correlation matrix over whichever candidate feature columns are present.
cols = ['load_mw', 'wind_mw', 'solar_mw', 'hour', 'dayofweek', 'season']
present = [c for c in cols if c in df.columns]

# Restrict to numeric columns: DataFrame.corr() raises on non-numeric data in
# pandas 2.x, which would happen if e.g. 'season' is stored as text.
corr = df[present].select_dtypes(include="number").corr()

if corr.empty:
    # An empty matrix cannot be drawn as a heatmap; report it instead.
    print("No numeric feature columns available for the correlation matrix.")
else:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
    ax.set_title("Feature Correlation Matrix")
    plt.show()