# 📊 EDA for Smart Grid Energy Data
This notebook performs exploratory data analysis (EDA) on a synthetic smart grid dataset — one week of 15-minute readings covering regional demand, solar and wind generation, and temperature.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Simulate one week of smart-grid readings at 15-minute resolution
# (96 samples per day x 7 days).
timestamps = pd.date_range(start='2024-01-01', periods=96*7, freq='15min')
np.random.seed(42)  # fixed seed so every run yields the same synthetic data

# Diurnal solar shape derived from the time of day: +1 at 12:00, 0 at 06:00
# and 18:00, negative overnight (clipped to zero below). Driving the phase
# from the timestamps keeps the cycle locked to the calendar day; a plain
# sin() starting at phase 0 would instead peak at 06:00 and be dark all
# afternoon.
minute_of_day = (timestamps.hour * 60 + timestamps.minute).to_numpy()
solar_shape = np.sin(2 * np.pi * (minute_of_day / (24 * 60) - 0.25))

# NOTE: the order of the np.random calls below fixes which random draws land
# in which column, so keep it stable to preserve reproducibility.
data = pd.DataFrame({
    'timestamp': timestamps,
    # Demand: Gaussian noise around 500 with a standard deviation of 50.
    'region_demand': np.random.normal(500, 50, size=len(timestamps)),
    # Solar: daylight curve scaled to 300 plus noise, floored at zero.
    'solar_gen': np.clip(solar_shape * 300 + np.random.normal(0, 30, len(timestamps)), 0, None),
    # Wind: Gaussian around 150, floored at zero.
    'wind_gen': np.clip(np.random.normal(150, 40, len(timestamps)), 0, None),
    # Temperature: Gaussian around 30 with a standard deviation of 5.
    'temperature': np.random.normal(30, 5, size=len(timestamps))
})

In [None]:
# Feature Engineering
# Hour of day (0-23), used later for the hourly aggregation.
data['hour'] = data['timestamp'].dt.hour

# Rolling statistics over 4 consecutive samples (one hour at the 15-minute
# spacing of the simulated data). The first 3 rows have no full window and
# come out as NaN; they are back-filled from the first complete window.
# `.bfill()` replaces `fillna(method='bfill')`, which is deprecated in
# pandas 2.x and no longer accepted by newer releases.
demand_window = data['region_demand'].rolling(window=4)
# NOTE: despite the column name, this holds the rolling standard deviation.
data['demand_variance'] = demand_window.std().bfill()
data['moving_avg_demand'] = demand_window.mean().bfill()

# Simple composite indicator: temperature plus 10% of wind generation.
data['weather_score'] = data['temperature'] + 0.1 * data['wind_gen']

In [None]:
# Load Profile: regional demand over the full simulated period.
fig, ax = plt.subplots(figsize=(12, 4))
load_series = data.set_index('timestamp')['region_demand']
load_series.plot(ax=ax, title="Grid Load Profile")
ax.set_ylabel("Load (MW)")
ax.grid()
plt.show()

In [None]:
# Solar vs Wind Generation: both series drawn on one shared time axis.
fig, ax = plt.subplots(figsize=(12, 4))
for column, series_label in (('solar_gen', 'Solar'), ('wind_gen', 'Wind')):
    ax.plot(data['timestamp'], data[column], label=series_label)
ax.set_title("Solar vs Wind Generation")
# NOTE(review): this axis is labelled kWh while the demand plots use MW —
# confirm the intended unit for the generation columns.
ax.set_ylabel("Generation (kWh)")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Average Hourly Consumption: mean demand for each hour of the day,
# averaged over every day in the dataset.
fig, ax = plt.subplots(figsize=(10, 4))
hourly_mean_demand = data.groupby('hour')['region_demand'].mean()
hourly_mean_demand.plot(kind='bar', ax=ax, title="Average Hourly Consumption")
ax.set_ylabel("Average Demand (MW)")
ax.grid()
plt.show()

In [None]:
# Correlation Heatmap: pairwise correlation between the weather,
# generation and demand columns, annotated with the coefficient values.
heatmap_columns = ['temperature', 'solar_gen', 'wind_gen', 'region_demand']
correlation_matrix = data[heatmap_columns].corr()

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap")
plt.tight_layout()
plt.show()

In [None]:
# Anomaly Detection using Isolation Forest
# The forest is fitted on demand plus both generation columns;
# fit_predict labels each row -1 (anomaly) or 1 (normal).
feature_columns = ['region_demand', 'solar_gen', 'wind_gen']
X = data[feature_columns]
model = IsolationForest(contamination=0.01, random_state=42)
data['anomaly'] = model.fit_predict(X)

# Rows flagged as anomalous by the model.
anomalies = data.loc[data['anomaly'].eq(-1)]

# Overlay the flagged points on the demand curve.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(data['timestamp'], data['region_demand'], label='Demand')
ax.scatter(
    anomalies['timestamp'],
    anomalies['region_demand'],
    color='red',
    label='Anomalies',
    s=10,
)
ax.set_title("Anomaly Detection in Demand")
ax.legend()
ax.grid()
plt.show()