# Multi-City EDA for Solar Forecasting
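This notebook performs exploratory data analysis (EDA) on preprocessed NASA POWER data for five Saudi cities: Riyadh, Tabuk, AlUla, Abha, and Dammam. For each city it shows summary statistics, the daily mean solar irradiance, a correlation heatmap of the variables, and the hourly average humidity and wind speed.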

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

sns.set(style="whitegrid")
%matplotlib inline

data_dir = '../data/processed'
cities = ['riyadh', 'tabuk', 'alula', 'abha', 'dammam']


In [None]:
def _plot_hourly_mean(frame, column, title, ylabel):
    """Plot the mean of ``column`` grouped by the frame's ``hour`` column.

    Args:
        frame: DataFrame that contains an ``hour`` column.
        column: Name of the column to average per hour.
        title: Title placed on the figure.
        ylabel: Label for the y axis.

    If ``column`` is absent, a message is printed and nothing is plotted,
    so one missing variable does not stop the remaining plots.
    """
    if column not in frame.columns:
        print(f"⚠️ Column '{column}' not found; skipping '{title}'")
        return

    frame.groupby('hour')[column].mean().plot()
    plt.title(title)
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.show()


# Run the same set of summaries and plots for every configured city.
for city in cities:
    city_label = city.title()
    print(f"\n📊 Analyzing data for {city_label}...")

    filepath = os.path.join(data_dir, f"{city}_processed.csv")
    if not os.path.exists(filepath):
        # Best effort: report the missing file and move on to the next city.
        print(f"❌ File not found: {filepath}")
        continue

    df = pd.read_csv(filepath, parse_dates=['datetime'], index_col='datetime')
    # Calendar features derived from the datetime index; 'hour' drives the
    # hourly profiles below, and both appear in describe() and the heatmap.
    df['month'] = df.index.month
    df['hour'] = df.index.hour

    display(df.describe())

    # Daily trend: resample the series to calendar days and plot the daily mean.
    if 'ALLSKY_SFC_SW_DWN' in df.columns:
        daily_mean = df['ALLSKY_SFC_SW_DWN'].resample('D').mean()
        daily_mean.plot(figsize=(12, 4), title=f'{city_label} - Daily Solar Irradiance')
        plt.ylabel('Wh/m²')
        plt.grid(True)
        plt.show()
    else:
        print(f"⚠️ Column 'ALLSKY_SFC_SW_DWN' not found; skipping daily trend for {city_label}")

    # Correlation heatmap, restricted to numeric columns: recent pandas
    # versions raise in DataFrame.corr() when a non-numeric column is present.
    numeric_df = df.select_dtypes(include='number')
    plt.figure(figsize=(10, 6))
    sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
    plt.title(f'{city_label} - Correlation Between Variables')
    plt.show()

    # Hourly humidity
    _plot_hourly_mean(df, 'RH2M', f'{city_label} - Hourly Average Humidity', '%')

    # Hourly wind speed
    _plot_hourly_mean(df, 'WS2M', f'{city_label} - Hourly Average Wind Speed', 'm/s')
