# Multi-City Feature Engineering for Solar Forecasting
This notebook builds engineered features (calendar fields, lagged values, and rolling means of the irradiance column) for each processed city dataset and saves one feature file per city.

In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler

# Locations are relative to the directory the notebook is run from.
data_dir = '../data/processed'    # per-city inputs:  <city>_processed.csv
output_dir = '../data/features'   # per-city outputs: <city>_features.csv

# City slugs; each one is used verbatim as the file-name prefix.
cities = [
    'riyadh',
    'tabuk',
    'alula',
    'abha',
    'dammam',
]

# Create the destination folder up front (no error if it already exists).
os.makedirs(output_dir, exist_ok=True)


In [None]:
# For every city: load <data_dir>/<city>_processed.csv, add calendar, lag and
# rolling-mean columns derived from the irradiance column, drop incomplete
# rows, and write the result to <output_dir>/<city>_features.csv.
# A city whose input file or irradiance column is missing is reported and
# skipped so the remaining cities are still processed.
target_col = 'ALLSKY_SFC_SW_DWN'

for city in cities:
    print(f"🔧 Processing features for {city.title()}...")
    input_path = os.path.join(data_dir, f"{city}_processed.csv")
    output_path = os.path.join(output_dir, f"{city}_features.csv")

    # Read directly and handle the failure, rather than checking for the file
    # first: avoids a second filesystem lookup and the check-then-read race.
    try:
        df = pd.read_csv(input_path, parse_dates=['datetime'], index_col='datetime')
    except FileNotFoundError:
        print(f"❌ Missing file: {input_path}")
        continue

    # Without the irradiance column none of the lag/rolling features can be
    # built; skip this city instead of aborting the whole batch on a KeyError.
    if target_col not in df.columns:
        print(f"❌ Missing column '{target_col}' in: {input_path}")
        continue

    # shift() and rolling() below operate on row position, so rows must be in
    # chronological order. Already-ordered data is left untouched.
    if not df.index.is_monotonic_increasing:
        df = df.sort_index(kind='stable')

    ghi = df[target_col]

    # Temporal Features
    df['hour'] = df.index.hour
    df['dayofweek'] = df.index.dayofweek
    df['month'] = df.index.month
    df['dayofyear'] = df.index.dayofyear

    # Lag Features (value 1 row and 24 rows earlier).
    # NOTE(review): these are "previous hour" / "previous day" only if the
    # data is evenly spaced at one row per hour -- confirm against the
    # preprocessing step.
    df['ghi_lag1'] = ghi.shift(1)
    df['ghi_lag24'] = ghi.shift(24)

    # Rolling Mean Features (trailing windows of 3 and 24 rows).
    # NOTE(review): each window includes the current row's own value; if this
    # column is also the prediction target at the same timestamp, that leaks
    # the target into the features -- confirm against the modelling notebook.
    df['ghi_roll3'] = ghi.rolling(window=3).mean()
    df['ghi_roll24'] = ghi.rolling(window=24).mean()

    # Drop incomplete rows. This removes the first 24 warm-up rows created by
    # the lag/rolling columns, and also any row with a NaN in any other column.
    df = df.dropna()

    # Save
    df.to_csv(output_path)
    print(f"✅ Saved features to: {output_path}")
