In [47]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import joblib
from tqdm import tqdm

In [48]:
df = pd.read_csv('data_solar.csv')

In [49]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
csv_file = 'Featured_Database.csv' # CHANGE THIS to your actual filename
MODEL_SAVE_PATH = "day_ahead_model.pth"
SCALER_PATH = "day_ahead_scaler.gz"
BATCH_SIZE = 32
EPOCHS = 50
LEARNING_RATE = 0.001

In [50]:

df["season_sin"] = np.sin(2 * np.pi * df["Season"] / 4)
df["season_cos"] = np.cos(2 * np.pi * df["Season"] / 4)
df["weekday_sin"] = np.sin(2 * np.pi * df["Day_of_the_week"] / 7)
df["weekday_cos"] = np.cos(2 * np.pi * df["Day_of_the_week"] / 7)
df["PV_potential"] = df["GHI"]*df["Temperature"]
df.drop(columns=['Season','Day_of_the_week'],inplace=True)
df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%d-T%H:%M')
df['GHI_diff_5m'] = df['GHI'].diff(periods=1)
df['GHI_rolling_std_30m'] = df['GHI'].rolling(window=6).std()
df['hour_min'] = df['Time'].dt.strftime('%H:%M')
clear_sky_proxy = df.groupby('hour_min')['GHI'].transform('max')
df['Clearness_Index'] = df['GHI'] / (clear_sky_proxy + 1e-9) # Avoid div by zero
df.dropna(inplace=True)
df.drop(columns=['hour_min'],inplace=True)
df['target'] = df['PV_production'] 
df.drop(columns=['PV_production'],inplace=True)
df['target'] = df['target'].clip(0,1)


In [51]:
# 1. Reload the data to reset the dataframe (Fixes the "Already Indexed" bug)
 

# 2. Strip any hidden whitespace from column names (Fixes " Time" vs "Time")
df.columns = df.columns.str.strip()

# 3. Check if 'Time' is a column before processing
if 'Time' in df.columns:
    df['Time'] = pd.to_datetime(df['Time'])
    df.set_index('Time', inplace=True)
else:
    print("‚ö†Ô∏è 'Time' column not found. It might already be the index.")

# 4. Verify


In [52]:
df_hourly = df.resample('1h').mean()
df_hourly.dropna(inplace=True)


In [53]:
df_hourly

Unnamed: 0_level_0,DHI,DNI,GHI,Wind_speed,Humidity,Temperature,season_sin,season_cos,weekday_sin,weekday_cos,PV_potential,GHI_diff_5m,GHI_rolling_std_30m,Clearness_Index,target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-01-01 00:00:00,0.0,0.0,0.0,2.814286,55.523143,1.617143,1.000000e+00,6.123234e-17,0.781831,0.623490,0.0,0.0,0.0,0.0,0.0
2019-01-01 01:00:00,0.0,0.0,0.0,2.770000,55.159000,1.405000,1.000000e+00,6.123234e-17,0.781831,0.623490,0.0,0.0,0.0,0.0,0.0
2019-01-01 02:00:00,0.0,0.0,0.0,2.690000,54.532833,1.186667,1.000000e+00,6.123234e-17,0.781831,0.623490,0.0,0.0,0.0,0.0,0.0
2019-01-01 03:00:00,0.0,0.0,0.0,2.655000,54.317833,0.985000,1.000000e+00,6.123234e-17,0.781831,0.623490,0.0,0.0,0.0,0.0,0.0
2019-01-01 04:00:00,0.0,0.0,0.0,2.628333,54.296667,0.811667,1.000000e+00,6.123234e-17,0.781831,0.623490,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-31 19:00:00,0.0,0.0,0.0,1.718333,64.786667,7.090000,-2.449294e-16,1.000000e+00,-0.433884,-0.900969,0.0,0.0,0.0,0.0,0.0
2021-12-31 20:00:00,0.0,0.0,0.0,1.658333,65.098167,6.751667,-2.449294e-16,1.000000e+00,-0.433884,-0.900969,0.0,0.0,0.0,0.0,0.0
2021-12-31 21:00:00,0.0,0.0,0.0,1.661667,65.315667,6.405000,-2.449294e-16,1.000000e+00,-0.433884,-0.900969,0.0,0.0,0.0,0.0,0.0
2021-12-31 22:00:00,0.0,0.0,0.0,1.675000,65.576167,6.045000,-2.449294e-16,1.000000e+00,-0.433884,-0.900969,0.0,0.0,0.0,0.0,0.0


In [54]:
feature_cols = ['DHI', 'DNI', 'GHI', 'Wind_speed', 'Humidity', 'Temperature', 
                'season_sin', 'season_cos', 'weekday_sin', 'weekday_cos', 
                'PV_potential', 'GHI_diff_5m', 'GHI_rolling_std_30m', 'Clearness_Index', 'target']

future_driver_cols = ['GHI', 'Temperature', 'Wind_speed', 'season_sin', 'season_cos', 'weekday_sin', 'weekday_cos']

scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df_hourly[feature_cols]), 
                         columns=feature_cols, index=df_hourly.index)

# Save scaler for inference
joblib.dump(scaler, SCALER_PATH)
print(f"Scaler saved to {SCALER_PATH}")


Scaler saved to day_ahead_scaler.gz


In [55]:
def create_day_ahead_sequences(data, feature_cols, future_cols, lookback=24, horizon=24):
    X_past, X_future, y = [], [], []
    
    # We need to ensure we have enough data for past AND future
    # Loop from 'lookback' until 'len - horizon'
    for i in range(lookback, len(data) - horizon):
        # 1. Past Data (t-24 to t)
        past_slice = data.iloc[i-lookback : i][feature_cols].values
        
        # 2. Future Weather (t to t+24)
        # Note: In real training, we use actual future weather as a proxy for the forecast
        future_slice = data.iloc[i : i+horizon][future_cols].values
        
        # 3. Target (t to t+24)
        target_slice = data.iloc[i : i+horizon]['target'].values
        
        X_past.append(past_slice)
        X_future.append(future_slice)
        y.append(target_slice)
        
    return np.array(X_past), np.array(X_future), np.array(y)

print("Creating Sequences...")
X_past, X_future, y = create_day_ahead_sequences(df_scaled, feature_cols, future_driver_cols)

Creating Sequences...


In [56]:
split = int(len(X_past) * 0.9)
X_past_train, X_future_train, y_train = X_past[:split], X_future[:split], y[:split]
X_past_test, X_future_test, y_test = X_past[split:], X_future[split:], y[split:]

In [57]:
class SolarDayAheadDataset(Dataset):
    def __init__(self, x_past, x_future, y):
        self.x_past = torch.tensor(x_past, dtype=torch.float32)
        self.x_future = torch.tensor(x_future, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.x_past[idx], self.x_future[idx], self.y[idx]
    
train_loader = DataLoader(SolarDayAheadDataset(X_past_train, X_future_train, y_train), batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(SolarDayAheadDataset(X_past_test, X_future_test, y_test), batch_size=BATCH_SIZE, shuffle=False)    

In [58]:
class DayAheadNet(nn.Module):
    def __init__(self, past_features, future_features, hidden_size=64):
        super(DayAheadNet, self).__init__()
        
        # Encoder (Processes Past History)
        self.lstm = nn.LSTM(input_size=past_features, hidden_size=hidden_size, batch_first=True)
        
        # Processor (Processes Future Weather)
        # We flatten the 24h future weather: 24 * num_future_features
        self.future_fc = nn.Linear(24 * future_features, hidden_size)
        
        # Combiner
        self.combine_fc = nn.Linear(hidden_size + hidden_size, hidden_size)
        
        # Decoder (Output 24 hours of generation)
        self.output_fc = nn.Linear(hidden_size, 24) 
        self.sigmoid = nn.Sigmoid() # Force 0-1 range
        
    def forward(self, past, future):
        # past: (Batch, 24, past_features)
        # future: (Batch, 24, future_features)
        
        # 1. Encode Past
        _, (hidden, _) = self.lstm(past)
        context = hidden[-1] # (Batch, hidden_size)
        
        # 2. Process Future
        # Flatten future: (Batch, 24*future_features)
        future_flat = future.reshape(future.shape[0], -1) 
        future_context = torch.relu(self.future_fc(future_flat))
        
        # 3. Combine
        combined = torch.cat((context, future_context), dim=1)
        combined = torch.relu(self.combine_fc(combined))
        
        # 4. Predict
        out = self.output_fc(combined)
        return self.sigmoid(out)

In [59]:
model = DayAheadNet(past_features=len(feature_cols), future_features=len(future_driver_cols)).to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# --- 5. TRAINING LOOP ---
print(f"Starting Training on {DEVICE}...")
best_loss = float('inf')

Starting Training on cpu...


In [60]:
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}", leave=False)
    
    for past_b, future_b, y_b in loop:
        past_b, future_b, y_b = past_b.to(DEVICE), future_b.to(DEVICE), y_b.to(DEVICE)
        
        optimizer.zero_grad()
        preds = model(past_b, future_b)
        loss = criterion(preds, y_b)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        loop.set_postfix(loss=loss.item())
    
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for past_b, future_b, y_b in test_loader:
            past_b, future_b, y_b = past_b.to(DEVICE), future_b.to(DEVICE), y_b.to(DEVICE)
            preds = model(past_b, future_b)
            loss = criterion(preds, y_b)
            val_loss += loss.item()
            
    avg_val_loss = val_loss / len(test_loader)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss/len(train_loader):.5f} | Val Loss: {avg_val_loss:.5f}")
    
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print(f"üî• Model saved with loss {best_loss:.5f}")

print("Training Complete.")        


                                                                           

Epoch 1 | Train Loss: 0.01941 | Val Loss: 0.02941
üî• Model saved with loss 0.02941


                                                                            

KeyboardInterrupt: 