# Solar PV Forecasting - Model Development

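This notebook loads the cleaned hourly solar PV dataset and splits it chronologically (80% train / 20% test) into features and the `DC_POWER` target, ready for model training.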

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np

## 2. Load Cleaned Dataset

In [None]:
# Read the cleaned hourly data. The first CSV column is used as the index and
# parse_dates=True asks pandas to parse that index as dates.
df = pd.read_csv(
    'data/solar_pv_clean_hourly.csv',
    index_col=0,
    parse_dates=True,
)

# Quick overview: size, columns, and the time span covered by the index
column_names = df.columns.tolist()
first_timestamp = df.index.min()
last_timestamp = df.index.max()

print(f"Dataset Shape: {df.shape}")

print("\nColumn Names:")
print(column_names)

print("\nDate Range:")
print(f"Start: {first_timestamp}")
print(f"End: {last_timestamp}")

In [None]:
# Preview the top of the dataset (first five rows, shown as the cell's output)
df.head(n=5)

## 3. Train-Test Split

In [None]:
# Chronological train/test split (no shuffling).
#
# The split below is positional (iloc), so it is only truly chronological when
# the rows are ordered by timestamp. To make sure an unsorted CSV cannot leak
# later observations into the training set, the split is taken from a view of
# the data sorted by index. `df` itself is never modified.
TRAIN_FRACTION = 0.8     # share of rows (earliest first) used for training
TARGET_COL = 'DC_POWER'  # column to predict; every other column is a feature

if TARGET_COL not in df.columns:
    raise KeyError(f"Target column '{TARGET_COL}' not found in dataset columns")

# Only sort when needed; mergesort is stable, so rows sharing a timestamp keep
# their original relative order.
if df.index.is_monotonic_increasing:
    df_sorted = df
else:
    df_sorted = df.sort_index(kind='mergesort')

# Position of the first test row
split_idx = int(len(df_sorted) * TRAIN_FRACTION)

# Earliest rows -> train, remaining (latest) rows -> test
train_df = df_sorted.iloc[:split_idx]
test_df = df_sorted.iloc[split_idx:]

# Every row must land in exactly one of the two sets
assert len(train_df) + len(test_df) == len(df), "train/test split lost or duplicated rows"

# Separate features and target
X_train = train_df.drop(columns=TARGET_COL)
y_train = train_df[TARGET_COL]

X_test = test_df.drop(columns=TARGET_COL)
y_test = test_df[TARGET_COL]

# Print shapes
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

# Print date ranges for train and test sets
print(f"\nTrain date range: {X_train.index.min()} to {X_train.index.max()}")
print(f"Test date range: {X_test.index.min()} to {X_test.index.max()}")