# Solar Energy Prediction - Model Training

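This notebook loads weather and production data from Supabase, trains Random Forest and Gradient Boosting regression models to predict solar energy output, evaluates both on a held-out test set, and saves the better-scoring model.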


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import pickle
import os
from supabase import create_client
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Client built from the SUPABASE_URL / SUPABASE_KEY environment variables.
supabase = create_client(
    os.environ.get('SUPABASE_URL'),
    os.environ.get('SUPABASE_KEY'),
)


In [None]:
# Load and prepare data
#
# Pulls every column of the `weather_data` and `production_data` tables,
# aligns the two on the hour, and builds the feature matrix X / target y.
# NOTE(review): Supabase projects usually cap the number of rows a single
# select returns (1000 by default) -- confirm both tables fit in one response,
# otherwise the queries below need pagination.
weather_result = supabase.table('weather_data').select('*').execute()
production_result = supabase.table('production_data').select('*').execute()

weather_df = pd.DataFrame(weather_result.data)
production_df = pd.DataFrame(production_result.data)

# Fail early with a readable message; an empty result would otherwise surface
# as a KeyError on 'timestamp' a few lines further down.
if weather_df.empty or production_df.empty:
    raise ValueError(
        "No rows returned from Supabase "
        f"(weather_data: {len(weather_df)}, production_data: {len(production_df)})"
    )

# Merge on timestamp
weather_df['timestamp'] = pd.to_datetime(weather_df['timestamp'])
production_df['timestamp'] = pd.to_datetime(production_df['timestamp'])

# Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated since pandas 2.2.
weather_df['timestamp_hour'] = weather_df['timestamp'].dt.floor('h')
production_df['timestamp_hour'] = production_df['timestamp'].dt.floor('h')

# Inner join keeps only the hours present in both tables. Columns that exist
# in both frames (e.g. 'timestamp') receive pandas' default _x/_y suffixes.
# NOTE(review): multiple readings within the same hour on either side are
# paired many-to-many -- confirm each table has at most one row per hour.
merged = pd.merge(weather_df, production_df, on='timestamp_hour', how='inner')

# Rows without a target value cannot be used for supervised training and would
# make the estimators reject y, so drop them before building the arrays.
merged = merged.dropna(subset=['energy_output_kwh'])

if merged.empty:
    raise ValueError(
        "No overlapping hourly records with a known energy_output_kwh"
    )

# Prepare features
feature_cols = ['temperature', 'humidity', 'wind_speed', 'cloud_cover', 'solar_irradiance', 'precipitation']
# Missing feature values are imputed with the per-column median.
X = merged[feature_cols].fillna(merged[feature_cols].median()).values
y = merged['energy_output_kwh'].values

print(f"Dataset shape: {X.shape}")
print(f"Features: {feature_cols}")


In [None]:
# Split data
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")


In [None]:
# Train Random Forest
# fit() returns the estimator itself, so construction and training are chained.
rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)

# Score the fitted forest on the held-out split.
rf_pred = rf_model.predict(X_test)
rf_r2, rf_mse, rf_mae = (
    r2_score(y_test, rf_pred),
    mean_squared_error(y_test, rf_pred),
    mean_absolute_error(y_test, rf_pred),
)

print(f"Random Forest - R²: {rf_r2:.4f}, MSE: {rf_mse:.4f}, MAE: {rf_mae:.4f}")


In [None]:
# Train Gradient Boosting
# Only the tree count and the seed are set explicitly; every other
# hyperparameter is left at the library default.
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    random_state=42,
)
gb_model.fit(X_train, y_train)

# Evaluate on the held-out split with the same three metrics used for the
# random forest, so the two models can be compared directly.
gb_pred = gb_model.predict(X_test)
gb_mae = mean_absolute_error(y_test, gb_pred)
gb_r2 = r2_score(y_test, gb_pred)
gb_mse = mean_squared_error(y_test, gb_pred)

print(f"Gradient Boosting - R²: {gb_r2:.4f}, MSE: {gb_mse:.4f}, MAE: {gb_mae:.4f}")


In [None]:
# Save best model
# The random forest wins only when its R² is strictly higher; on a tie the
# gradient boosting model is kept.
if rf_r2 > gb_r2:
    best_model, model_name = rf_model, 'rf_model'
else:
    best_model, model_name = gb_model, 'gb_model'

# Pickle the winner into ../models, creating the directory if it is missing.
os.makedirs('../models', exist_ok=True)
with open(f'../models/{model_name}.pkl', 'wb') as f:
    pickle.dump(best_model, f)

print(f"Saved best model: {model_name}")
