# In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Read the raw dataset from disk.
df = pd.read_csv('solar_weather.csv')

# Pull the 'Time' column out of the frame, parse it into timestamps, and use
# the parsed values as the index (the index keeps the name 'Time').
df = df.set_index(pd.to_datetime(df.pop('Time')))

# Exogenous regressors (X) and the target series (y) used by the model below.
features = [
    'GHI', 'temp', 'pressure', 'humidity', 'wind_speed',
    'rain_1h', 'snow_1h', 'clouds_all',
    'isSun', 'sunlightTime', 'dayLength', 'SunlightTime/daylength',
    'weather_type', 'hour', 'month',
]
X = df.loc[:, features]
y = df.loc[:, 'Energy delta[Wh]']

# Split the data into training and testing sets.
# The rows form a time series, so the split must be chronological: the first
# 80% of the rows train the model and the final 20% are held out. A shuffled
# split would leak future observations into training and hand the time-series
# model a target that is no longer in time order.
# NOTE(review): assumes the rows of X / y are already sorted by time - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Standardize the features.
# The scaler is fitted on the training rows only so that no statistics from the
# held-out period influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the SARIMAX model: ARIMA(1, 1, 1) with a (1, 1, 1) seasonal component of
# period 24 and the scaled features as exogenous regressors.
# You may need to adjust the order and seasonal_order parameters based on your data
model = SARIMAX(y_train, exog=X_train_scaled, order=(1, 1, 1), seasonal_order=(1, 1, 1, 24))
results = model.fit()

# Make predictions: one forecast step per held-out row, continuing from the end
# of the training sample and driven by the held-out exogenous values.
predictions = results.forecast(steps=len(X_test), exog=X_test_scaled)

# Calculate RMSE.
# The forecast carries its own index labels, which need not equal y_test.index;
# subtracting the two Series directly aligns on labels and can yield all-NaN
# (or mismatched) errors. Re-label the forecast with y_test's index so the
# comparison is strictly positional: step i is compared with held-out row i.
# NOTE: this is only meaningful when y_test is the chronological continuation
# of y_train.
aligned_predictions = pd.Series(np.asarray(predictions), index=y_test.index)
rmse = np.sqrt(((aligned_predictions - y_test) ** 2).mean())
print(f"Root Mean Square Error: {rmse}")

# Draw the held-out target and the model forecast on one set of axes,
# both plotted against the held-out timestamps.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test.index, y_test.values, label='Actual')
ax.plot(y_test.index, predictions, label='Predicted')
ax.set_title('Energy Delta: Actual vs Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('Energy Delta [Wh]')
ax.legend()
plt.show()

# Forecast for the next 6 years
# Start one step AFTER the last observation so the first forecast timestamp
# does not duplicate the final historical one.
# NOTE(review): the hourly step (freq='H') mirrors the seasonal period of 24
# used by the model; confirm the data really is hourly.
future_dates = pd.date_range(
    start=df.index[-1] + pd.Timedelta(hours=1), periods=6*365*24, freq='H'
)

# Fill the future exogenous variables (you may need to adjust this based on your data)
# Future weather is unknown, so each feature is filled with its historical mean
# for the same (calendar month, hour of day). This is a simple climatological
# placeholder; replace it with real forecasts of the features if available.
# Leaving these columns empty would feed NaN regressors into the model.
month_hour_means = X.groupby(
    [X.index.month.to_numpy(), X.index.hour.to_numpy()]
).mean()
future_keys = pd.MultiIndex.from_arrays([future_dates.month, future_dates.hour])
future_exog = month_hour_means.reindex(future_keys)
future_exog.index = future_dates
# (month, hour) combinations never seen in the history fall back to the overall
# column mean; then restore the column order the scaler was fitted with.
future_exog = future_exog.fillna(X.mean())[features]
future_exog['hour'] = future_dates.hour
future_exog['month'] = future_dates.month

# Scale the future exogenous variables
future_exog_scaled = scaler.transform(future_exog)

# Make predictions for the next 6 years
# `results` only knows the training sample, so forecasting from it would start
# right after the last TRAINING row rather than after the end of the data.
# Re-run the filter over the full series with the already-estimated parameters
# (no re-estimation) so the forecast origin is the final observation in `df`.
full_model = model.clone(y, exog=scaler.transform(X))
full_results = full_model.filter(results.params)
future_predictions = full_results.forecast(steps=len(future_exog), exog=future_exog_scaled)

# Plot the future predictions
plt.figure(figsize=(12, 6))
plt.plot(future_dates, np.asarray(future_predictions))
plt.title('Energy Delta Forecast for Next 6 Years')
plt.xlabel('Time')
plt.ylabel('Energy Delta [Wh]')
plt.show()

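# Notebook cell output captured from the run above (presumably the echoed
# source line of a library warning, emitted four times):
#   self._init_dates(dates, freq)
#   self._init_dates(dates, freq)
#   self._init_dates(dates, freq)
#   self._init_dates(dates, freq)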
