In [29]:
# import dependencies
import pandas as pd
import numpy as np
import darts
from darts import TimeSeries
from darts.models import AutoARIMA
from darts.metrics import mae, rmse
import plotly.graph_objects as go
from darts.utils.timeseries_generation import datetime_attribute_timeseries

In [45]:
df = pd.read_csv("Enriched_data.csv")
df.head()

Unnamed: 0,timestamp,Forecast [MW],Actual Load [MW],temperature,humidity,wind_speed,dayofyear,dayofmonth,dayofweek,hour,holiday
0,2020-01-01 00:00:00,45898.75,43968.5,1.4,88.45,7.795,1,1,2,0,1
1,2020-01-01 01:00:00,43864.25,43047.0,1.155,89.4,8.665,1,1,2,1,1
2,2020-01-01 02:00:00,42938.0,41895.0,0.94,89.35,8.85,1,1,2,2,1
3,2020-01-01 03:00:00,42835.75,40701.75,0.655,89.0,8.53,1,1,2,3,1
4,2020-01-01 04:00:00,42953.0,40681.25,0.435,89.55,8.625,1,1,2,4,1


In [46]:
df.columns

Index(['timestamp', 'Forecast [MW]', 'Actual Load [MW]', 'temperature',
       'humidity', 'wind_speed', 'dayofyear', 'dayofmonth', 'dayofweek',
       'hour', 'holiday'],
      dtype='object')

In [31]:
# data filteration
relevant_cols = ["timestamp", "Actual Load [MW]"]
df_arima = df[relevant_cols].copy()
# df_arima.index = df_arima["timestamp"]
# df_arima.drop(columns="timestamp", inplace=True)

# Convert to a Darts TimeSeries
series = TimeSeries.from_dataframe(df_arima, time_col='timestamp', value_cols='Actual Load [MW]')

# Train, Validation, and Test Split
train_size = int(0.7 * len(series))  # 70% for training
test_size = int(0.3 * len(series))   # 30% for test
train_series = series[:train_size]
test_series = series[train_size:]

# Define a function for ARIMA model training and evaluation
def evaluate_arima(train, test, steps):
    """
    Train ARIMA model on the training set, validate on the validation set,
    and test on the test set.
    """
    # Train ARIMA model
    model = AutoARIMA()
    model.fit(train)
    
    # Predict on the test set
    test_forecast = model.predict(len(test))
    
    # Evaluate test predictions
    test_mae = mae(test, test_forecast)
    test_rmse = rmse(test, test_forecast)
    print(f"Test MAE: {test_mae:.2f}")
    print(f"Test RMSE: {test_rmse:.2f}")
    
    # Return results
    return test_forecast

In [32]:
# Evaluate ARIMA
test_forecast = evaluate_arima(train_series, test_series, steps=len(test_series))

Test MAE: 10131.10
Test RMSE: 12587.84


In [33]:
# Create figure with secondary axis
fig = go.Figure()

# Add Forecast line (primary y-axis)
fig.add_trace(go.Scatter(x=train_series.time_index, y=train_series.values().flatten(),
                         mode='lines', name='train data'))

# Add Actual Load line (primary y-axis)
fig.add_trace(go.Scatter(x=test_series.time_index, y=test_series.values().flatten(),
                         mode='lines', name='test data'))

# Add Actual Load line (primary y-axis)
fig.add_trace(go.Scatter(x=test_forecast.time_index, y=test_forecast.values().flatten(),
                         mode='lines', name='forecast'))

# Update layout for secondary y-axis
fig.update_layout(
    template='presentation',  # Dark background template
    title='Arima Pridiction',
    xaxis_title='Time',
    yaxis_title='Actual Load [MW]',
    xaxis=dict(showgrid=True, gridcolor='gray'),
    yaxis=dict(showgrid=True, gridcolor='gray'),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    legend=dict(
        x=0,  # X-position (0 is left, 1 is right)
        y=1.1,  # Y-position (1 is top, 0 is bottom)
        traceorder='normal',
        orientation='h',  # Horizontal orientation
        bgcolor='rgba(0, 0, 0, 0)',  # Transparent background for legend
    )
)

# Show the plot
fig.show()

In [2]:
from pydantic import BaseModel, Field
import pandas as pd
from darts import TimeSeries
from darts.models import ARIMA, RNNModel
from sklearn.model_selection import TimeSeriesSplit
# from darts.utils.timeseries_generation import split_series
import os

class ModelConfig(BaseModel):
    forecast_horizon: int = Field(..., description="Forecasting horizon in hours")
    training_horizon: int = Field(..., description="Training horizon in hours")
    n_splits: int = Field(..., description="Number of train-test splits")
    model_name: str = Field(..., description="Name of the model")
    output_dir: str = Field(..., description="Directory to save CSV outputs")


# Load dataset
dataset = pd.read_csv("Enriched_data.csv")

# Define ARIMA Configuration
arima_config = ModelConfig(
    forecast_horizon=24,
    training_horizon=720,
    n_splits=5,
    model_name="ARIMA", #"LSTM", "BiLSTM", "HCNN", "CRCNN"
    output_dir="outputs"
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    
    """
    Preprocess the dataset into a Darts TimeSeries object.
    """

    if config.model_name == "ARIMA":
        return TimeSeries.from_dataframe(
            dataset, 
            "timestamp", 
            "Actual Load [MW]"
            )
    
    elif config.model_name == "LSTM":
        return TimeSeries.from_dataframe(
            dataset, 
            "timestamp", 
            ["Actual Load [MW]", 
            "temperature",
            "humidity", 
            "wind_speed", 
            "dayofyear",
            "dayofmonth", 
            "dayofweek",
            "hour", 
            "holiday",]
            )

In [43]:
from pydantic import BaseModel, Field
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import ARIMA, RNNModel
from sklearn.model_selection import TimeSeriesSplit
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    # Normalizing the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset[['Actual Load [MW]', 'temperature', 'humidity', 
                                                'wind_speed', 'dayofyear', 'dayofmonth', 
                                                'dayofweek', 'hour', 'holiday']].values)
    
    # # Create the time series data
    # X, y = [], []
    # for i in range(config.training_horizon, len(scaled_data) - config.forecast_horizon):
    #     X.append(scaled_data[i-config.training_horizon:i])
    #     y.append(scaled_data[i + config.forecast_horizon, 0])  # Predicting 'Actual Load [MW]'
    # scaled_data = pd.DataFrame(scaled_data)
    return scaled_data, scaler

In [15]:
from darts import TimeSeries
from sklearn.preprocessing import MinMaxScaler
from darts.dataprocessing.transformers import Scaler

def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    # Normalizing the dataset using Darts' Scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    transformer = Scaler(scaler)
    
    # Create a TimeSeries object for the dataset
    ts = TimeSeries.from_dataframe(dataset, time_col="timestamp", value_cols=['Actual Load [MW]', 'temperature', 'humidity', 
                                                                              'wind_speed', 'dayofyear', 'dayofmonth', 
                                                                              'dayofweek', 'hour', 'holiday'])
    
    # Fit and transform the data using Darts' Scaler
    scaled_ts = transformer.fit_transform(ts)
    
    # Return the scaled TimeSeries object and the scaler for inverse scaling
    return scaled_ts, transformer


In [44]:
# Load dataset
dataset = pd.read_csv("Enriched_data.csv")

# Define ARIMA Configuration
arima_config = ModelConfig(
    forecast_horizon=24,
    training_horizon=720,
    n_splits=5,
    model_name="ARIMA", #"LSTM", "BiLSTM", "HCNN", "CRCNN"
    output_dir="outputs"
)

In [45]:
data, scalar = preprocess_data(dataset, arima_config)

In [46]:
data[:240, 1:].shape

(240, 8)

In [48]:
data.shape

(43583, 9)

In [51]:
X = data[43583 - 240-24-240 : 43583 - 240, 1:]
y = data[43583 - 240-24-240 : 43583 - 240, 0]

In [52]:
X.shape

(264, 8)

In [55]:
from pydantic import BaseModel, Field
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import ARIMA, RNNModel
from sklearn.model_selection import TimeSeriesSplit
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


class ModelConfig(BaseModel):
    forecast_horizon: int = Field(..., description="Forecasting horizon in hours")
    training_horizon: int = Field(..., description="Training horizon in hours")
    n_splits: int = Field(..., description="Number of train-test splits")
    model_name: str = Field(..., description="Name of the model")
    output_dir: str = Field(..., description="Directory to save CSV outputs")


def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    # Normalizing the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset[['Actual Load [MW]', 'temperature', 'humidity', 
                                                'wind_speed', 'dayofyear', 'dayofmonth', 
                                                'dayofweek', 'hour', 'holiday']].values)
    
    return scaled_data, scaler

def build_model(config: ModelConfig, input_shape: tuple):
    model = Sequential()
    
    if config.model_name == "LSTM":
        model.add(LSTM(units=2, return_sequences=False, input_shape=input_shape))
    elif config.model_name == "BiLSTM":
        model.add(Bidirectional(LSTM(units=2, return_sequences=False), input_shape=input_shape))
    
    model.add(Dense(units=1))  # Output layer for predicting one value (Actual Load [MW])
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def train_and_forecast(dataset: pd.DataFrame, config: ModelConfig):
    """
    Train the LSTM or BiLSTM model and forecast based on the provided dataset and configuration.
    """
    scaled_data, scaler = preprocess_data(dataset, config)

    forecasts = []
    
    # Custom sliding window split logic
    total_length = len(scaled_data)
    train_size = config.training_horizon
    test_size = config.forecast_horizon
    splits_start = []

    for i in reversed(range(config.n_splits)):
        train_start = total_length - train_size - test_size - i * test_size
        train_end = train_start + train_size
        test_start = train_end
        test_end = test_start + test_size

        if train_start < 0 or test_end > total_length:
            break

        # train_indices = np.arange(train_start, train_end)
        # test_indices = np.arange(test_start, test_end)
        splits_start.append((train_start, test_start))

    for train_start, test_start in splits_start:
        X = scaled_data[train_start : train_start + train_size + test_size , 1:]
        y = scaled_data[train_start : train_start + train_size + test_size , 0]
    
        # Train-test split (you can modify this if you want to do cross-validation)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=False)
    
        model = build_model(config, input_shape=(X_train.shape[0], X_train.shape[1]))
        
        model.fit(X_train, y_train, epochs=10, batch_size=24, verbose=1)
        
        # Forecasting (using the test data)
        predictions = model.predict(X_test)
        
        # Inverse scaling to get the actual values
        predictions = scaler.inverse_transform(np.hstack((predictions, np.zeros((predictions.shape[0], X_test.shape[2]-1)))))
        
    
    return predictions



def save_forecast(forecasts: np.ndarray, dataset: pd.DataFrame, config: ModelConfig):
    """
    Save the forecast results to a CSV file, including timestamps.
    """
    # Get the timestamps from the dataset, assuming the dataset is ordered and contains the 'timestamp' column.
    forecast_timestamps = dataset.iloc[-len(forecasts):]['timestamp'].values
    
    # Create a DataFrame for the forecasts and timestamps
    forecast_df = pd.DataFrame({
        'timestamp': forecast_timestamps,
        'forecast': forecasts.flatten()  # Ensure the forecast is a 1D array
    })
    
    # Ensure output directory exists
    os.makedirs(config.output_dir, exist_ok=True)
    
    # Define the output file path
    output_path = os.path.join(config.output_dir, f"{config.model_name}_forecasts.csv")
    
    # Save the DataFrame to a CSV file
    forecast_df.to_csv(output_path, index=False)
    
    print(f"Forecasts saved to {output_path}")


# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=240,
    n_splits=5,
    model_name="LSTM",  # or "LSTM"
    output_dir="outputs"
)

# Load dataset
dataset = pd.read_csv("Enriched_data.csv")

# Assuming 'dataset' is your time series data as a pandas DataFrame
predictions = train_and_forecast(dataset, config)



Epoch 1/10


  super().__init__(**kwargs)


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(None, 8), dtype=float32). Expected shape (None, 237, 8), but input has incompatible shape (None, 8)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 8), dtype=float32)
  • training=True
  • mask=None

In [135]:
dataset.shape

(43583, 11)

In [56]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    # Normalizing the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset[['Actual Load [MW]', 'temperature', 'humidity', 
                                                'wind_speed', 'dayofyear', 'dayofmonth', 
                                                'dayofweek', 'hour', 'holiday']].values)
    
    # Create the time series data
    X, y = [], []
    for i in range(config.training_horizon, len(scaled_data) - config.forecast_horizon):
        X.append(scaled_data[i-config.training_horizon:i])
        y.append(scaled_data[i + config.forecast_horizon, 0])  # Predicting 'Actual Load [MW]'
    
    return np.array(X), np.array(y), scaler

def build_model(config: ModelConfig, input_shape: tuple):
    model = Sequential()
    
    if config.model_name == "LSTM":
        model.add(LSTM(units=5, return_sequences=False, input_shape=input_shape))
    elif config.model_name == "BiLSTM":
        model.add(Bidirectional(LSTM(units=5, return_sequences=False), input_shape=input_shape))
    
    model.add(Dense(units=1))  # Output layer for predicting one value (Actual Load [MW])
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def train_and_forecast(dataset: pd.DataFrame, config: ModelConfig):
    """
    Train the LSTM or BiLSTM model and forecast based on the provided dataset and configuration.
    """
    X, y, scaler = preprocess_data(dataset, config)
    
    # Train-test split (you can modify this if you want to do cross-validation)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    model = build_model(config, input_shape=(X_train.shape[1], X_train.shape[2]))
    
    model.fit(X_train, y_train, epochs=2, batch_size=32, verbose=1)
    
    # Forecasting (using the test data)
    predictions = model.predict(X_test)
    
    # Inverse scaling to get the actual values
    predictions = scaler.inverse_transform(np.hstack((predictions, np.zeros((predictions.shape[0], X_test.shape[2]-1)))))
    
    return predictions

# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=720,
    n_splits=5,
    model_name="BiLSTM",  # or "LSTM"
    output_dir="outputs"
)

# Assuming 'dataset' is your time series data as a pandas DataFrame
predictions = train_and_forecast(dataset, config)


  super().__init__(**kwargs)


Epoch 1/2
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 89ms/step - loss: 0.0250
Epoch 2/2
[1m1071/1071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 90ms/step - loss: 0.0085
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 22ms/step


In [73]:
predictions[:,0].shape


(8568,)

In [132]:
def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    # Normalizing the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset[['Actual Load [MW]', 'temperature', 'humidity', 
                                                'wind_speed', 'dayofyear', 'dayofmonth', 
                                                'dayofweek', 'hour', 'holiday']].values)
    
    # Create the time series data
    X, y = [], []
    for i in range(config.training_horizon, len(scaled_data) - config.forecast_horizon):
        X.append(scaled_data[i-config.training_horizon:i])
        y.append(scaled_data[i + config.forecast_horizon, 0])  # Predicting 'Actual Load [MW]'
    
    return np.array(X), np.array(y), scaler

# Load the dataset
# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=720,
    n_splits=5,
    model_name="BiLSTM",  # or "LSTM"
    output_dir="outputs"
)
dataset = pd.read_csv('Enriched_data.csv')
X, y, scaler = preprocess_data(dataset, config)

In [134]:
X.shape

(42839, 720, 9)

In [63]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Load the dataset
dataset = pd.read_csv('Enriched_data.csv')

# Parse the timestamp column
dataset['timestamp'] = pd.to_datetime(dataset['timestamp'])
dataset.set_index('timestamp', inplace=True)

# Define the target variable and features
target_column = 'Actual Load [MW]'
feature_columns = ['temperature', 'humidity', 'wind_speed', 'dayofyear', 'dayofmonth', 'dayofweek', 'hour', 'holiday']

# Extract features and target variable
X = dataset[feature_columns]
y = dataset[target_column]

# Normalize the features using MinMaxScaler
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

In [70]:
X_scaled.shape

(43583, 8)

In [67]:
len(y_scaled)

43583

In [68]:
# Prepare the data for LSTM (supervised learning, creating sequences)
def create_sequences(X, y, seq_length=24):
    X_seq, y_seq = [], []
    for i in range(seq_length, len(X)):
        X_seq.append(X[i-seq_length:i])
        y_seq.append(y[i])
    return np.array(X_seq), np.array(y_seq)

# Create sequences (use 24 hours of previous data to predict the next hour)
seq_length = 24
X_seq, y_seq = create_sequences(X_scaled, y_scaled, seq_length)

In [72]:
X_seq.shape

(43559, 24, 8)

In [116]:
# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=240,
    n_splits=5,
    model_name="BiLSTM",  # or "LSTM"
    output_dir="outputs"
)
# Load the dataset
dataset = pd.read_csv('Enriched_data.csv')

def df_splits(dataset: pd.DataFrame, config: ModelConfig):
    df_splits = []
    for i in reversed(range(config.n_splits)):
        start_row = dataset.shape[0] - config.training_horizon - config.forecast_horizon - i * config.forecast_horizon
        end_row = start_row + config.training_horizon #+ config.forecast_horizon
        df = dataset.iloc[start_row:end_row].copy()
        df_splits.append(df)
    return df_splits

splits = df_splits(dataset, config)

In [117]:
splits[0]

Unnamed: 0,timestamp,Forecast [MW],Actual Load [MW],temperature,humidity,wind_speed,dayofyear,dayofmonth,dayofweek,hour,holiday
43223,2024-12-05 23:00:00,52208.25,57316.75,4.690,82.70,22.645,340,5,3,23,0
43224,2024-12-06 00:00:00,48982.75,54171.25,4.810,83.65,22.530,341,6,4,0,0
43225,2024-12-06 01:00:00,47070.25,52450.00,5.185,84.85,22.695,341,6,4,1,0
43226,2024-12-06 02:00:00,45899.00,51276.50,5.450,86.45,22.380,341,6,4,2,0
43227,2024-12-06 03:00:00,46195.25,50857.25,5.640,87.50,22.910,341,6,4,3,0
...,...,...,...,...,...,...,...,...,...,...,...
43458,2024-12-15 18:00:00,53751.75,62357.75,6.055,86.90,23.495,350,15,6,18,0
43459,2024-12-15 19:00:00,52739.00,61399.75,6.340,87.10,23.405,350,15,6,19,0
43460,2024-12-15 20:00:00,50472.00,59068.25,6.570,86.90,23.640,350,15,6,20,0
43461,2024-12-15 21:00:00,48733.00,56573.75,6.900,86.55,23.480,350,15,6,21,0


In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    # Normalizing the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset[['Actual Load [MW]', 'temperature', 'humidity', 
                                                'wind_speed', 'dayofyear', 'dayofmonth', 
                                                'dayofweek', 'hour', 'holiday']].values)
    
    # Create the time series data
    X, y = [], []
    for i in range(1):
        X.append(scaled_data[i-config.training_horizon:i])
        y.append(scaled_data[i + config.forecast_horizon, 0])  # Predicting 'Actual Load [MW]'
    
    return np.array(X), np.array(y), scaler

def build_model(config: ModelConfig, input_shape: tuple):
    model = Sequential()
    
    if config.model_name == "LSTM":
        model.add(LSTM(units=5, return_sequences=False, input_shape=input_shape))
    elif config.model_name == "BiLSTM":
        model.add(Bidirectional(LSTM(units=5, return_sequences=False), input_shape=input_shape))
    
    model.add(Dense(units=1))  # Output layer for predicting one value (Actual Load [MW])
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def train_and_forecast(dataset: pd.DataFrame, config: ModelConfig):
    """
    Train the LSTM or BiLSTM model and forecast based on the provided dataset and configuration.
    """
    X, y, scaler = preprocess_data(dataset, config)
    
    # Train-test split (you can modify this if you want to do cross-validation)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=False)
    
    model = build_model(config, input_shape=(X_train.shape[1], X_train.shape[2]))
    
    model.fit(X_train, y_train, epochs=2, batch_size=32, verbose=1)
    
    # Forecasting (using the test data)
    predictions = model.predict(X_test)
    
    # Inverse scaling to get the actual values
    predictions = scaler.inverse_transform(np.hstack((predictions, np.zeros((predictions.shape[0], X_test.shape[2]-1)))))
    
    return predictions




# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=240,
    n_splits=5,
    model_name="LSTM",  # or "LSTM"
    output_dir="outputs"
)

# # Assuming 'dataset' is your time series data as a pandas DataFrame
# predictions = train_and_forecast(dataset, config)

predictions = []
for data in splits:
    X, y, scaler = preprocess_data(data, config)
    # Assuming 'dataset' is your time series data as a pandas DataFrame
    prediction = train_and_forecast(data, config)
    predictions.append(prediction)


ValueError: With n_samples=1, test_size=0.1 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [125]:
y

array([0.29506367])

In [119]:
predictions[0].shape

IndexError: list index out of range

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt




def preprocess_data(dataset: pd.DataFrame, config: ModelConfig):
    # Parse the timestamp column
    dataset['timestamp'] = pd.to_datetime(dataset['timestamp'])
    dataset.set_index('timestamp', inplace=True)

    # Define the target variable and features
    target_column = 'Actual Load [MW]'
    feature_columns = ['temperature'] #, 'humidity', 'wind_speed', 'dayofyear', 'dayofmonth', 'dayofweek', 'hour', 'holiday']

    # Extract features and target variable
    X = dataset[feature_columns]
    y = dataset[target_column]

    # Normalize the features using MinMaxScaler
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

    return X_scaled, y_scaled, scaler_X, scaler_y


def train_and_forecast(dataset: pd.DataFrame, config: ModelConfig):

    X_scaled, y_scaled, scaler_X, scaler_y = preprocess_data(dataset, config)

    # Prepare the data for LSTM (supervised learning, creating sequences)
    def create_sequences(X, y, seq_length = config.training_horizon, target_length = config.forecast_horizon):
        X_seq, y_seq = [], []
        for i in reversed(range(config.n_splits)):
            start = len(X) - seq_length - target_length - i * (target_length)
            end = start + seq_length
            X_seq.append(X[start:end])
            y_seq.append(y[end:end+target_length])  # Predict a sequence of 'target_length' time steps
        return np.array(X_seq), np.array(y_seq)

    # Create sequences (use 24 hours of previous data to predict the next 24 hours)
    seq_length = config.training_horizon # change the name to train horizon
    target_length = config.forecast_horizon
    X_seq, y_seq = create_sequences(X_scaled, y_scaled, seq_length, target_length)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.3, shuffle=False)

    # Build the LSTM model
    model = Sequential()
    model.add(LSTM(units=64, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(units=target_length))  # Predict a sequence of 'target_length' values

    model.compile(optimizer=Adam(), loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

    # Make predictions
    predictions_scaled = model.predict(X_test)

    # Inverse transform the scaled predictions and true values
    predictions = scaler_y.inverse_transform(predictions_scaled)
    predictions = predictions.flatten()

    # Reshape y_test to 2D for inverse transformation
    y_test_reshaped = y_test.reshape(-1, target_length)
    y_test_inverse = scaler_y.inverse_transform(y_test_reshaped)

    return predictions

# Load the dataset
dataset = pd.read_csv('Enriched_data.csv')

# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=720,
    n_splits=1440,
    model_name="LSTM",  # or "LSTM"
    output_dir="outputs"
)

predictions = train_and_forecast(dataset, config)

Epoch 1/20


  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 162ms/step - loss: 0.1841 - val_loss: 0.0281
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 149ms/step - loss: 0.0276 - val_loss: 0.0213
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 147ms/step - loss: 0.0233 - val_loss: 0.0193
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 149ms/step - loss: 0.0218 - val_loss: 0.0184
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 149ms/step - loss: 0.0187 - val_loss: 0.0159
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 149ms/step - loss: 0.0158 - val_loss: 0.0150
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 150ms/step - loss: 0.0156 - val_loss: 0.0173
Epoch 8/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 149ms/step - loss: 0.0163 - val_loss: 0.0149
Epoch 9/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━

In [190]:
dataset = pd.read_csv('Enriched_data.csv')
dataset["timestamp"]

0        2020-01-01 00:00:00
1        2020-01-01 01:00:00
2        2020-01-01 02:00:00
3        2020-01-01 03:00:00
4        2020-01-01 04:00:00
                ...         
43578    2024-12-20 18:00:00
43579    2024-12-20 19:00:00
43580    2024-12-20 20:00:00
43581    2024-12-20 21:00:00
43582    2024-12-20 22:00:00
Name: timestamp, Length: 43583, dtype: object

In [184]:
def save_forecast(forecasts: np.ndarray, dataset: pd.DataFrame, config: ModelConfig):
    """
    Save the forecast results to a CSV file, including timestamps.
    """
    # Get the timestamps from the dataset, assuming the dataset is ordered and contains the 'timestamp' column.
    forecast_timestamps = dataset.iloc[-len(forecasts):]['timestamp'].values
    
    # Create a DataFrame for the forecasts and timestamps
    forecast_df = pd.DataFrame({
        'timestamp': forecast_timestamps,
        'forecast': forecasts  # Ensure the forecast is a 1D array
    })
    
    # Ensure output directory exists
    os.makedirs(config.output_dir, exist_ok=True)
    
    # Define the output file path
    output_path = os.path.join(config.output_dir, f"{config.model_name}_forecasts.csv")
    
    # Save the DataFrame to a CSV file
    forecast_df.to_csv(output_path, index=False)
    
    print(f"Forecasts saved to {output_path}")


# Load the dataset
dataset = pd.read_csv('Enriched_data.csv')

# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=720,
    n_splits=1440,
    model_name="LSTM",  # or "LSTM"
    output_dir="outputs"
)

save_forecast(predictions, dataset, config)

(432, 24)

In [None]:
# Calculate Mean Absolute Error
mae = mean_absolute_error(y_test_inverse, predictions)
print(f'Mean Absolute Error (MAE): {mae}')

# Plot the predictions vs actual values for the first sample in the test set
plt.figure(figsize=(10, 6))
plt.plot(y_test_inverse[0], label='Actual Load [MW] (Next 24 hours)')
plt.plot(predictions[0], label='Predicted Load [MW] (Next 24 hours)')
plt.legend()
plt.title('Actual vs Predicted Load [MW] for Next 24 Hours')
plt.show()

In [142]:
predictions.shape

(8708, 24)

In [144]:
predictions.T.shape

(24, 8708)

In [145]:
predict = predictions.flatten()

In [146]:
predict.shape

(208992,)

In [177]:
# X_scaled, y_scaled, scaler_X, scaler_y = preprocess_data(dataset, config)
# Example usage:
config = ModelConfig(
    forecast_horizon=24,
    training_horizon=720,
    n_splits=1440,
    model_name="LSTM",  # or "LSTM"
    output_dir="outputs"
)
# Prepare the data for LSTM (supervised learning, creating sequences)
def create_sequences(X, y, seq_length = config.training_horizon, target_length = config.forecast_horizon):
    X_seq, y_seq = [], []
    for i in reversed(range(config.n_splits)):
        start = len(X) - seq_length - target_length - i * (target_length)
        end = start + seq_length
        X_seq.append(X[start:end])
        y_seq.append(y[end:end+target_length])  # Predict a sequence of 'target_length' time steps
    return np.array(X_seq), np.array(y_seq)

# Create sequences (use 24 hours of previous data to predict the next 24 hours)
seq_length = config.training_horizon # change the name to train horizon
target_length = config.forecast_horizon
X_seq, y_seq = create_sequences(X_scaled, y_scaled, seq_length, target_length)

In [176]:
X.shape

(43583, 8)

In [175]:
X_seq.shape

(1785, 720, 8)

In [153]:
for i in reversed(range(10)):
    print(i)

9
8
7
6
5
4
3
2
1
0


In [5]:
import numpy as np
splits = np.repeat(range(1, 1440 + 1), 24)

In [6]:
len(splits)

34560

In [8]:
10368/432

24.0