# Setup


In [4]:
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

# import modellib.backtest as bt
# import modellib.modelclass as mc
import modellib.evaluate as eval
import modellib.lstm as lstm

# Define Constants


In [5]:
# Base location of the processed solar-power dataset on the Hugging Face Hub
REPO_PATH = "hf://datasets/Creatorin/solarpower_processed/"

# Windowing configuration handed to create_sequences further down
SEQUENCE_LENGTH = 24   # window length (rows per input sample)
BATCH_SIZE = 8         # batch size argument forwarded to create_sequences
TARGET_COLUMN = 'Leistung'   # column to predict (German for "power")

# Seed every random number generator the notebook relies on.
# Seeding NumPy alone is not enough: TensorFlow/Keras draws weight
# initialisations and shuffling from its own generator, so it is seeded too.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load Data from Hugging Face


In [6]:
# File name of each dataset split inside the repository
splits = {
    'train': 'train_ts.csv',
    'validation': 'val_ts.csv',
    'test': 'test_ts.csv',
}

# Read the three splits in order (train, validation, test). The first CSV column
# becomes the index; date_format supplies the timestamp layout for date parsing.
train_ts, val_ts, test_ts = (
    pd.read_csv(REPO_PATH + splits[split_name], index_col=0, date_format="%Y-%m-%d %H:%M:%S")
    for split_name in ("train", "validation", "test")
)

# Keep an independent copy of the training frame (meant for undoing normalisation later)
train_ts_copy = train_ts.copy()

# Report the shape of every split as a quick sanity check
print(f"Train Shape: {train_ts.shape}, Validation Shape: {val_ts.shape}, Test Shape: {test_ts.shape}")

FileNotFoundError: Creatorin/solarpower_processed/train_ts.csv (repository not found)

# Prepare Data for LSTM


In [None]:
# Create sequences
def create_sequences(series: pd.DataFrame, target_column: str, sequence_length: int = 24, batch_size: int = 8) -> "tuple[np.ndarray, np.ndarray]":
    """Turn a time-series frame into sliding-window samples and next-step targets.

    Sample ``k`` consists of rows ``k .. k + sequence_length - 1`` of ``series``
    (all columns, including the target column), and its target is the value of
    ``target_column`` in row ``k + sequence_length``, i.e. the step right after
    the window.

    Args:
        series: Frame whose rows are consecutive time steps and whose columns
            are the model features. Must contain ``target_column``.
        target_column: Name of the column to predict.
        sequence_length: Number of consecutive rows in every input window.
        batch_size: Kept for backward compatibility. All windows are returned
            as one array, so batching has no influence on the result.

    Returns:
        ``(X, y)`` where ``X`` has shape
        ``(len(series) - sequence_length, sequence_length, n_columns)`` and
        ``y`` has shape ``(len(series) - sequence_length,)``.

    Raises:
        ValueError: If ``sequence_length`` is not positive or ``series`` has
            too few rows to form a single window plus its target.
    """
    if sequence_length < 1:
        raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")

    features = series.to_numpy()
    target = series[target_column].to_numpy()

    # Every window needs one additional row after it to serve as the target.
    n_samples = len(features) - sequence_length
    if n_samples < 1:
        raise ValueError(
            f"Need more than {sequence_length} rows to build a sequence, got {len(features)}"
        )

    # The previous implementation passed batch_size positionally to
    # TimeseriesGenerator, where the fourth positional parameter is
    # sampling_rate; windows were therefore subsampled every `batch_size` rows
    # instead of containing `sequence_length` consecutive rows. Building the
    # windows with explicit indices avoids that pitfall.
    # window_index[k, j] == k + j, the row of the j-th step of window k.
    window_index = np.arange(n_samples)[:, None] + np.arange(sequence_length)[None, :]

    X = features[window_index]            # fancy indexing yields a fresh array
    y = target[sequence_length:].copy()   # copy so callers never alias the frame

    return X, y


In [None]:
# Build windowed inputs and targets for each split with the shared configuration
X_train, y_train = create_sequences(
    train_ts,
    target_column=TARGET_COLUMN,
    sequence_length=SEQUENCE_LENGTH,
    batch_size=BATCH_SIZE,
)
X_val, y_val = create_sequences(
    val_ts,
    target_column=TARGET_COLUMN,
    sequence_length=SEQUENCE_LENGTH,
    batch_size=BATCH_SIZE,
)
X_test, y_test = create_sequences(
    test_ts,
    target_column=TARGET_COLUMN,
    sequence_length=SEQUENCE_LENGTH,
    batch_size=BATCH_SIZE,
)

# Show the resulting array shapes per split
print("Training set - X shape:", X_train.shape, "y shape:", y_train.shape)
print("Validation set - X shape:", X_val.shape, "y shape:", y_val.shape)
print("Test set - X shape:", X_test.shape, "y shape:", y_test.shape)

In [None]:
# Build the model. The input shape is taken from axes 1 and 2 of X_train,
# so X_train must be a 3-D array here.
# NOTE(review): presumably (timesteps, features) per sample - confirm against create_sequences.
input_shape = (X_train.shape[1], X_train.shape[2])
model = lstm.create_lstm_model(input_shape)

# Fit on the training split, passing the validation split alongside.
# NOTE(review): the plotting cell reads history.history['loss'] / ['val_loss'],
# so train_lstm_model is assumed to return a Keras History object - confirm in modellib.lstm.
history = lstm.train_lstm_model(model, X_train, y_train, X_val, y_val)

# Score the trained model on the held-out test split (verbose=0 silences the progress bar)
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss}")

# Predict on the test inputs
predictions = model.predict(X_test)

# Compute performance metrics. `eval` is the modellib.evaluate module imported
# at the top of the notebook (it shadows the builtin eval).
# NOTE(review): predictions may have a trailing axis of size 1 while y_test is 1-D;
# verify evaluate_model handles that without unintended broadcasting.
metrics = eval.evaluate_model(y_test, predictions)
print(metrics)



In [None]:
# Training vs. validation loss per epoch
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training history')
plt.legend()  # without this the curve labels above are never displayed
plt.show()

# True vs. predicted target values on the test split.
# plt.figure (not plt.plot) is what accepts figsize; the earlier
# plt.plot(figsize=...) call drew nothing and left the default figure size.
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='True')
plt.plot(predictions, label='Predicted')
plt.xlabel('Test sample')
plt.ylabel('Target')
plt.title('Test set: true vs. predicted')
plt.legend()
plt.show()