In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import random
import os
import yaml
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Load the run settings; every tunable used below is read from this YAML file.
# The encoding is given explicitly so parsing does not depend on the
# platform's default locale encoding.
with open("config.yaml", 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Seed every random number generator this script touches (Python, NumPy,
# TensorFlow/Keras) from the single configured seed, then request
# deterministic TensorFlow ops.
SEED = config['general']['seed']
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# assigning it here is not expected to change hashing in the current process
# -- confirm whether it should be exported before launching instead.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# Historical prices, indexed by the parsed 'Date' column.
file_path = config['op_price']['data']['historical_data_path']
df = pd.read_csv(file_path, parse_dates=['Date'], index_col='Date')

# Scale the single 'OP_Price' column; the double brackets keep it 2-D, which
# is the input layout the scaler is given here.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split performed further down, so test-period extremes influence
# the scaling -- confirm this is acceptable for the reported metrics.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df[['OP_Price']])

def create_sequences(data, seq_length):
    """Cut a time-ordered series into sliding windows and next-step targets.

    Args:
        data: Array-like whose first axis is time, e.g. shape
            ``(n_samples, n_features)``.
        seq_length: Number of consecutive samples in each input window.

    Returns:
        A tuple ``(sequences, labels)`` of NumPy arrays where
        ``sequences[i]`` is ``data[i:i + seq_length]`` and ``labels[i]`` is
        ``data[i + seq_length]``. When ``data`` has ``seq_length`` or fewer
        samples, both arrays are empty but keep the window and trailing
        dimensions, i.e. shapes ``(0, seq_length, ...)`` and ``(0, ...)``.
    """
    data = np.asarray(data)
    n_windows = len(data) - seq_length

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,), dropping the window and
        # feature axes that downstream slicing and model code rely on.
        trailing = data.shape[1:]
        return (
            np.empty((0, seq_length) + trailing, dtype=data.dtype),
            np.empty((0,) + trailing, dtype=data.dtype),
        )

    sequences = np.array([data[i:i + seq_length] for i in range(n_windows)])
    labels = np.array([data[i + seq_length] for i in range(n_windows)])
    return sequences, labels

# Window the scaled series: each sample is `seq_length` consecutive steps and
# its label is the step that follows.
seq_length = config['op_price']['sequence']['seq_length']
X, y = create_sequences(data_scaled, seq_length)

# Chronological hold-out: the first `split` windows train the model and the
# remaining ones are kept for testing (no shuffling).
train_fraction = config['op_price']['split_ratio']['train_test_split_ratio']
split = int(train_fraction * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

def create_lstm_model():
    """Assemble and compile the stacked-LSTM regressor described in the config.

    Hyper-parameters come from ``config['op_price']['model']`` and the input
    window length from the module-level ``seq_length``. The network is two
    LSTM layers, each followed by dropout, then a ReLU dense layer and a
    single-unit dense output.

    Returns:
        The compiled ``Sequential`` model.
    """
    hp = config['op_price']['model']

    # Layers are listed in network order; only the first LSTM returns the
    # full sequence so the second LSTM can consume it.
    layers = [
        LSTM(hp['lstm_units'], return_sequences=True, input_shape=(seq_length, 1)),
        Dropout(hp['dropout_rate']),
        LSTM(hp['lstm_units'], return_sequences=False),
        Dropout(hp['dropout_rate']),
        Dense(hp['dense_units'], activation='relu'),
        Dense(1),
    ]

    network = Sequential(layers)
    network.compile(optimizer=hp['optimizer'], loss=hp['loss_function'])
    return network

# Build the network and train it silently. The held-out test windows are also
# passed as validation data for the per-epoch validation loss.
model = create_lstm_model()

training_cfg = config['op_price']['model']
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=training_cfg['epochs'],
    batch_size=training_cfg['batch_size'],
    verbose=0,
)

# Predict on the test windows, then undo the scaling on both the targets and
# the predictions so the metrics are computed on unscaled values.
y_pred = model.predict(X_test)
y_pred_actual = scaler.inverse_transform(y_pred)
y_test_actual = scaler.inverse_transform(y_test)

mse = mean_squared_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_actual, y_pred_actual)
r2 = r2_score(y_test_actual, y_pred_actual)
print(f"R-squared: {r2}, MAE: {mae}, RMSE: {rmse}")

# Each label sits `seq_length` rows after the start of its window, so the
# test targets begin at row `split + seq_length` of the original frame.
test_dates = df.index[split+seq_length:]

plt.figure(figsize=(12, 6))
plt.title('Actual vs Predicted OP Price')
plt.xlabel('Date')
plt.ylabel('OP Price')
plt.plot(test_dates, y_test_actual, color='blue', label='Actual OP Price')
plt.plot(test_dates, y_pred_actual, color='red', linestyle='dashed', label='Predicted OP Price')
plt.legend()
plt.show()

# Recursive multi-step forecast: every prediction is fed back in as the
# newest step of the input window used for the next prediction.
forecast_cfg = config['op_price']['forecast']
future_dates = pd.date_range(start=forecast_cfg['forecast_start_date'],
                             end=forecast_cfg['forecast_end_date'])

# Standard deviation of the Gaussian noise added to each fed-back step; read
# once because it does not change inside the loop.
fluctuation_factor = forecast_cfg['fluctuation_factor']

future_preds = []
# Start from the last `seq_length` scaled observations, reshaped to the
# (1, seq_length, 1) layout used for a single-sample prediction.
last_sequence = data_scaled[-seq_length:].reshape(1, seq_length, 1)

for _ in range(len(future_dates)):
    # verbose=0: predict() runs once per forecast date and would otherwise
    # print a progress bar for every single step.
    pred = model.predict(last_sequence, verbose=0)[0]
    # The stored forecast is the raw prediction; the noise only perturbs the
    # value that is fed back into the window.
    future_preds.append(pred)
    last_sequence = np.append(last_sequence[:, 1:, :],
                              [[pred + np.random.normal(0, fluctuation_factor)]],
                              axis=1)

# Undo the scaling on the forecasted values.
future_preds_actual = scaler.inverse_transform(future_preds)

plt.figure(figsize=(12, 6))
plt.plot(df.index, df['OP_Price'], label='Historical OP Price', color='blue')
plt.plot(future_dates, future_preds_actual, label='Predicted OP Price', color='red')
plt.xlabel('Date')
plt.ylabel('OP Price')
plt.legend()
# NOTE(review): the year range in this title is hard-coded while the forecast
# dates come from the config -- confirm they agree.
plt.title('Future OP Price Prediction (2024-2026)')
plt.show()

# Persist one row per forecast date.
output_path = config['op_price']['data']['prediction_output_path']
future_df = pd.DataFrame({'Date': future_dates, 'Predicted_OP_Price': future_preds_actual.flatten()})
future_df.to_csv(output_path, index=False)

print(f"Predicted OP Prices saved successfully in {output_path}")
