# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import os

# Reproducibility: pin every random number generator this script touches to one seed.
SEED = 42

# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# assigning it here is expected to influence only child processes - confirm
# whether that is the intent.
os.environ['PYTHONHASHSEED'] = str(SEED)

# Seed NumPy, the stdlib `random` module, TensorFlow, and Keras' combined helper.
for _seed_rng in (np.random.seed, random.seed, tf.random.set_seed, tf.keras.utils.set_random_seed):
    _seed_rng(SEED)

# Ask TensorFlow to execute ops deterministically.
tf.config.experimental.enable_op_determinism()

# Read the historical price CSV; the 'Date' column is parsed as datetimes and used as the index.
file_path = "../../Dataset/Historical_Data/OP_Price/OP_Price_Historical_Data.csv"
df = pd.read_csv(file_path, index_col='Date', parse_dates=['Date'])

# Min-max scale the 'OP_Price' column (selected as a one-column frame so the
# result stays 2-D). The fitted `scaler` is reused later to undo the scaling.
# NOTE(review): the scaler is fitted on the full series, i.e. including the rows
# that are later held out for testing.
price_frame = df[['OP_Price']]
scaler = MinMaxScaler().fit(price_frame)
data_scaled = scaler.transform(price_frame)

# Create sequences using a rolling window approach
def create_sequences(data, seq_length):
    """Slice a series into overlapping input windows and next-step targets.

    Window ``i`` holds ``data[i : i + seq_length]`` and its label is the row
    that immediately follows it, ``data[i + seq_length]``.

    Args:
        data: Array-like indexed along its first axis (e.g. shape
            ``(n_rows, n_features)``). It is converted with ``np.asarray``.
        seq_length: Number of consecutive rows per window; must be >= 1.

    Returns:
        A ``(sequences, labels)`` tuple of new arrays with shapes
        ``(n, seq_length, *data.shape[1:])`` and ``(n, *data.shape[1:])``,
        where ``n = max(len(data) - seq_length, 0)``. When the input is too
        short to form a window, ``n`` is 0 but the trailing dimensions are
        still preserved.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be a positive integer, got {seq_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row i of window_idx lists the positions i, i+1, ..., i+seq_length-1, so a
    # single fancy-index lookup gathers every window at once.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    sequences = data[window_idx]

    # Copy so the labels never alias the caller's array.
    labels = data[seq_length:].copy()
    return sequences, labels

# Window length: each sample is 30 consecutive rows (days, per the original
# note) and its target is the row that follows.
seq_length = 30
X, y = create_sequences(data_scaled, seq_length)

# Chronological 80/20 split without shuffling: the earliest 80% of the windows
# are used for training and the remaining 20% are held out for testing.
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Build and compile the forecasting network
def create_lstm_model():
    """Return a compiled stacked-LSTM regression model.

    Layers, in order: LSTM(100) returning the full sequence, Dropout(0.2),
    LSTM(100) returning only its last output, Dropout(0.2), Dense(50) with
    ReLU activation, and a Dense(1) output layer. The first layer expects
    inputs of shape ``(seq_length, 1)``, where ``seq_length`` is read from
    module scope when the function is called. The model is compiled with the
    Adam optimizer and a mean-squared-error loss.
    """
    network = Sequential()

    # First recurrent layer passes every timestep on to the next LSTM.
    network.add(LSTM(100, return_sequences=True, input_shape=(seq_length, 1)))
    network.add(Dropout(0.2))

    # Second recurrent layer collapses the sequence to a single vector.
    network.add(LSTM(100, return_sequences=False))
    network.add(Dropout(0.2))

    # Dense head mapping the LSTM summary to one predicted value.
    network.add(Dense(50, activation='relu'))
    network.add(Dense(1))

    network.compile(optimizer='adam', loss='mse')
    return network

model = create_lstm_model()

# Fit on the training windows. The held-out windows are supplied as validation
# data, so `history` also records a per-epoch loss on them.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=16,
    verbose=0,
)

# Predict the held-out windows, then undo the min-max scaling on both the
# predictions and the true targets so the metrics are in price units.
y_pred = model.predict(X_test)
y_pred_actual = scaler.inverse_transform(y_pred)
y_test_actual = scaler.inverse_transform(y_test)

# Regression metrics on the original price scale.
mse = mean_squared_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_actual, y_pred_actual)
r2 = r2_score(y_test_actual, y_pred_actual)
print(f"R-squared: {r2}, MAE: {mae}, RMSE: {rmse}")

# Plot the held-out actual prices against the model's predictions.
# Target i corresponds to row i + seq_length of df, so the test targets start
# at row split + seq_length.
test_dates = df.index[split + seq_length:]

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_test_actual, label='Actual OP Price', color='blue')
ax.plot(test_dates, y_pred_actual, label='Predicted OP Price', color='red', linestyle='dashed')
ax.set_xlabel('Date')
ax.set_ylabel('OP Price')
ax.set_title('Actual vs Predicted OP Price')
ax.legend()
plt.show()

# Forecast future values
# Recursive one-step-ahead forecast: every prediction is fed back in as the
# newest element of the input window to produce the following one.
# NOTE(review): the start date is hard-coded; presumably it is the day after
# the last row of df - confirm against the dataset.
future_dates = pd.date_range(start='2024-12-01', end='2026-12-31')
future_preds = []

# Initial window: the last `seq_length` scaled observations, shaped (1, seq_length, 1).
last_sequence = data_scaled[-seq_length:].reshape(1, seq_length, 1)

for _ in future_dates:
    # verbose=0 keeps predict() from printing a progress bar on every step.
    pred = model.predict(last_sequence, verbose=0)[0]
    future_preds.append(pred)

    # Adding fluctuation: Gaussian noise (std 0.05, in scaled units) is added
    # only to the value fed back into the window; the stored forecast above
    # is the noise-free prediction.
    noisy_step = np.reshape(pred + np.random.normal(0, 0.05), (1, 1, 1))

    # Slide the window: drop the oldest step, append the newest one.
    last_sequence = np.concatenate([last_sequence[:, 1:, :], noisy_step], axis=1)

# Transform predictions back to original scale
future_preds_actual = scaler.inverse_transform(np.asarray(future_preds))

# Draw the full price history and the recursive forecast on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['OP_Price'], label='Historical OP Price', color='blue')
ax.plot(future_dates, future_preds_actual, label='Predicted OP Price', color='red')
ax.set_xlabel('Date')
ax.set_ylabel('OP Price')
ax.set_title('Future OP Price Prediction (2024-2026)')
ax.legend()
plt.show()

# Save predictions to CSV
output_path = "../../Dataset/Prediction_Data/OP_Price/OP_Price_Future_Data.csv"

# Create the destination folder first so a missing directory cannot make the
# write fail after the whole training and forecasting run has finished.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# One row per forecast date; flatten() turns the (n, 1) predictions into a 1-D column.
future_df = pd.DataFrame({'Date': future_dates, 'Predicted_OP_Price': future_preds_actual.flatten()})
future_df.to_csv(output_path, index=False)

print(f"Predicted OP Prices saved successfully in '{output_path}'")
