# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import random
import os
import datetime

# Seed every random number generator the script relies on
def set_seeds(seed=42):
    """Seed the Python, NumPy and TensorFlow/Keras RNGs and set TF env flags.

    Args:
        seed: Integer seed passed unchanged to every generator (default 42).

    NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    assigning it here presumably only influences child processes - confirm.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for each library-level generator, in a fixed order.
    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.keras.utils.set_random_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Ask TensorFlow for deterministic ops and single-threaded op scheduling.
    os.environ.update(dict.fromkeys(
        ('TF_DETERMINISTIC_OPS', 'TF_NUM_INTRAOP_THREADS', 'TF_NUM_INTEROP_THREADS'),
        '1',
    ))

set_seeds(seed=42)

# Read the historical LAR table and index it by its parsed (day-first) dates
data = pd.read_csv("../../Dataset/Historical_Data/Liquidity_Activity_Ratio/LAR_Historical_Data.csv")
data['Date'] = pd.to_datetime(data['Date'], dayfirst=True)
data = data.set_index('Date')

# Turn the LAR column into a single-feature column vector and rescale it to [0, 1]
timeseries_data = data['LAR'].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
timeseries_data_scaled = scaler.fit(timeseries_data).transform(timeseries_data)

def prepare_data(series, n_steps):
    """Slice a sequence into sliding input windows and their next-step targets.

    Args:
        series: Indexable, sliceable sequence of observations (for example an
            array with one row per time step).
        n_steps: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X[k]`` is ``series[k:k + n_steps]``
        and ``y[k]`` is the observation right after that window. Both arrays
        are empty when ``series`` has no more than ``n_steps`` items.
    """
    window_count = len(series) - n_steps
    windows = [series[start:start + n_steps] for start in range(window_count)]
    targets = [series[start + n_steps] for start in range(window_count)]
    return np.array(windows), np.array(targets)

def build_lstm_model(input_shape):
    """Build and compile the stacked-LSTM regressor.

    Layers, in order: LSTM(100, sequences) -> Dropout(0.2) ->
    LSTM(100, sequences) -> Dropout(0.2) -> LSTM(50) -> Dense(25, relu) ->
    Dense(1, linear).

    Args:
        input_shape: Shape of one input window, passed to the first LSTM layer
            (the caller in this file passes ``(n_steps, n_features)``).

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        mean-squared-error loss).
    """
    model = Sequential([
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(100, return_sequences=True),
        Dropout(0.2),
        LSTM(50),
        Dense(25, activation='relu'),
        # Linear output for regression. A ReLU here clamps every forecast at
        # the scaled minimum (0) and, because its gradient is zero for
        # negative pre-activations, the single output unit can get stuck
        # predicting a constant.
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

def generate_future_predictions(model, last_sequence, n_steps, n_features, scaler,
                              batch_size=10, end_date=datetime.datetime(2026, 12, 31),
                              retrain=True, history_scaled=None, start_date=None):
    """Forecast one day at a time, feeding each prediction back as input.

    Predictions are produced in batches of ``batch_size`` daily steps. After
    each batch (except the last one) the model can be re-fitted on the scaled
    history extended with its own predictions.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` and, when ``retrain``
            is true, ``fit(X, y, epochs=..., batch_size=..., verbose=...)``.
        last_sequence: The most recent ``n_steps`` scaled observations; it is
            reshaped to ``(1, n_steps, n_features)``.
        n_steps: Length of the input window.
        n_features: Number of features per time step. Only feature index 0 is
            overwritten with each new prediction.
        scaler: Object whose ``inverse_transform`` maps scaled predictions back
            to the original scale.
        batch_size: Number of daily predictions per batch.
        end_date: Last date to predict (inclusive).
        retrain: Whether to re-fit ``model`` between batches.
        history_scaled: Scaled history used as the base of the retraining set.
            Defaults to the module-level ``timeseries_data_scaled``.
        start_date: Date of the last known observation; the first prediction
            is for the following day. Defaults to the module-level
            ``data.index[-1]``.

    Returns:
        ``pandas.Series`` named ``'Predicted_LAR'`` holding the predictions in
        the original scale, indexed by date (index name ``'Date'``).
    """
    if start_date is None:
        start_date = data.index[-1]
    current_date = start_date

    # Work on a private copy so the caller's array is never written to.
    current_sequence = np.array(last_sequence).reshape(1, n_steps, n_features)

    all_data_scaled = None
    if retrain:
        if history_scaled is None:
            history_scaled = timeseries_data_scaled
        all_data_scaled = np.array(history_scaled)

    predictions_dict = {}
    one_day = pd.DateOffset(days=1)

    while current_date < end_date:
        batch_predictions = []
        batch_dates = []

        # Generate predictions for the batch
        for _ in range(batch_size):
            current_date += one_day
            if current_date > end_date:
                break

            # Predict next value
            next_pred = model.predict(current_sequence, verbose=0)[0, 0]

            # Store prediction and date
            batch_predictions.append(next_pred)
            batch_dates.append(current_date)

            # Slide the window: drop the oldest step, append the prediction
            current_sequence = np.roll(current_sequence, -1, axis=1)
            current_sequence[0, -1, 0] = next_pred

        if not batch_predictions:  # If no predictions were made, break
            break

        # Convert predictions back to original scale
        batch_predictions = np.array(batch_predictions).reshape(-1, 1)
        batch_predictions_inv = scaler.inverse_transform(batch_predictions)

        # Store predictions in dictionary
        for date, pred in zip(batch_dates, batch_predictions_inv.flatten()):
            predictions_dict[date] = pred

        # Retraining after the final batch would be wasted work: no further
        # prediction is made with the updated weights.
        if retrain and current_date < end_date:
            # Add new predictions to training data
            all_data_scaled = np.vstack([all_data_scaled, batch_predictions])

            # Prepare new training data
            X_new, y_new = prepare_data(all_data_scaled, n_steps)
            X_new = X_new.reshape((X_new.shape[0], X_new.shape[1], n_features))

            # Retrain model
            model.fit(X_new, y_new, epochs=5, batch_size=16, verbose=0)
            print(f"Model retrained - Current date: {current_date.strftime('%Y-%m-%d')}")

    # Convert dictionary to series
    predictions_series = pd.Series(predictions_dict, name='Predicted_LAR')
    predictions_series.index.name = 'Date'
    return predictions_series

# Build the supervised windows from the complete scaled history
n_steps = 16  # look-back window: 16 consecutive observations per sample
n_features = 1
X, y = prepare_data(timeseries_data_scaled, n_steps)
X = X.reshape(X.shape[0], X.shape[1], n_features)

# Fit on every window; stop once the training loss has not improved for 10 epochs
print("Training model on complete dataset...")
model = build_lstm_model((n_steps, n_features))
early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
history = model.fit(
    X,
    y,
    epochs=150,
    batch_size=16,
    callbacks=[early_stopping],
    verbose=1,
)

# Roll the forecast forward from the latest n_steps observations to 2026-12-31
print("\nGenerating predictions until December 31st, 2026...")
last_sequence = timeseries_data_scaled[-n_steps:]
future_predictions = generate_future_predictions(
    model,
    last_sequence,
    n_steps,
    n_features,
    scaler,
    batch_size=10,  # daily predictions made between two retraining rounds
    end_date=datetime.datetime(2026, 12, 31),
    retrain=True,
)

# Draw the observed series, the forecast, and a marker at the last observation
plt.figure(figsize=(15, 7))
plt.plot(data.index, data['LAR'], color='blue', label='Historical LAR')
plt.plot(
    future_predictions.index,
    future_predictions.values,
    color='red',
    linestyle='--',
    label='Predicted LAR',
)
plt.axvline(x=data.index[-1], linestyle=':', color='green', label='Prediction Start')

# Titles, axis labels and cosmetics
plt.title('LAR: Historical and Predicted Values until 2026')
plt.ylabel('LAR')
plt.xlabel('Date')
plt.grid(True)
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Save predictions (one path definition shared by the write and the message)
output_path = "../../Dataset/Prediction_Data/Liquidity_Activity_Ratio/LAR_Future_Data.csv"
# Create the target folder first so the run does not fail at the final write
# after training has already completed.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_df = pd.DataFrame(future_predictions).reset_index()
output_df.to_csv(output_path, index=False)
print(f"\nPredictions saved to '{output_path}'")

# Print summary statistics
print("\nPrediction Summary:")
print(f"Prediction Start Date: {data.index[-1].strftime('%Y-%m-%d')}")
if future_predictions.empty:
    # Nothing was forecast, so there is no last date or last value to index;
    # report the count instead of failing with an IndexError.
    print(f"Number of days predicted: {len(future_predictions)}")
    print(f"Initial LAR: {data['LAR'].iloc[-1]:.2f}")
else:
    print(f"Prediction End Date: {future_predictions.index[-1].strftime('%Y-%m-%d')}")
    print(f"Number of days predicted: {len(future_predictions)}")
    print(f"Initial LAR: {data['LAR'].iloc[-1]:.2f}")
    print(f"Final Predicted LAR: {future_predictions.iloc[-1]:.2f}")
    print(f"Minimum Predicted LAR: {future_predictions.min():.2f}")
    print(f"Maximum Predicted LAR: {future_predictions.max():.2f}")