In [6]:
import os
import joblib
import numpy as np
import pandas as pd
from keras.models import load_model
from datetime import datetime, timedelta

# Define the threshold region (e.g., ±2% around the predicted price)
THRESHOLD_PERCENTAGE = 5

def load_and_preprocess_data(file_path, scaler_path, window_size=5):
    df_stock = pd.read_csv(file_path)

    # Convert 'Date' column to datetime
    df_stock['Date'] = pd.to_datetime(df_stock['Date'], format='%Y-%m-%d')

    # Remove '%' and convert 'Percent Change' to float, handling errors
    df_stock['Percent Change'] = df_stock['Percent Change'].str.replace('%', '').apply(pd.to_numeric, errors='coerce')

    # Remove commas from 'Volume' and convert to float
    df_stock['Volume'] = df_stock['Volume'].astype(str).str.replace(',', '').apply(pd.to_numeric, errors='coerce')

    # Create additional features
    df_stock['day_of_week'] = df_stock['Date'].dt.dayofweek
    df_stock['month'] = df_stock['Date'].dt.month

    # Load scaler
    if not os.path.exists(scaler_path):
        raise FileNotFoundError(f"Scaler file {scaler_path} not found.")
    scaler = joblib.load(scaler_path)

    # Select features and normalize
    features = ['Close', 'day_of_week', 'month']
    df_stock[features] = scaler.transform(df_stock[features])

    # Prepare sequence for prediction
    X = df_stock[features].iloc[-window_size:].values
    return np.expand_dims(X, axis=0)  # Shape: (1, window_size, features)

def predict_stock_price(model_path, scaler_path, file_path, threshold_percentage=THRESHOLD_PERCENTAGE):
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file {model_path} not found.")

    # Load model
    model = load_model(model_path)

    # Preprocess data
    X = load_and_preprocess_data(file_path, scaler_path)

    # Make prediction (normalized scale)
    predicted_price_norm = model.predict(X)[0][0]

    # Load scaler and apply inverse transformation
    scaler = joblib.load(scaler_path)
    
    # Only transform the "Close" value (we assume it was first in the feature list)
    predicted_price_original = scaler.inverse_transform(
        np.array([[predicted_price_norm, 0, 0]])  # Set other features to 0
    )[0][0]  # Extract only the Close price

    # Get last known actual price
    df_stock = pd.read_csv(file_path)
    actual_price = df_stock['Close'].iloc[-1]  # Assuming 'Close' is the last column

    # Calculate threshold region (±2% around predicted price)
    lower_bound = predicted_price_original * (1 - threshold_percentage / 100)
    upper_bound = predicted_price_original * (1 + threshold_percentage / 100)

    # **Enhanced Dynamic Threshold:**
    # You can also adjust the threshold region dynamically based on the recent price fluctuation
    # For example, you can check the recent percentage change to expand the range.
    recent_price_change = (df_stock['Close'].iloc[-1] - df_stock['Close'].iloc[-2]) / df_stock['Close'].iloc[-2] * 100
    dynamic_threshold_percentage = threshold_percentage + abs(recent_price_change) / 2  # Adding a factor of recent fluctuation
    lower_bound = actual_price * (1 - threshold_percentage / 100)
    upper_bound = actual_price * (1 + threshold_percentage / 100)

    # Check if actual price is within the threshold region
    is_accurate = lower_bound <= predicted_price_original <= upper_bound
    accuracy = (actual_price - predicted_price_original) / predicted_price_original * 100  # Accuracy in percentage
    if accuracy<0:
        accuracy=accuracy*-1

    last_date = df_stock['Date'].iloc[-1]  # Get the last date from the 'Date' column
    return predicted_price_original, actual_price, is_accurate, accuracy, last_date


if __name__ == "__main__":
    data_directory = '../NULB/'  # Update to your actual data path

    for file_name in os.listdir(data_directory):
        if file_name.endswith('.csv'):
            stock_name = os.path.splitext(file_name)[0]
            file_path = os.path.join(data_directory, file_name)

            model_path = os.path.join(data_directory, f"{stock_name}_tft_model.keras")
            scaler_path = os.path.join(data_directory, f"{stock_name}_scaler.pkl")

            try:
                predicted_price, actual_price, is_accurate, accuracy, last_column_date = predict_stock_price(
                    model_path, scaler_path, file_path
                )

                date_obj = datetime.strptime(last_column_date, '%Y-%m-%d')
                
                # Add one day
                new_date_obj = date_obj + timedelta(days=1)
                
                # Convert back to string if needed
                new_date_str = new_date_obj.strftime('%Y-%m-%d')

                print(f"Predicted price for {stock_name}: {predicted_price:.2f} for date: {new_date_str}")
                print(f"Actual price: {actual_price:.2f}")
                print(f"Accuracy: {accuracy:.2f}%")
                print(f"Threshold region: {predicted_price * (1 - THRESHOLD_PERCENTAGE / 100):.2f} to {predicted_price * (1 + THRESHOLD_PERCENTAGE / 100):.2f}")
                print(f"Prediction is within threshold: {is_accurate}")
            except Exception as e:
                print(f"Error processing {stock_name}: {e}")


  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252ms/step
Predicted price for NUBL: 764.79 for date: 2025-02-07
Actual price: 690.00
Accuracy: 9.78%
Threshold region: 726.55 to 803.02
Prediction is within threshold: False
