In [9]:
import os
import joblib
import numpy as np
import pandas as pd
from keras.models import load_model
from datetime import datetime, timedelta
import ta

# Half-width of the accuracy threshold region, in percent
# (5 means ±5% around the predicted price)
THRESHOLD_PERCENTAGE = 5
# Function for feature selection using Pearson correlation
def select_features(X, threshold=0.8):
    """Drop columns that are highly correlated with an earlier column.

    Correlations are computed with ``DataFrame.corr`` on the numeric
    columns of ``X`` only. A column is removed when its absolute
    correlation with any column positioned before it is strictly greater
    than ``threshold``. Non-numeric columns are never removed.

    Args:
        X: input DataFrame; it is not modified.
        threshold: absolute correlation above which the later column of a
            pair is discarded.

    Returns:
        A new DataFrame without the flagged columns.
    """
    pairwise = X.select_dtypes(include=[np.number]).corr()

    # Only the lower triangle (j < i) is inspected, so the first column of
    # every correlated pair always survives.
    redundant = {
        name
        for i, name in enumerate(pairwise.columns)
        for j in range(i)
        if abs(pairwise.iloc[i, j]) > threshold
    }

    return X.drop(columns=redundant)
def load_and_preprocess_data(file_path, scaler_path, window_size=5):
    """Build the scaled input window for the model from a stock CSV.

    Reads the CSV, derives calendar and technical-indicator features,
    removes highly correlated features with ``select_features``, scales the
    remaining columns with the persisted scaler and returns the trailing
    ``window_size`` rows.

    Args:
        file_path: path to a CSV containing 'Date' (formatted YYYY-MM-DD),
            'Close', 'High', 'Low', 'Volume' and 'Percent Change' columns.
        scaler_path: path to the fitted scaler saved with joblib.
        window_size: number of most recent rows to return.

    Returns:
        A NumPy array with a leading batch axis of size 1, followed by the
        selected rows and the selected feature columns.

    Raises:
        FileNotFoundError: if ``scaler_path`` does not exist.
    """
    df_stock = pd.read_csv(file_path)

    df_stock['Date'] = pd.to_datetime(df_stock['Date'], format='%Y-%m-%d')

    # Cast to str first so the .str accessor also works when pandas has
    # already parsed the column as numeric; unparseable values become NaN.
    df_stock['Percent Change'] = pd.to_numeric(
        df_stock['Percent Change'].astype(str).str.replace('%', '', regex=False),
        errors='coerce',
    )
    df_stock['Volume'] = pd.to_numeric(
        df_stock['Volume'].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )

    close = df_stock['Close']
    high = df_stock['High']
    low = df_stock['Low']
    volume = df_stock['Volume']

    # Calendar features
    df_stock['day_of_week'] = df_stock['Date'].dt.dayofweek
    df_stock['month'] = df_stock['Date'].dt.month

    # Trend / momentum indicators; the MACD indicator is built once and
    # queried for both the MACD line and its signal line.
    df_stock['Returns'] = close.pct_change().fillna(0)
    macd = ta.trend.MACD(close)
    df_stock['MACD'] = macd.macd().fillna(0)
    df_stock['Signal'] = macd.macd_signal().fillna(0)
    df_stock['RSI'] = ta.momentum.RSIIndicator(close).rsi().fillna(50)
    df_stock['SMA_20'] = close.rolling(window=20, min_periods=1).mean()
    df_stock['SMA_50'] = close.rolling(window=50, min_periods=1).mean()
    df_stock['EMA_20'] = ta.trend.EMAIndicator(close, window=20).ema_indicator().fillna(0)

    # Volatility measures
    df_stock['Volatility'] = df_stock['Returns'].rolling(window=20).std().fillna(0)
    df_stock['ATR'] = ta.volatility.AverageTrueRange(high, low, close).average_true_range().fillna(0)

    # Volume-based indicators
    df_stock['Volume_SMA'] = volume.rolling(window=20).mean().fillna(0)
    df_stock['MFI'] = ta.volume.MFIIndicator(high, low, close, volume).money_flow_index().fillna(50)

    if not os.path.exists(scaler_path):
        raise FileNotFoundError(f"Scaler file {scaler_path} not found.")
    scaler = joblib.load(scaler_path)

    candidate_features = [
        'Close', 'day_of_week', 'month', 'Returns', 'MACD', 'Signal', 'RSI',
        'SMA_20', 'SMA_50', 'EMA_20', 'Volatility', 'ATR', 'Volume_SMA', 'MFI',
    ]

    # NOTE(review): which columns get dropped depends on the correlations in
    # this particular file; scaler.transform presumably needs the same
    # columns the scaler was fitted on - confirm against the training code.
    df_selected = select_features(df_stock[candidate_features])

    # 'Close' is kept on purpose: predict_stock_price inverse-transforms
    # scaler column 0 as the close price.
    features = list(df_selected.columns)

    scaled = np.asarray(scaler.transform(df_stock[features]))

    # Most recent rows, with a leading batch axis added for the model.
    return np.expand_dims(scaled[-window_size:], axis=0)

def predict_stock_price(model_path, scaler_path, file_path, threshold_percentage=THRESHOLD_PERCENTAGE):
    """Predict a close price and compare it with the last known close.

    Loads the Keras model, builds the input window with
    ``load_and_preprocess_data``, predicts a scaled close price and maps it
    back to the original scale with the persisted scaler.

    The prediction is compared with the 'Close' value of the last CSV row,
    which is the most recent known price (that row is also part of the
    input window), not a future price.

    Args:
        model_path: path to the saved Keras model.
        scaler_path: path to the fitted scaler saved with joblib.
        file_path: path to the stock CSV (needs 'Close' and 'Date' columns).
        threshold_percentage: half-width, in percent, of the region around
            the predicted price that counts as accurate.

    Returns:
        Tuple ``(predicted_price, actual_price, is_accurate, accuracy,
        last_date)`` where ``is_accurate`` tells whether the actual price
        lies within +/- ``threshold_percentage`` percent of the predicted
        price, ``accuracy`` is the absolute percentage difference between
        actual and predicted price relative to the predicted price, and
        ``last_date`` is the 'Date' value of the last CSV row as read from
        the file.

    Raises:
        FileNotFoundError: if ``model_path`` does not exist (a missing
            scaler is reported by ``load_and_preprocess_data``).
    """
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file {model_path} not found.")

    model = load_model(model_path)

    X = load_and_preprocess_data(file_path, scaler_path)

    # Prediction on the scaler's normalized scale
    predicted_price_norm = model.predict(X)[0][0]

    scaler = joblib.load(scaler_path)

    # The scaler expects a full feature row, so the prediction is placed in
    # column 0 of an otherwise zero row and only that column is read back.
    # NOTE(review): assumes 'Close' was the first column the scaler was
    # fitted on - confirm against the training code.
    input_array = np.zeros((1, scaler.n_features_in_))
    input_array[0, 0] = predicted_price_norm
    predicted_price_original = scaler.inverse_transform(input_array)[0][0]

    df_stock = pd.read_csv(file_path)
    actual_price = df_stock['Close'].iloc[-1]  # 'Close' of the last row
    last_date = df_stock['Date'].iloc[-1]

    # Threshold region centred on the predicted price, so that this flag
    # agrees with the percentage difference below (also relative to the
    # predicted price).
    lower_bound = predicted_price_original * (1 - threshold_percentage / 100)
    upper_bound = predicted_price_original * (1 + threshold_percentage / 100)
    is_accurate = lower_bound <= actual_price <= upper_bound

    # Absolute percentage difference relative to the predicted price
    accuracy = abs((actual_price - predicted_price_original) / predicted_price_original * 100)

    return predicted_price_original, actual_price, is_accurate, accuracy, last_date


if __name__ == "__main__":
    # Directory scanned for <stock>.csv files; the matching model and scaler
    # files are looked up in the same directory.
    data_directory = '../NULB/'  # Update to your actual data path

    for file_name in os.listdir(data_directory):
        # Guard clause: anything that is not a CSV is ignored
        if not file_name.endswith('.csv'):
            continue

        stock_name = os.path.splitext(file_name)[0]
        file_path = os.path.join(data_directory, file_name)
        model_path = os.path.join(data_directory, f"{stock_name}_tft_model.keras")
        scaler_path = os.path.join(data_directory, f"{stock_name}_scaler.pkl")

        try:
            outcome = predict_stock_price(model_path, scaler_path, file_path)
            predicted_price, actual_price, is_accurate, accuracy, last_column_date = outcome

            # The reported date is the calendar day after the last CSV row
            following_day = datetime.strptime(last_column_date, '%Y-%m-%d') + timedelta(days=1)
            new_date_str = following_day.strftime('%Y-%m-%d')

            print(f"Predicted price for {stock_name}: {predicted_price:.2f} for date: {new_date_str}")
            print(f"Actual price: {actual_price:.2f}")
            print(f"Accuracy: {accuracy:.2f}%")
            print(f"Threshold region: {predicted_price * (1 - THRESHOLD_PERCENTAGE / 100):.2f} to {predicted_price * (1 + THRESHOLD_PERCENTAGE / 100):.2f}")
            print(f"Prediction is within threshold: {is_accurate}")
        except Exception as e:
            # One failing stock must not stop the remaining ones
            print(f"Error processing {stock_name}: {e}")


1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 253ms/step
Predicted price for NUBL: 730.81 for date: 2025-02-07
Actual price: 690.00
Accuracy: 5.58%
Threshold region: 694.27 to 767.35
Prediction is within threshold: False
