In [1]:

def intelligent_stock_prediction(Ticker):
    """Train an LSTM on a ticker's price/volume deltas and suggest BUY or SELL.

    The function is used as a Gradio callback, so every outcome (including
    invalid input or missing data) is reported through the returned string
    instead of prompting on stdin or raising.

    Pipeline (helpers come from the project-local ``scripts`` package):
      1. Fetch data for the ticker from 2021-01-01 through today.
      2. Drop 'Adj Close' and compute deltas for Open/High/Low/Close/Volume.
      3. Build sliding windows of ``n_timesteps`` rows with 'Close_delta' as target.
      4. Split chronologically (80/20), scale with MinMaxScaler fitted on the
         training part only, and train the LSTM via ``lstm_model``.
      5. Report MSE/RMSE/MAE/R² and the share of test samples whose predicted
         sign matches the actual sign (shown to the user as "confidence").
      6. Predict the next Close_delta from the most recent window.

    Args:
        Ticker (str): Stock ticker symbol, e.g. 'AAPL'. Surrounding whitespace
            is ignored and the symbol is upper-cased. If it is empty, an
            alphabetic first command-line argument is used as a fallback.

    Returns:
        str: A BUY/SELL suggestion with the predicted percentage change, or a
        short explanatory message when no prediction can be made.
    """
    # Standard-library imports only up to the ticker check, so invalid input is
    # rejected before any heavy third-party package is loaded.
    import logging
    import os
    import sys
    from datetime import datetime

    # Ensure project_root is in the system path so the `scripts` package resolves
    # when the notebook is run from the notebooks/ directory.
    current_dir = os.getcwd()
    project_root = os.path.abspath(os.path.join(current_dir, '..'))
    if project_root not in sys.path:
        sys.path.append(project_root)

    # Configure logging once (basicConfig is a no-op if handlers already exist).
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()],
    )
    logger = logging.getLogger(__name__)

    # The explicitly supplied ticker always wins; the command line is only a
    # fallback so that a stray alphabetic argv entry cannot override user input.
    Ticker = str(Ticker or "").strip().upper()
    if not Ticker and len(sys.argv) > 1 and sys.argv[1].isalpha():
        Ticker = sys.argv[1].upper()

    # Never block on input(): inside a Gradio worker there is no usable stdin.
    if not Ticker:
        logger.warning("No ticker symbol supplied.")
        return "Please enter a stock ticker symbol (e.g., 'AAPL', 'SPY', 'QQQ', etc.)."

    # Third-party imports actually used below.
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler

    # Project-local helpers.
    from scripts import fetch_stock_data, transform_stock_data_to_delta
    from scripts import create_time_series_windows, lstm_model, prepare_sliding_window_data

    # Define start and end dates
    start_date = '2021-01-01'
    end_date = datetime.today().strftime('%Y-%m-%d')  # Default to today's date

    # Print the ticker and date range to confirm
    print(f"Fetching data for {Ticker} from {start_date} to {end_date}")

    # Fetch stock data; stop here if nothing came back, because every later
    # step would fail on an empty frame.
    stock_data_df = fetch_stock_data(Ticker, start_date, end_date)
    if stock_data_df is None or stock_data_df.empty:
        print("No data to display.")
        return f"No data found for {Ticker} from {start_date} to {end_date}. Please check the ticker symbol."
    # `display` is the rich-output helper provided by the IPython/Jupyter kernel.
    display(stock_data_df.head())

    # Transform stock data to deltas
    columns_to_exclude = ['Adj Close']  # Dropped entirely
    columns_to_keep = []  # Nothing is kept without a delta calculation
    columns_to_calculate = ['Open', 'High', 'Low', 'Close', 'Volume']  # Deltas are computed for all of these

    transformed_data_df = transform_stock_data_to_delta(
        stock_data_df,
        columns_to_exclude=columns_to_exclude,
        columns_to_calculate=columns_to_calculate,
        columns_to_keep=columns_to_keep,
    )
    if transformed_data_df is None or transformed_data_df.empty:
        print("No transformed data to display.")
        return f"Could not compute price deltas for {Ticker}."
    display(transformed_data_df.head())

    # Prepare the data by removing non-delta columns
    delta_only_df = prepare_sliding_window_data(transformed_data_df)
    display(delta_only_df.head())

    # Number of timesteps (sequence length) fed to the LSTM
    n_timesteps = 100

    # The final prediction needs one full window of history.
    if len(delta_only_df) < n_timesteps:
        return (f"Not enough history for {Ticker}: {len(delta_only_df)} rows available, "
                f"{n_timesteps} required.")

    # Step 1: Create the time series windows (X and y)
    X, y = create_time_series_windows(delta_only_df, 'Close_delta', n_timesteps)
    if len(X) < 2:
        # At least one training and one test sample are needed for the split.
        return f"Not enough history for {Ticker} to train and evaluate the model."

    print("First 5 entries of X:\n", X[:5])
    print("First 5 entries of y:\n", y[:5])

    # Step 2: Chronological 80/20 split. Consecutive windows overlap almost
    # entirely, so shuffling would place near-duplicates of test windows in the
    # training set (look-ahead leakage) and inflate the metrics and the reported
    # confidence. Keeping order also makes the "Time Steps" plot meaningful.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_train shape:", y_train.shape)
    print("y_test shape:", y_test.shape)

    # Scalers are fitted on the training data only and reused for test data and
    # for the final prediction window.
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    # Flatten X to 2D [samples * timesteps, features] for scaling, then restore
    # the original 3D shape.
    n_features = X_train.shape[-1]
    X_train_scaled = scaler_X.fit_transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
    X_test_scaled = scaler_X.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

    # MinMaxScaler requires 2D input for y as well.
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)
    y_train_scaled = scaler_y.fit_transform(y_train)
    y_test_scaled = scaler_y.transform(y_test)

    # Define the number of layers and units per layer
    num_layers = 3
    units_per_layer = [50, 100, 50]  # 3 layers with 50, 100, and 50 units, respectively

    # Train the LSTM model and get predictions for the test windows
    y_pred_scaled, history, model = lstm_model(
        X_train_scaled, y_train_scaled, X_test_scaled, y_test_scaled,
        n_timesteps, n_features, num_layers, units_per_layer,
        learning_rate=0.00001, epochs=70, batch_size=28,
    )

    # Plot training and validation loss per epoch
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    epoch_axis = range(1, len(train_loss) + 1)

    loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
    loss_ax.plot(epoch_axis, train_loss, label='Training Loss')
    loss_ax.plot(epoch_axis, val_loss, label='Validation Loss')
    loss_ax.set_title('Training and Validation Loss Over Epochs')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    plt.show()
    plt.close(loss_fig)  # Avoid accumulating open figures across repeated calls

    # Inverse transform the predicted and actual y values to the original scale
    y_pred_original = scaler_y.inverse_transform(y_pred_scaled)
    y_test_original = scaler_y.inverse_transform(y_test_scaled)

    print("First 5 entries of y_pred_scaled", y_pred_scaled[:5])
    print("First 5 entries of y_pred_original", y_pred_original[:5])
    print("First 5 entries of y_test_scaled", y_test_scaled[:5])
    print("First 5 entries of y_test_original", y_test_original[:5])

    # Regression metrics on the original scale
    mse = mean_squared_error(y_test_original, y_pred_original)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test_original, y_pred_original)
    r2 = r2_score(y_test_original, y_pred_original)

    print(f"Mean Squared Error (MSE): {mse:.6f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.6f}")
    print(f"Mean Absolute Error (MAE): {mae:.6f}")
    print(f"R-squared (R²): {r2:.6f}")

    # Predicted vs actual over the (chronologically ordered) test period
    cmp_fig, cmp_ax = plt.subplots(figsize=(10, 6))
    cmp_ax.plot(y_test_original, label='Actual Values', color='blue', linewidth=2)
    cmp_ax.plot(y_pred_original, label='Predicted Values', color='red', linestyle='--', linewidth=2)
    cmp_ax.set_title('Predicted vs Actual Values')
    cmp_ax.set_xlabel('Time Steps')
    cmp_ax.set_ylabel('Value')
    cmp_ax.legend()
    plt.show()
    plt.close(cmp_fig)

    # Directional accuracy: how often the predicted sign matches the actual sign
    comparison_df = pd.DataFrame({'Actual': y_test_original.flatten(), 'Predicted': y_pred_original.flatten()})
    comparison_df['Difference'] = comparison_df['Actual'] - comparison_df['Predicted']
    actual_sign = np.sign(comparison_df['Actual'])
    predicted_sign = np.sign(comparison_df['Predicted'])
    sign_matches = actual_sign == predicted_sign
    print(sign_matches.head(100))

    percentage_match = sign_matches.mean() * 100
    print(f"Percentage of Sign Matches: {percentage_match:.2f}%")

    # Predict the next Close_delta from the most recent n_timesteps rows.
    # Step 1: Extract the last window and shape it as a single-sample batch.
    last_window = delta_only_df.iloc[-n_timesteps:].to_numpy().reshape(1, n_timesteps, delta_only_df.shape[1])

    # Step 2: Scale the features with the scaler fitted on the training data.
    last_window_scaled = scaler_X.transform(last_window.reshape(-1, last_window.shape[-1])).reshape(1, n_timesteps, -1)

    # Step 3: Predict the next Close_delta (scaled).
    predicted_close_delta_scaled = model.predict(last_window_scaled)

    # Step 4: Bring the prediction back to the original Close_delta scale.
    predicted_close_delta = scaler_y.inverse_transform(predicted_close_delta_scaled)[0][0]

    # Decision logic: BUY if positive, otherwise SELL. The delta is a fraction,
    # hence the factor of 100 to show it as a percentage.
    if predicted_close_delta > 0:
        output_text = f"Predicted Close increase for {Ticker} tomorrow is {(predicted_close_delta * 100):.2f}%. Suggestion: BUY with {percentage_match:.2f}% confidence"
    else:
        output_text = f"Predicted Close decrease for {Ticker} tomorrow is {(predicted_close_delta * 100):.2f}%. Suggestion: SELL with {percentage_match:.2f}% confidence"

    print(output_text)

    return output_text

import gradio as gr

# Expose the predictor through a minimal Gradio form: one labelled text input
# for the ticker and one labelled text output for the resulting suggestion.
iface = gr.Interface(
    title="Intelligent Stock Prediction",
    description="Enter a stock ticker symbol (e.g., AAPL, TSLA) to get an analysis.",
    fn=intelligent_stock_prediction,
    inputs=gr.Textbox(label="Stock Ticker"),
    outputs=gr.Textbox(label="Prediction"),
)

# Start the local web server hosting the form.
iface.launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




2024-10-30 18:03:16,260 - INFO - End date provided: 2024-10-30
2024-10-30 18:03:16,261 - INFO - Fetching data for ticker: QQQ from 2021-01-01 to 2024-10-30


Fetching data for QQQ from 2021-01-01 to 2024-10-30


2024-10-30 18:03:22,293 - INFO - Resetting index to make 'Date' a column.
2024-10-30 18:03:22,295 - INFO - Flattening multi-level column names (removing ticker symbol).
2024-10-30 18:03:22,295 - INFO - Successfully fetched and simplified data for ticker 'QQQ'.


Price,Date,Adj Close,Close,High,Low,Open,Volume
0,2021-01-04 00:00:00+00:00,302.171936,309.309998,315.290009,305.179993,315.109985,45305900
1,2021-01-05 00:00:00+00:00,304.663055,311.859985,312.140015,308.290009,308.290009,29323400
2,2021-01-06 00:00:00+00:00,300.442719,307.540009,311.880005,305.980011,307.0,52809600
3,2021-01-07 00:00:00+00:00,307.71106,314.980011,315.839996,310.25,310.279999,30394800
4,2021-01-08 00:00:00+00:00,311.667603,319.029999,319.390015,315.079987,317.339996,33955800


2024-10-30 18:03:22,310 - INFO - Starting transformation of stock data to deltas.
2024-10-30 18:03:22,311 - INFO - Dropping columns: ['Adj Close']
2024-10-30 18:03:22,312 - INFO - Calculating deltas for specified columns: ['Open', 'High', 'Low', 'Close', 'Volume']
2024-10-30 18:03:22,313 - INFO - Transforming column: Open
2024-10-30 18:03:22,314 - INFO - Transforming column: High
2024-10-30 18:03:22,318 - INFO - Transforming column: Low
2024-10-30 18:03:22,320 - INFO - Transforming column: Close
2024-10-30 18:03:22,321 - INFO - Transforming column: Volume
2024-10-30 18:03:22,323 - INFO - Dropping the first row with NaN values after delta calculation.
2024-10-30 18:03:22,325 - INFO - Successfully transformed stock data to deltas.


Price,Date,Close,High,Low,Open,Volume,Open_delta,High_delta,Low_delta,Close_delta,Volume_delta
1,2021-01-05 00:00:00+00:00,311.859985,312.140015,308.290009,308.290009,29323400,-0.0216,-0.01,0.0102,0.0082,-0.3528
2,2021-01-06 00:00:00+00:00,307.540009,311.880005,305.980011,307.0,52809600,-0.0042,-0.0008,-0.0075,-0.0139,0.8009
3,2021-01-07 00:00:00+00:00,314.980011,315.839996,310.25,310.279999,30394800,0.0107,0.0127,0.014,0.0242,-0.4244
4,2021-01-08 00:00:00+00:00,319.029999,319.390015,315.079987,317.339996,33955800,0.0228,0.0112,0.0156,0.0129,0.1172
5,2021-01-11 00:00:00+00:00,314.420013,317.190002,313.75,315.980011,32746400,-0.0043,-0.0069,-0.0042,-0.0145,-0.0356


Price,Open_delta,High_delta,Low_delta,Close_delta,Volume_delta
1,-0.0216,-0.01,0.0102,0.0082,-0.3528
2,-0.0042,-0.0008,-0.0075,-0.0139,0.8009
3,0.0107,0.0127,0.014,0.0242,-0.4244
4,0.0228,0.0112,0.0156,0.0129,0.1172
5,-0.0043,-0.0069,-0.0042,-0.0145,-0.0356


First 5 entries of X:
 [[[-2.160e-02 -1.000e-02  1.020e-02  8.200e-03 -3.528e-01]
  [-4.200e-03 -8.000e-04 -7.500e-03 -1.390e-02  8.009e-01]
  [ 1.070e-02  1.270e-02  1.400e-02  2.420e-02 -4.244e-01]
  ...
  [ 1.430e-02  3.400e-03  1.490e-02  1.400e-03 -3.680e-02]
  [-1.200e-03 -6.000e-04  3.600e-03  3.500e-03 -1.866e-01]
  [ 0.000e+00 -2.000e-04 -8.000e-04 -3.700e-03  9.770e-02]]

 [[-4.200e-03 -8.000e-04 -7.500e-03 -1.390e-02  8.009e-01]
  [ 1.070e-02  1.270e-02  1.400e-02  2.420e-02 -4.244e-01]
  [ 2.280e-02  1.120e-02  1.560e-02  1.290e-02  1.172e-01]
  ...
  [-1.200e-03 -6.000e-04  3.600e-03  3.500e-03 -1.866e-01]
  [ 0.000e+00 -2.000e-04 -8.000e-04 -3.700e-03  9.770e-02]
  [ 1.300e-03  3.100e-03  2.800e-03  3.200e-03  7.140e-02]]

 [[ 1.070e-02  1.270e-02  1.400e-02  2.420e-02 -4.244e-01]
  [ 2.280e-02  1.120e-02  1.560e-02  1.290e-02  1.172e-01]
  [-4.300e-03 -6.900e-03 -4.200e-03 -1.450e-02 -3.560e-02]
  ...
  [ 0.000e+00 -2.000e-04 -8.000e-04 -3.700e-03  9.770e-02]
  [ 1.300e-

  super().__init__(**kwargs)


Epoch 1/70
25/25 - 8s - 329ms/step - loss: 0.1935 - val_loss: 0.1813
Epoch 2/70
25/25 - 3s - 104ms/step - loss: 0.1843 - val_loss: 0.1725
Epoch 3/70
25/25 - 3s - 107ms/step - loss: 0.1759 - val_loss: 0.1647
Epoch 4/70
25/25 - 3s - 106ms/step - loss: 0.1681 - val_loss: 0.1573
Epoch 5/70
25/25 - 2s - 97ms/step - loss: 0.1607 - val_loss: 0.1500
Epoch 6/70
25/25 - 3s - 104ms/step - loss: 0.1532 - val_loss: 0.1424
Epoch 7/70
25/25 - 3s - 114ms/step - loss: 0.1453 - val_loss: 0.1345
Epoch 8/70
25/25 - 3s - 120ms/step - loss: 0.1369 - val_loss: 0.1259
Epoch 9/70
25/25 - 3s - 104ms/step - loss: 0.1278 - val_loss: 0.1166
Epoch 10/70
25/25 - 3s - 117ms/step - loss: 0.1181 - val_loss: 0.1068
Epoch 11/70
25/25 - 3s - 125ms/step - loss: 0.1077 - val_loss: 0.0962
Epoch 12/70
25/25 - 3s - 122ms/step - loss: 0.0964 - val_loss: 0.0848
Epoch 13/70
25/25 - 3s - 109ms/step - loss: 0.0841 - val_loss: 0.0721
Epoch 14/70
25/25 - 3s - 132ms/step - loss: 0.0706 - val_loss: 0.0587
Epoch 15/70
25/25 - 3s - 115ms

  plt.show()
  plt.show()
