In [24]:
import os
import random
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error


In [25]:
# Set seed for reproducibility
seed = 42
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)

In [26]:
# Load the historical data
data = pd.read_csv('historical_data.csv')

In [27]:
# Load the sentiment scores data
sentiment_data = pd.read_csv('daily_sentiment_scores_both.csv')

In [28]:
# Convert date columns to datetime format
data['Date'] = pd.to_datetime(data['Date'])
sentiment_data['datetime'] = pd.to_datetime(sentiment_data['datetime'])

In [29]:
# Merge sentiment scores (neutral, positive, negative) with historical data
data = pd.merge(data, sentiment_data[['datetime', 'neutral', 'positive', 'negative']], left_on='Date', right_on='datetime', how='left')

# Drop extra datetime column
data.drop(columns=['datetime'], inplace=True)

# Fill NaN sentiment scores with 0 (assume neutral sentiment if missing)
data[['neutral', 'positive', 'negative']] = data[['neutral', 'positive', 'negative']].fillna(0)

In [30]:
# Define tickers to loop through
tickers = ['TSLA', 'AAPL', 'AXP','TMUS']

In [31]:
# Dictionary to store results
results = {}

In [32]:
# Loop through each ticker
for ticker in tickers:
    print(f"\n Training model for {ticker}...")

    # Filter data for the current ticker
    ticker_data = data[data['Ticker'] == ticker].copy()

    # Sort by date
    ticker_data.sort_values('Date', inplace=True)

    # Select relevant columns
    numerical_features = ['Open', 'High', 'Low', 'Volume']
    numerical_features_with_sentiment = numerical_features + ['neutral', 'positive', 'negative']

    # Scale numerical features
    scaler_features = MinMaxScaler(feature_range=(0, 1))
    ticker_data[numerical_features] = scaler_features.fit_transform(ticker_data[numerical_features])

    # Separate scaler for 'Close' price
    scaler_close = MinMaxScaler(feature_range=(0, 1))
    ticker_data[['Close']] = scaler_close.fit_transform(ticker_data[['Close']])

    # Preprocessing function
    def preprocess_data(data, n_steps=10):
        X, y = [], []
        feature_columns = numerical_features_with_sentiment + ['Close']

        values = data[feature_columns].values
        for i in range(n_steps, len(values)):
            X.append(values[i - n_steps:i, :])  # Last n_steps rows as features
            y.append(values[i, -1])  # Predict 'Close' price

        return np.array(X), np.array(y)

    # Preprocess the data
    X, y = preprocess_data(ticker_data, n_steps=10)

    # Skip if there isn’t enough data
    if X.shape[0] == 0:
        print(f"Not enough data for {ticker}. Skipping...")
        continue

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Build the LSTM model
    model = Sequential([
        LSTM(70, activation="tanh", return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(30, activation="tanh", return_sequences=True),
        LSTM(10, activation="tanh", return_sequences=False),
        Dense(1, activation='relu'),
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='mse')

    # Train the model
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1, verbose=0)  # Silent training

    # Make predictions
    predictions = model.predict(X_test)

    # Inverse transform only the Close price
    y_test_scaled = scaler_close.inverse_transform(y_test.reshape(-1, 1)).flatten()
    predictions_scaled = scaler_close.inverse_transform(predictions.reshape(-1, 1)).flatten()

    # Evaluate the model
    mae = mean_absolute_error(y_test_scaled, predictions_scaled)
    mape = mean_absolute_percentage_error(y_test_scaled, predictions_scaled)

    # Define a percentage threshold for accuracy
    threshold_percentage = 5  # 5% tolerance
    percentage_errors = np.abs((y_test_scaled - predictions_scaled) / y_test_scaled) * 100
    acc = np.mean(percentage_errors <= threshold_percentage) * 100
    acc2 = (1 - mape) * 100

    # Store results
    results[ticker] = {
        "Mean Absolute Error": mae,
        "Mean Absolute Percentage Error (%)": mape * 100,
        "Accuracy (5% Threshold)": acc,
        "Overall Accuracy (%)": acc2,
    }



 Training model for TSLA...


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 209ms/step

 Training model for AAPL...


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 259ms/step

 Training model for AXP...


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 187ms/step

 Training model for TMUS...


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 198ms/step


In [33]:
# Step 12: Print final results
print("Model Performance Metrics:")

for ticker, metrics in results.items():
    # Get highest and lowest close prices from the original dataset
    stock_data = data[data['Ticker'] == ticker]  # Filter data for the current ticker
    highest_close = stock_data['Close'].max()
    lowest_close = stock_data['Close'].min()

    print(f"\n{ticker} Performance:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.2f}")

    # Print highest and lowest closing prices
    print(f"Highest Closing Price: ${highest_close:.2f}")
    print(f"Lowest Closing Price: ${lowest_close:.2f}")


Model Performance Metrics:

TSLA Performance:
Mean Absolute Error: 9.41
Mean Absolute Percentage Error (%): 4.18
Accuracy (5% Threshold): 63.64
Overall Accuracy (%): 95.82
Highest Closing Price: $317.54
Lowest Closing Price: $108.10

AAPL Performance:
Mean Absolute Error: 2.62
Mean Absolute Percentage Error (%): 1.71
Accuracy (5% Threshold): 94.81
Overall Accuracy (%): 98.29
Highest Closing Price: $196.45
Lowest Closing Price: $125.02

AXP Performance:
Mean Absolute Error: 2.78
Mean Absolute Percentage Error (%): 1.80
Accuracy (5% Threshold): 96.10
Overall Accuracy (%): 98.20
Highest Closing Price: $181.33
Lowest Closing Price: $134.91

TMUS Performance:
Mean Absolute Error: 1.67
Mean Absolute Percentage Error (%): 1.19
Accuracy (5% Threshold): 100.00
Overall Accuracy (%): 98.81
Highest Closing Price: $152.41
Lowest Closing Price: $121.73
