<a href="https://colab.research.google.com/github/ColeJMitchell/machine-learning-stock-prediction/blob/main/model_notebooks/model_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import datetime, time
from termcolor import colored
from IPython.display import clear_output
!pip install tensorflow
!git clone https://github.com/ColeJMitchell/machine-learning-stock-prediction
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import random
clear_output()

# **Loading The Data**

In [4]:
test_stocks = ['AAPL', 'NVDA', 'MSTR', 'MP', 'PG', 'GLD', 'TSLA', 'AMC', 'META', 'HIMS']
closing_prices = {}
end_date = datetime.datetime.today()
start_date = end_date - datetime.timedelta(days=210)
for ticker in test_stocks:
    data = yf.download(ticker, start=start_date, end=end_date)
    closing_prices[ticker] = data["Close"].values.squeeze()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


# **Paper Trading Method 1 : Using The LSTM Model**

In [5]:
#Load the LSTM model and simulate it by paper trading
model = load_model("machine-learning-stock-prediction/saved_models/stock_prediction.h5", compile = False)
model.summary()

In [6]:
# Number of closing price values for each stock ticker
print(f"Number of Closing Price Data Points: {len(closing_prices['AAPL'])}")

Number of Closing Price Data Points: 143


## Preprocessing The Data With Sliding Windows To Match Shape Required By The LSTM

In [7]:
window_size = 30
closing_price_windows = {}
# Stocks need to remain separate for the paper trading
for ticker in test_stocks:
    closing_price_windows[ticker] = []
    for i in range(0, 144 - window_size + 1, window_size):
        window = closing_prices[ticker][i:i + window_size]
        closing_price_windows[ticker].append(np.array(window))

print(f"Shape of Closing Price Sliding Windows: {np.array(closing_price_windows[test_stocks[0]]).shape}")

Shape of Closing Price Sliding Windows: (4, 30)


# **Paper Trading Method 1 : LSTM Model**

In [8]:
# A list of stocks purchased by the LSTM model is saved so it can know when to sell,
# the model starts with the same amount of money as the random model - 10,000 dollars
portfolio = {}
initial_wallet = 10000

for stock in test_stocks:
    portfolio[stock] = {
        "Remaining_Cash": 1000,
        "Bought": 0,
        "Purchase_Prices": [],
        "Final_Amount_Sold": 0
    }

# If a stock is predicted to rise in the future (larger value than final entry in sliding window),
# it is bought by the model, $100 of the allocated money for the stock is bought.
# If the stock is predicted to fall below the purchase price the stock is sold at the baseline price (last day of the sliding window)
for stock in test_stocks:
    windows = closing_price_windows[stock]
    for window in windows:
        baseline_price = window[-1]
        scaler = MinMaxScaler(feature_range=(0, 1))
        normalized_window = scaler.fit_transform(window.reshape(-1, 1))

        # The model makes a prediction based on the window and compares it to the last day in the window to decide whether to buy, sell, or do nothing
        prediction = model.predict(normalized_window.reshape(1, 30), verbose=0)
        prediction = scaler.inverse_transform(prediction).item()

        current_remaining_cash = portfolio[stock]["Remaining_Cash"]
        current_bought = portfolio[stock]["Bought"]
        purchase_prices = portfolio[stock]["Purchase_Prices"]

        # When the model thinks the price will go up
        if baseline_price < prediction:
            if current_remaining_cash >= 200:
                # Stocks are bought in 200 dollar increments
                portfolio[stock]["Remaining_Cash"] -= 200
                portfolio[stock]["Bought"] += 200
                purchase_prices.append(baseline_price)

        # When the model things the price will go down
        elif baseline_price > prediction:
            if current_bought > 0:
                # Copy the list to avoid mutation during iteration
                for price in purchase_prices[:]:
                    # Sells the bought stock at the baseline price if the prediction anticipates that the stock price will go down.
                    # The final bought calculation only considers the final liquidation so 100 is removed when sold.
                    if price > prediction:
                        percentage_change = baseline_price / price
                        portfolio[stock]["Remaining_Cash"] += 200 * percentage_change
                        portfolio[stock]["Bought"] -= 200
                        purchase_prices.remove(price)
                        break
        else:
            pass

    # Sells all the stocks that were bought at the very end
    final_stock_price = float(windows[-1][-1])
    for purchase_price in portfolio[stock]["Purchase_Prices"]:
        percentage_change = final_stock_price / purchase_price
        sale_proceeds = 200 * percentage_change
        portfolio[stock]["Final_Amount_Sold"] += sale_proceeds


## Profit And Loss Margins For The LSTM Model

In [9]:
# Output the final earnings and losses
for stock in test_stocks:
  print(f"{stock} detailed stats:")
  print(f"Final Remaining Cash: {int(portfolio[stock]['Remaining_Cash'])}$")
  print(f"Stock Quantity Bought At Liquidation: {portfolio[stock]['Bought']}$")
  print(f"Stock Purchase Prices At Liquidation: {[int(x) for x in portfolio[stock]['Purchase_Prices']]}")
  print(f"Final Amount Sold: {int(portfolio[stock]['Final_Amount_Sold'])}$")
  print("\n")

AAPL detailed stats:
Final Remaining Cash: 963$
Stock Quantity Bought At Liquidation: 0$
Stock Purchase Prices At Liquidation: []
Final Amount Sold: 0$


NVDA detailed stats:
Final Remaining Cash: 200$
Stock Quantity Bought At Liquidation: 800$
Stock Purchase Prices At Liquidation: [135, 131, 120, 107]
Final Amount Sold: 701$


MSTR detailed stats:
Final Remaining Cash: 800$
Stock Quantity Bought At Liquidation: 200$
Stock Purchase Prices At Liquidation: [240]
Final Amount Sold: 226$


MP detailed stats:
Final Remaining Cash: 800$
Stock Quantity Bought At Liquidation: 200$
Stock Purchase Prices At Liquidation: [23]
Final Amount Sold: 200$


PG detailed stats:
Final Remaining Cash: 800$
Stock Quantity Bought At Liquidation: 200$
Stock Purchase Prices At Liquidation: [157]
Final Amount Sold: 206$


GLD detailed stats:
Final Remaining Cash: 1000$
Stock Quantity Bought At Liquidation: 0$
Stock Purchase Prices At Liquidation: []
Final Amount Sold: 0$


TSLA detailed stats:
Final Remaining C

In [10]:
# Final amount made and lost
total = 0
for stock in test_stocks:
  print(f"{stock} overall stats:")
  print(f"Starting Amount: 1000$")
  total += float(portfolio[stock]['Final_Amount_Sold']) + portfolio[stock]['Remaining_Cash']
  print(f"Ending Amount: {int(portfolio[stock]['Final_Amount_Sold'] + portfolio[stock]['Remaining_Cash'])}$")
  print("\n")
print("-----------------------------------------------------------------------------------------------")
percent_change = total / 10000
print("Total Beginning: 10,000$\n")
print(f"Total End: {int(total)}$\n")
print(f"Percent Change From Initial Wallet: {round(percent_change*100,3)}%")

AAPL overall stats:
Starting Amount: 1000$
Ending Amount: 963$


NVDA overall stats:
Starting Amount: 1000$
Ending Amount: 901$


MSTR overall stats:
Starting Amount: 1000$
Ending Amount: 1026$


MP overall stats:
Starting Amount: 1000$
Ending Amount: 1000$


PG overall stats:
Starting Amount: 1000$
Ending Amount: 1006$


GLD overall stats:
Starting Amount: 1000$
Ending Amount: 1000$


TSLA overall stats:
Starting Amount: 1000$
Ending Amount: 906$


AMC overall stats:
Starting Amount: 1000$
Ending Amount: 961$


META overall stats:
Starting Amount: 1000$
Ending Amount: 992$


HIMS overall stats:
Starting Amount: 1000$
Ending Amount: 1004$


-----------------------------------------------------------------------------------------------
Total Beginning: 10,000$

Total End: 9762$

Percent Change From Initial Wallet: 97.626%


# **Paper Trading Method 2 : Random Behavior**

In [11]:
# A list of the randomly purchased stocks is kept for when the model sells the stocks at the end
random_portfolio = {}
for stock in test_stocks:
    random_portfolio[stock] = {
        "Remaining_Cash": 1000,
        "Bought": 0,
        "Purchase_Prices": [],
        "Final_Amount_Sold": 0
    }

# The window structure from the evaluation of the LSTM model is maintained so that the random model performs actions at the same time intervals
for stock in test_stocks:
    windows = closing_price_windows[stock]
    for window in windows:
        baseline_price = window[-1]
        current_remaining_cash = random_portfolio[stock]["Remaining_Cash"]
        current_bought = random_portfolio[stock]["Bought"]
        purchase_prices = random_portfolio[stock]["Purchase_Prices"]

        random_value = random.choice([0, 1, 2])

        if random_value == 0:
            if current_remaining_cash >= 200:
                # Stocks are still bought in 200 dollar increments
                random_portfolio[stock]["Remaining_Cash"] -= 200
                random_portfolio[stock]["Bought"] += 200
                purchase_prices.append(baseline_price)

        elif random_value == 1:
            if current_bought > 0 and purchase_prices:
                # The selling logic is also the same as the LSTM except it picks a stock at random out of the purchased list
                random_index = random.randint(0, len(purchase_prices) - 1)
                purchase_price = purchase_prices.pop(random_index)
                percentage_change = baseline_price / purchase_price
                random_portfolio[stock]["Remaining_Cash"] += 200 * percentage_change
                random_portfolio[stock]["Bought"] -= 200

        else:
            pass

    # Liquidates all the stocks that were bought at the very end
    final_stock_price = float(windows[-1][-1])
    for purchase_price in random_portfolio[stock]["Purchase_Prices"]:
        percentage_change = final_stock_price / purchase_price
        sale_proceeds = 100 * percentage_change
        random_portfolio[stock]["Final_Amount_Sold"] += sale_proceeds


## Profit And Loss Margins For The Random Algorithm

In [12]:
# Output the final earnings and losses
for stock in test_stocks:
  print(f"{stock} detailed stats:")
  print(f"Final Remaining Cash: {int(random_portfolio[stock]['Remaining_Cash'])}$")
  print(f"Stock Quantity Bought At Liquidation: {random_portfolio[stock]['Bought']}$")
  print(f"Stock Purchase Prices At Liquidation: {[int(x) for x in random_portfolio[stock]['Purchase_Prices']]}")
  print(f"Final Amount Sold: {int(random_portfolio[stock]['Final_Amount_Sold'])}$")
  print("\n")

AAPL detailed stats:
Final Remaining Cash: 1003$
Stock Quantity Bought At Liquidation: 0$
Stock Purchase Prices At Liquidation: []
Final Amount Sold: 0$


NVDA detailed stats:
Final Remaining Cash: 763$
Stock Quantity Bought At Liquidation: 200$
Stock Purchase Prices At Liquidation: [135]
Final Amount Sold: 79$


MSTR detailed stats:
Final Remaining Cash: 600$
Stock Quantity Bought At Liquidation: 400$
Stock Purchase Prices At Liquidation: [342, 240]
Final Amount Sold: 193$


MP detailed stats:
Final Remaining Cash: 1000$
Stock Quantity Bought At Liquidation: 0$
Stock Purchase Prices At Liquidation: []
Final Amount Sold: 0$


PG detailed stats:
Final Remaining Cash: 600$
Stock Quantity Bought At Liquidation: 400$
Stock Purchase Prices At Liquidation: [157, 162]
Final Amount Sold: 203$


GLD detailed stats:
Final Remaining Cash: 617$
Stock Quantity Bought At Liquidation: 400$
Stock Purchase Prices At Liquidation: [247, 292]
Final Amount Sold: 218$


TSLA detailed stats:
Final Remaining 

In [13]:
# Final amount made and lost
total = 0
for stock in test_stocks:
  print(f"{stock} overall stats:")
  print(f"Starting Amount: 1000")
  total += int(random_portfolio[stock]['Final_Amount_Sold']) + random_portfolio[stock]['Remaining_Cash']
  print(f"Ending Amount: {int(random_portfolio[stock]['Final_Amount_Sold'] + random_portfolio[stock]['Remaining_Cash'])}$")
  print("\n")
print("-----------------------------------------------------------------------------------------------")
percent_change = total / 10000
print("Total Beginning: 10,000$\n")
print(f"Total End: {int(total)}$\n")
print(f"Percent Change From Initial Wallet: {round(percent_change*100,3)}%")

AAPL overall stats:
Starting Amount: 1000
Ending Amount: 1003$


NVDA overall stats:
Starting Amount: 1000
Ending Amount: 842$


MSTR overall stats:
Starting Amount: 1000
Ending Amount: 793$


MP overall stats:
Starting Amount: 1000
Ending Amount: 1000$


PG overall stats:
Starting Amount: 1000
Ending Amount: 803$


GLD overall stats:
Starting Amount: 1000
Ending Amount: 835$


TSLA overall stats:
Starting Amount: 1000
Ending Amount: 816$


AMC overall stats:
Starting Amount: 1000
Ending Amount: 998$


META overall stats:
Starting Amount: 1000
Ending Amount: 783$


HIMS overall stats:
Starting Amount: 1000
Ending Amount: 972$


-----------------------------------------------------------------------------------------------
Total Beginning: 10,000$

Total End: 8848$

Percent Change From Initial Wallet: 88.485%


# **Paper Trading Method 3 : Sentiment Analysis**



In [14]:
# Load the sentiment data from the parquet
sentiment_data = pd.read_parquet("machine-learning-stock-prediction/data/posts-with-tickers-wss.parquet")
positive_sentiment = {}
negative_sentiment = {}
for stock in test_stocks:
    # Filter rows where ticker == stock and extract the sentiment scores
    positive_sentiment[stock] = round(sum(sentiment_data.loc[sentiment_data['ticker'] == stock, 'ss_pos'].values) / len(sentiment_data.loc[sentiment_data['ticker'] == stock, 'ss_pos'].values), 4) * 100
    negative_sentiment[stock] = round(sum(sentiment_data.loc[sentiment_data['ticker'] == stock, 'ss_neg'].values) / len(sentiment_data.loc[sentiment_data['ticker'] == stock, 'ss_neg'].values), 4) * 100
print("Raw sentiment data: \n")
print("Positive: ")
print(positive_sentiment)
print("\nNegative: ")
print(negative_sentiment)
print("\n")


# Calculate how much the positive and negative sentiment contributes to overall percentage
buy_distribution = {}
sell_distribution = {}
for stock in test_stocks:
  positive = int(positive_sentiment.get(stock))
  negative = int(negative_sentiment.get(stock))
  total = positive + negative
  buy_distribution[stock] = round(positive / total, 2)
  sell_distribution[stock] = round(negative / total, 2)
print("Sentiment Distributions: \n")
print("Positive: ")
print(buy_distribution)
print("\nNegative: ")
print(sell_distribution)

Raw sentiment data: 

Positive: 
{'AAPL': np.float64(10.84), 'NVDA': np.float64(23.849999999999998), 'MSTR': np.float64(24.3), 'MP': np.float64(16.2), 'PG': np.float64(17.2), 'GLD': np.float64(7.9), 'TSLA': np.float64(13.33), 'AMC': np.float64(16.42), 'META': np.float64(20.65), 'HIMS': np.float64(19.5)}

Negative: 
{'AAPL': np.float64(8.309999999999999), 'NVDA': np.float64(7.000000000000001), 'MSTR': np.float64(9.700000000000001), 'MP': np.float64(6.3), 'PG': np.float64(0.0), 'GLD': np.float64(13.700000000000001), 'TSLA': np.float64(10.58), 'AMC': np.float64(12.25), 'META': np.float64(25.650000000000002), 'HIMS': np.float64(7.8)}


Sentiment Distributions: 

Positive: 
{'AAPL': 0.56, 'NVDA': 0.77, 'MSTR': 0.73, 'MP': 0.73, 'PG': 1.0, 'GLD': 0.35, 'TSLA': 0.57, 'AMC': 0.57, 'META': 0.44, 'HIMS': 0.73}

Negative: 
{'AAPL': 0.44, 'NVDA': 0.23, 'MSTR': 0.27, 'MP': 0.27, 'PG': 0.0, 'GLD': 0.65, 'TSLA': 0.43, 'AMC': 0.43, 'META': 0.56, 'HIMS': 0.27}


In [15]:
# A list of the randomly purchased stocks is kept for when the model sells the stocks at the end
sentiment_portfolio = {}
for stock in test_stocks:
    sentiment_portfolio[stock] = {
        "Remaining_Cash": 1000,
        "Bought": 0,
        "Purchase_Prices": [],
        "Final_Amount_Sold": 0
    }

# The window structure from the evaluation of the LSTM model is maintained so that the random model performs actions at the same time intervals
for stock in test_stocks:
    windows = closing_price_windows[stock]
    for window in windows:
        baseline_price = window[-1]
        current_remaining_cash = sentiment_portfolio[stock]["Remaining_Cash"]
        current_bought = sentiment_portfolio[stock]["Bought"]
        purchase_prices = sentiment_portfolio[stock]["Purchase_Prices"]

        # The algorithm uses the sentiment distribution to decide if it is going to buy sell or do nothing
        prob_buy = buy_distribution[stock] - .005
        prob_sell = sell_distribution[stock] - .005

        # The LSTM only does nothing when the prediction price matches the baseline which is extremely rare
        prob_nothing = 0.01

        r = random.random()
        if r < prob_buy:
          action = 'buy'
        elif r < prob_buy + prob_sell:
          action = 'sell'
        else:
          action = 'nothing'

        if action == 'buy':
            if current_remaining_cash >= 200:
                # Stocks are still bought in 200 dollar increments
                sentiment_portfolio[stock]["Remaining_Cash"] -= 200
                sentiment_portfolio[stock]["Bought"] += 200
                purchase_prices.append(baseline_price)

        elif action == 'sell':
            if current_bought > 0 and purchase_prices:
                # The selling logic is also the same as the LSTM except it picks a stock at random out of the purchased list
                random_index = random.randint(0, len(purchase_prices) - 1)
                purchase_price = purchase_prices.pop(random_index)
                percentage_change = baseline_price / purchase_price
                sentiment_portfolio[stock]["Remaining_Cash"] += 200 * percentage_change
                sentiment_portfolio[stock]["Bought"] -= 200

        else:
            pass

    # Liquidates all the stocks that were bought at the very end
    final_stock_price = float(windows[-1][-1])
    for purchase_price in sentiment_portfolio[stock]["Purchase_Prices"]:
        percentage_change = final_stock_price / purchase_price
        sale_proceeds = 100 * percentage_change
        sentiment_portfolio[stock]["Final_Amount_Sold"] += sale_proceeds

## Profit And Loss Margins For The Sentiment Algorithm

In [16]:
# Output the final earnings and losses
for stock in test_stocks:
  print(f"{stock} detailed stats:")
  print(f"Final Remaining Cash: {int(sentiment_portfolio[stock]['Remaining_Cash'])}$")
  print(f"Stock Quantity Bought At Liquidation: {sentiment_portfolio[stock]['Bought']}$")
  print(f"Stock Purchase Prices At Liquidation: {[int(x) for x in sentiment_portfolio[stock]['Purchase_Prices']]}")
  print(f"Final Amount Sold: {int(sentiment_portfolio[stock]['Final_Amount_Sold'])}$")
  print("\n")

AAPL detailed stats:
Final Remaining Cash: 800$
Stock Quantity Bought At Liquidation: 200$
Stock Purchase Prices At Liquidation: [190]
Final Amount Sold: 100$


NVDA detailed stats:
Final Remaining Cash: 982$
Stock Quantity Bought At Liquidation: 0$
Stock Purchase Prices At Liquidation: []
Final Amount Sold: 0$


MSTR detailed stats:
Final Remaining Cash: 775$
Stock Quantity Bought At Liquidation: 200$
Stock Purchase Prices At Liquidation: [272]
Final Amount Sold: 100$


MP detailed stats:
Final Remaining Cash: 200$
Stock Quantity Bought At Liquidation: 800$
Stock Purchase Prices At Liquidation: [20, 19, 23, 23]
Final Amount Sold: 434$


PG detailed stats:
Final Remaining Cash: 200$
Stock Quantity Bought At Liquidation: 800$
Stock Purchase Prices At Liquidation: [177, 157, 170, 162]
Final Amount Sold: 389$


GLD detailed stats:
Final Remaining Cash: 802$
Stock Quantity Bought At Liquidation: 200$
Stock Purchase Prices At Liquidation: [292]
Final Amount Sold: 100$


TSLA detailed stats:

In [17]:
# Final amount made and lost
total = 0
for stock in test_stocks:
  print(f"{stock} overall stats:")
  print(f"Starting Amount: 1000")
  total += int(sentiment_portfolio[stock]['Final_Amount_Sold']) + sentiment_portfolio[stock]['Remaining_Cash']
  print(f"Ending Amount: {int(sentiment_portfolio[stock]['Final_Amount_Sold'] + sentiment_portfolio[stock]['Remaining_Cash'])}$")
  print("\n")
print("-----------------------------------------------------------------------------------------------")
percent_change = total / 10000
print("Total Beginning: 10,000$\n")
print(f"Total End: {int(total)}$\n")
print(f"Percent Change From Initial Wallet: {round(percent_change*100,3)}%")

AAPL overall stats:
Starting Amount: 1000
Ending Amount: 900$


NVDA overall stats:
Starting Amount: 1000
Ending Amount: 982$


MSTR overall stats:
Starting Amount: 1000
Ending Amount: 875$


MP overall stats:
Starting Amount: 1000
Ending Amount: 634$


PG overall stats:
Starting Amount: 1000
Ending Amount: 589$


GLD overall stats:
Starting Amount: 1000
Ending Amount: 902$


TSLA overall stats:
Starting Amount: 1000
Ending Amount: 1017$


AMC overall stats:
Starting Amount: 1000
Ending Amount: 757$


META overall stats:
Starting Amount: 1000
Ending Amount: 763$


HIMS overall stats:
Starting Amount: 1000
Ending Amount: 737$


-----------------------------------------------------------------------------------------------
Total Beginning: 10,000$

Total End: 8158$

Percent Change From Initial Wallet: 81.58%
