In [1]:
#Library imports, grouped as: Python standard library, pandas, yfinance, widget libraries, numpy and matplotlib,
#scikit-learn, and seaborn.

import os
import json
import requests
import itertools

import pandas as pd

import yfinance as yf

import ipywidgets as widgets
import ipyspin
from IPython.display import display, clear_output

import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, precision_recall_curve, precision_score, roc_auc_score, roc_curve

import seaborn as sns

In [8]:
#This method will get the ticker data from Yahoo Finance and return the ticker history.

def get_ticker_data(symbol):
    """Fetch the complete price history Yahoo Finance offers for a ticker.

    Args:
        symbol: Ticker symbol handed to ``yf.Ticker`` (for example ``"MSFT"``).

    Returns:
        Whatever ``Ticker.history(period="max")`` returns for that symbol.
    """
    return yf.Ticker(symbol).history(period="max")

#This method will create a dataframe and apply the close price of the given stock and the target.
#The target is either 1 or 0, with a 1 meaning the price went up and a 0 being the price went down.
#The target is what the ML algorithm will predict.

def prepare_data(ticker_hist):
    """Build the label frame and the one-row-lagged price history.

    Args:
        ticker_hist: DataFrame with at least a ``Close`` column. Rows are
            assumed to be in chronological order, one per trading day.

    Returns:
        A ``(data, ticker_prev)`` tuple.

        ``data`` has two columns: ``Actual Close`` (the row's own close) and
        ``Target`` (1.0 when the close is strictly higher than the previous
        row's close, 0.0 otherwise, NaN when either close is missing, which
        always includes the first row).

        ``ticker_prev`` is ``ticker_hist`` shifted forward by one row, so each
        row only carries values that were known on the previous row.
    """
    close = ticker_hist["Close"]
    prev_close = close.shift(1)

    data = ticker_hist[["Close"]].rename(columns={"Close": "Actual Close"})

    # Vectorised replacement for rolling(2).apply(...) over the whole frame:
    # only the Close column is needed, and a rolling window of 2 yields NaN
    # exactly when one of the two closes is missing.
    comparable = close.notna() & prev_close.notna()
    data["Target"] = (close > prev_close).astype(float).where(comparable)

    # Shift prices forward one row so features never see the row they predict.
    ticker_prev = ticker_hist.shift(1)

    return data, ticker_prev

def create_features(data, ticker_prev):
    """Join the lagged prices onto the label frame and derive model features.

    Args:
        data: Frame with a ``Target`` column (as returned by ``prepare_data``).
        ticker_prev: Lagged price frame containing ``Close``, ``Volume``,
            ``Open``, ``High`` and ``Low`` on the same index as ``data``.

    Returns:
        A ``(data, all_features)`` tuple: a new frame (the input is not
        modified) without its first row and with the feature columns added,
        plus the list of feature column names to feed the model.

    Note:
        The rolling windows are 7, 90 and 365 *rows*; early rows hold NaN
        until each window is full.
    """
    main_features = ["Close", "Volume", "Open", "High", "Low"]
    # The first row is dropped because its lagged prices are all NaN.
    # .copy() detaches the slice so the column assignments below do not
    # raise SettingWithCopyWarning.
    data = data.join(ticker_prev[main_features]).iloc[1:].copy()

    # Only Close and Target feed the rolling statistics, so roll over those
    # two series instead of every column of the frame.
    close = data["Close"]
    data["weekly_mean"] = close.rolling(7).mean() / close
    data["quarterly_mean"] = close.rolling(90).mean() / close
    data["annual_mean"] = close.rolling(365).mean() / close

    data["annual_weekly_mean"] = data["annual_mean"] / data["weekly_mean"]
    data["annual_quarterly_mean"] = data["annual_mean"] / data["quarterly_mean"]
    # Share of "up" rows over the previous 7 rows; shifted so the current
    # row's own target is never part of its feature.
    data["weekly_trend"] = data["Target"].shift(1).rolling(7).mean()

    data["open_close_ratio"] = data["Open"] / close
    data["high_close_ratio"] = data["High"] / close
    data["low_close_ratio"] = data["Low"] / close

    all_features = main_features + [
        "weekly_mean", "quarterly_mean", "annual_mean",
        "annual_weekly_mean", "annual_quarterly_mean",
        "weekly_trend", "open_close_ratio", "high_close_ratio", "low_close_ratio",
    ]

    return data, all_features


def create_rf_model():
    """Create the unfitted random forest classifier used for backtesting.

    Returns:
        A ``RandomForestClassifier`` with 100 trees, ``min_samples_split=200``
        and a fixed ``random_state`` of 1.
    """
    return RandomForestClassifier(
        n_estimators=100,
        min_samples_split=200,
        random_state=1,
    )


def backtest_model(data, rf, features, start=1000, step=750, threshold=.6):
    """Walk-forward backtest: repeatedly fit on the past, predict the next chunk.

    For every ``i`` in ``range(start, len(data), step)`` the model is fitted
    on rows ``[0, i)`` and asked for class-1 probabilities on rows
    ``[i, i + step)``.

    Args:
        data: Frame containing the ``features`` columns and a ``Target`` column.
        rf: Classifier exposing ``fit(X, y)`` and ``predict_proba(X)``.
        features: Names of the columns used as model input.
        start: Number of leading rows reserved for the first training set.
        step: Number of rows predicted per iteration.
        threshold: A row is predicted as 1.0 only when its class-1
            probability is strictly greater than this value, else 0.0.

    Returns:
        A frame indexed like the predicted rows with two columns, ``Target``
        and ``Predictions``.

    Raises:
        ValueError: If no row is left to predict (for example when ``data``
            has ``start`` rows or fewer).
    """
    predictions = []
    for i in range(start, data.shape[0], step):
        # Everything before row i is training data; the next `step` rows are
        # the out-of-sample test chunk.
        train = data.iloc[:i]
        test = data.iloc[i:i + step]

        rf.fit(train[features], train["Target"])

        # Column 1 of predict_proba is the probability of the positive class.
        proba = rf.predict_proba(test[features])[:, 1]
        preds = pd.Series((proba > threshold).astype(float), index=test.index)

        predictions.append(
            pd.concat({"Target": test["Target"], "Predictions": preds}, axis=1)
        )

    if not predictions:
        raise ValueError(
            f"Not enough data to backtest: got {data.shape[0]} rows with "
            f"start={start} and step={step}."
        )

    return pd.concat(predictions)

def strategy_return(backtested_data, data):
    """Compare holding the stock with trading only on predicted "up" rows.

    Args:
        backtested_data: Frame with a ``Predictions`` column.
        data: Frame with a ``Close`` column on a compatible index.

    Returns:
        A ``(overall, result)`` tuple. ``result`` is a copy of
        ``backtested_data`` with two added columns: ``Return`` (log of one
        plus the row-over-row percentage change of ``data["Close"]``) and
        ``Strategy Return`` (``Return`` multiplied by the previous row's
        prediction). ``overall`` is a Series holding the compounded simple
        return of both columns over the last 260 rows.
    """
    result = backtested_data.copy()

    log_return = np.log(1 + data["Close"].pct_change())
    result["Return"] = log_return
    result["Strategy Return"] = result["Return"] * result["Predictions"].shift(1)

    # Log returns add up, so exp(sum) - 1 is the compounded simple return.
    recent = result[-260:][["Return", "Strategy Return"]]
    overall = np.exp(recent.sum()) - 1

    return overall, result

# Plotting methods

def historical_price_plot(ticker_hist, company_name):
    """Show a line chart of the ``Close`` column against the frame's index.

    Args:
        ticker_hist: Frame with a ``Close`` column.
        company_name: Name interpolated into the chart title.
    """
    ax = ticker_hist.plot.line(y="Close", use_index=True)
    ax.set_xlabel('Year')
    ax.set_ylabel('USD Per Share')
    ax.set_title(f"Historical Closing Price of {company_name}")
    plt.show()
    
def return_price_plot(df):
    """Plot ``Return`` against ``Strategy Return`` for the last 260 rows.

    Args:
        df: Frame with ``Return`` and ``Strategy Return`` columns. The index
            entries must support ``strftime`` (they are formatted into the
            title).
    """
    recent = df[-260:][['Return', 'Strategy Return']]
    # The title spans from the first row holding any non-NaN value to the
    # final row of the window.
    first_date = recent.first_valid_index().strftime("%Y-%m-%d")
    last_date = recent.index[-1].strftime("%Y-%m-%d")

    # The size is passed per figure instead of through plt.rcParams, so this
    # function no longer resizes every plot drawn after it.
    ax = recent.plot(
        style={'Return': ':', 'Strategy Return': '--'},
        use_index=True,
        figsize=(20, 15),
    )
    ax.set_xlabel('Date')
    ax.set_ylabel('Return')
    ax.legend()
    ax.set_title(f"Model Return Performance Return from {first_date} - {last_date}")
    plt.show()
    
def bar_chart_predictions(predictions):
    """Draw a two-bar chart of how often the model predicted 0 and 1.

    Args:
        predictions: Series of counts indexed by predicted class (0 and 1),
            such as the result of ``value_counts()``.
    """
    # value_counts() orders by frequency, not by class, so force the order
    # [0, 1] (with 0 for a class that never occurs) to keep the fixed tick
    # labels below attached to the right bars.
    counts = predictions.reindex([0, 1], fill_value=0)

    fig, ax = plt.subplots(figsize=(8, 6))
    counts.plot(kind='bar', ax=ax)
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Price Dropped (0)', 'Price Increased (1)'], rotation=0)
    ax.set_ylabel('Count')
    ax.set_title('Predicted Price Drop Versus Price Increase')
    plt.show()
    
def confusion_matrix_predictions(backtested_data):
    """Show a heatmap of the confusion matrix of ``Target`` vs ``Predictions``.

    Args:
        backtested_data: Frame with ``Target`` (actual class) and
            ``Predictions`` (predicted class) columns.
    """
    cnf_matrix = confusion_matrix(backtested_data["Target"], backtested_data["Predictions"])
    plt.figure(figsize=(8, 5))
    # fmt="d" prints the cell counts as plain integers; the default
    # annotation format switches to scientific notation for large counts.
    sns.heatmap(cnf_matrix, annot=True, fmt="d")
    plt.xlabel('Predicted Target Values')
    plt.ylabel('Actual Target Values')
    plt.title('Data Comparison Confusion Matrix')
    plt.show()

In [3]:
#!jupyter nbextension enable --py widgetsnbextension --sys-prefix
#!jupyter serverextension enable voila --sys-prefix

In [4]:
# Loading Spinner widget
def _build_spinner():
    """Create the loading spinner and apply its size and appearance settings."""
    widget = ipyspin.Spinner()

    # Size of the widget's layout box.
    widget.layout.height = '125px'
    widget.layout.width = '125px'

    # Spinner attributes, applied in their original order.
    settings = {
        'lines': 10,
        'radius': 40,
        'scale': 0.50,
        'corners': 1,
        'color': '#68b7f7',
        'animation': 'spinner-line-fade-quick',
        'direction': 1,
        'top': '50%',
        'left': '50%',
        'z_index': 200000,
        'class_name': 'spinner',
        'position': 'absolute',
    }
    for attribute, value in settings.items():
        setattr(widget, attribute, value)

    return widget


spinner = _build_spinner()

# Image Widget

# Read the logo inside a context manager so the file handle is closed as
# soon as the bytes are loaded.
with open("HedgeVisionLogo.png", "rb") as file:
    image = file.read()

# The file is a PNG, so declare it as one rather than as 'jpg'.
image_headline = widgets.Image(
                    value=image,
                    format='png',
                    width='200'
                )

vbox_headline = widgets.VBox([image_headline])


# Select ticker for NASDAQ

# Company selector; 'Microsoft' is pre-selected.
ticker = widgets.Dropdown(
    description='Company:',
    options=['Microsoft', 'Apple', 'Tesla'],
    value='Microsoft',
    disabled=False,
)

# Button that starts the model run.
button_start = widgets.Button(
    description='Begin Model Testing',
    tooltip='Send',
    style={'description_width': 'initial'},
)

# Output area that receives the printed results and the plots.
output = widgets.Output()


# Company names offered in the dropdown mapped to their ticker symbols.
_COMPANY_SYMBOLS = {"Microsoft": "MSFT", "Apple": "AAPL", "Tesla": "TSLA"}


def on_button_clicked(event):
    """Run the full backtest for the selected company and render the results.

    Downloads the price history, builds the features, backtests the random
    forest, then prints the summary figures and draws the four charts into
    the ``output`` widget.

    Args:
        event: The object passed by the button's click callback (unused).
    """
    with output:
        clear_output()

        company_name = ticker.value
        symbol = _COMPANY_SYMBOLS.get(company_name)
        if symbol is None:
            # Unknown selection: do nothing, as the former if/elif chain did.
            return

        spinner.spin()
        try:
            ticker_hist = get_ticker_data(symbol)
            data, ticker_prev = prepare_data(ticker_hist)
            data, features = create_features(data, ticker_prev)

            rf = create_rf_model()

            # The first 365 rows are skipped: the 365-row rolling features
            # are still NaN for the earliest rows.
            backtested_data = backtest_model(data.iloc[365:], rf, features)

            predictions = backtested_data["Predictions"].value_counts()
            backtested_precision = precision_score(
                backtested_data["Target"], backtested_data["Predictions"]
            )
        finally:
            # Stop the spinner even when the download or the model run fails.
            spinner.stop()

        # .get() avoids a KeyError when the model never predicted one class.
        times_down = predictions.get(0.0, 0)
        times_up = predictions.get(1.0, 0)
        print(f"Backtested Model Predictions: \nPrice Went Down: {times_down} times\nPrice Went Up: {times_up} times\n")

        print(f"Precision Score of Backtested model: \n{str(int(backtested_precision*100))}%\n")

        historical_price_plot(ticker_hist, company_name)

        overall_return, df = strategy_return(backtested_data, data)

        market_pct = int(overall_return['Return']*100)
        strategy_pct = int(overall_return['Strategy Return']*100)

        print(f"The overall percentage return for {company_name} was {market_pct}%\n")
        print(f"The overall percentage return when using the machine learning model was {strategy_pct}%\n")

        # Worded without assuming a loss: either return can be positive.
        if market_pct < strategy_pct:
            print("The model outperformed the actual market return.\n")
        elif market_pct > strategy_pct:
            print("The model underperformed the actual market return.\n")

        return_price_plot(df)

        bar_chart_predictions(predictions)

        confusion_matrix_predictions(backtested_data)

        plt.close('all')
            
# Make sure the spinner is idle before the layout is assembled.
spinner.stop()

# Start button, spinner and result area stacked vertically.
vbox_result = widgets.VBox([button_start, spinner, output])

# Run the analysis whenever the start button is clicked.
button_start.on_click(on_button_clicked)

# Headings for the left-hand column (text_2 and text_3 are defined but not
# placed in the layout below).
text_0 = widgets.HTML(value="<h1>NASDAQ Analyzer V1</h1>")
text_1 = widgets.HTML(value="<h3>Select a ticker</h3>")
text_2 = widgets.HTML(value="<h3>Select a beginning data date</h3>")
text_3 = widgets.HTML(value="<h2>Select graph display (optional)</h2>")

# Left-hand column: title, prompt, company selector, then button and results.
vbox_text = widgets.VBox([text_0, text_1, ticker, vbox_result])

In [5]:
# Final page: controls and results on the left, logo on the right.
page = widgets.HBox(children=[vbox_text, vbox_headline])
display(page)

# Keep the spinner idle until a run is started.
spinner.stop()

HBox(children=(VBox(children=(HTML(value='<h1>NASDAQ Analyzer V1</h1>'), HTML(value='<h3>Select a ticker</h3>'…