In [40]:
import numpy as np
import os
import pandas as pd

import math

from datetime import datetime, timedelta
from dotenv import load_dotenv
 
import yfinance as yf

import hvplot.pandas

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from sklearn.ensemble import RandomForestRegressor

In [41]:
load_dotenv()

True

In [42]:
source_df = None

In [43]:
symbol = 'AMZN'
file_name = f"{symbol}.csv"
source_df = None

In [44]:
if os.path.exists(file_name):
    source_df = pd.read_csv(file_name)

In [45]:
if source_df is None:
    display("download")
    ticker = yf.Ticker(symbol)
    start = datetime.utcnow() - timedelta(days=10*365)
    end=datetime.utcnow() - timedelta(days=1)
    source_df = ticker.history(start=start, end=end)
    source_df.to_csv(f"{symbol}.csv")

In [46]:
display(source_df.tail())

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
2510,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0
2511,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0
2512,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0
2513,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0
2514,2024-02-23 00:00:00-05:00,174.279999,175.75,173.699997,174.990005,59662900,0.0,0.0


In [47]:
bars_df = source_df.copy()
bars_df["Next Close"] = bars_df["Close"].shift(-1)
bars_df["Next High"] = bars_df["High"].shift(-1)
bars_df["Next Low"] = bars_df["Low"].shift(-1)

In [48]:
# So we want to add the high, low, and open for the 15 minute and 60 minute window
period = 1
windows = [3, 12]

for window in windows:
    wp = window*period
    # compute the rolling high, low, open for a given window.  the close is the same at this time for all windows
    bars_df[f"High - {wp}"] = bars_df["High"].rolling(window=window).max()
    bars_df[f"Low - {wp}"] = bars_df["Low"].rolling(window=window).min()
    bars_df[f"Open - {wp}"] = bars_df["Open"].shift(periods=window-1)

bars_df = bars_df.dropna()
display(bars_df)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,Next Low,High - 3,Low - 3,Open - 3,High - 12,Low - 12,Open - 12
11,2014-03-14 00:00:00-04:00,18.639999,18.928499,18.577499,18.687000,88044000,0.0,0.0,18.752001,18.942499,18.743999,19.155500,18.180500,18.320000,19.155500,17.724001,17.861000
12,2014-03-17 00:00:00-04:00,18.785999,18.942499,18.743999,18.752001,46060000,0.0,0.0,18.938499,18.950001,18.750000,19.155500,18.403999,18.830999,19.155500,17.724001,18.030001
13,2014-03-18 00:00:00-04:00,18.865999,18.950001,18.750000,18.938499,49670000,0.0,0.0,18.661501,18.950001,18.471001,18.950001,18.577499,18.639999,19.155500,17.724001,17.937000
14,2014-03-19 00:00:00-04:00,18.938499,18.950001,18.471001,18.661501,52934000,0.0,0.0,18.448500,18.650000,18.311001,18.950001,18.471001,18.785999,19.155500,18.122999,18.195000
15,2014-03-20 00:00:00-04:00,18.532000,18.650000,18.311001,18.448500,51170000,0.0,0.0,18.031000,18.642000,17.920000,18.950001,18.311001,18.865999,19.155500,18.180500,18.206499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2509,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,167.169998,171.210007,165.750000,167.729996,175.389999,154.809998,157.000000
2510,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,165.740005,171.210007,167.169998,169.210007,175.389999,155.619995,155.869995
2511,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,167.139999,171.169998,165.740005,170.580002,175.389999,165.740005,169.190002
2512,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,171.770004,170.419998,165.740005,168.740005,175.389999,165.740005,170.199997


In [49]:
df = bars_df.reset_index()
ys = {
    "Predicted High": df["Next High"], 
    "Predicted Low": df["Next Low"], 
    "Predicted Close": df["Next Close"],
}

X = df.drop(columns=["Next Close", "Next Low", "Next High", "Date"])
display(X)

Unnamed: 0,index,Open,High,Low,Close,Volume,Dividends,Stock Splits,High - 3,Low - 3,Open - 3,High - 12,Low - 12,Open - 12
0,11,18.639999,18.928499,18.577499,18.687000,88044000,0.0,0.0,19.155500,18.180500,18.320000,19.155500,17.724001,17.861000
1,12,18.785999,18.942499,18.743999,18.752001,46060000,0.0,0.0,19.155500,18.403999,18.830999,19.155500,17.724001,18.030001
2,13,18.865999,18.950001,18.750000,18.938499,49670000,0.0,0.0,18.950001,18.577499,18.639999,19.155500,17.724001,17.937000
3,14,18.938499,18.950001,18.471001,18.661501,52934000,0.0,0.0,18.950001,18.471001,18.785999,19.155500,18.122999,18.195000
4,15,18.532000,18.650000,18.311001,18.448500,51170000,0.0,0.0,18.950001,18.311001,18.865999,19.155500,18.180500,18.206499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2498,2509,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,171.210007,165.750000,167.729996,175.389999,154.809998,157.000000
2499,2510,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,171.210007,167.169998,169.210007,175.389999,155.619995,155.869995
2500,2511,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,171.169998,165.740005,170.580002,175.389999,165.740005,169.190002
2501,2512,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,170.419998,165.740005,168.740005,175.389999,165.740005,170.199997


In [50]:
for name, y in ys.items():
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    model = RandomForestRegressor()
    model.fit(X_train, y_train)
    predicted_y_values = model.predict(X)
    # Create a copy of the original data
    bars_df[name] = predicted_y_values
    bars_df

In [51]:
display(bars_df.tail())

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,Next Low,High - 3,Low - 3,Open - 3,High - 12,Low - 12,Open - 12,Predicted High,Predicted Low,Predicted Close
2509,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,167.169998,171.210007,165.75,167.729996,175.389999,154.809998,157.0,170.839184,167.451289,169.639031
2510,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,165.740005,171.210007,167.169998,169.210007,175.389999,155.619995,155.869995,170.953929,168.126636,169.807379
2511,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,167.139999,171.169998,165.740005,170.580002,175.389999,165.740005,169.190002,170.062708,166.355223,169.478789
2512,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,171.770004,170.419998,165.740005,168.740005,175.389999,165.740005,170.199997,169.999821,167.040342,169.111503
2513,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0,174.990005,175.75,173.699997,174.800003,165.740005,167.830002,175.389999,165.740005,169.389999,177.706371,172.267678,174.09582


In [52]:
bars_df["High/Low Success"] = np.where(
    (bars_df["Next Close"] >= bars_df["Predicted Low"]),
    np.where(
        bars_df["Next Close"] <= bars_df["Predicted High"], 1, 0
    ), 0
)

In [53]:
bars_df["Predicted Close Delta"] = bars_df["Predicted Close"] - bars_df["Close"]
bars_df["Next Close Delta"] = bars_df["Next Close"] - bars_df["Close"]
bars_df["Close Product"] = bars_df["Next Close Delta"] * bars_df["Predicted Close Delta"]
bars_df["Close Success"] = np.where(
    bars_df["Close Product"] >= 0, 1, 0
)

In [54]:
actual_close = bars_df.hvplot.line(
    x="Date",
    y="Next Close",
)

In [55]:
predicted_high = bars_df.hvplot.line(
    x="Date",
    y="Predicted High",
)

In [56]:
predicted_low = bars_df.hvplot.line(
    x="Date",
    y="Predicted Low",
)

In [57]:
predicted_close = bars_df.hvplot.line(
    x="Date",
    y="Predicted Close",
)

In [58]:
actual_close * predicted_high * predicted_low * predicted_close

In [59]:
bars_df.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,Next Low,...,Low - 12,Open - 12,Predicted High,Predicted Low,Predicted Close,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
count,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,...,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0,2503.0
mean,86.806135,87.811165,85.713729,86.784523,79814850.0,0.0,0.00799,86.846969,87.873819,85.775704,...,81.818735,86.13935,87.84176,85.743193,86.802241,0.670795,0.017718,0.062446,2.188519,0.813823
std,51.502549,52.133731,50.822622,51.473194,41352540.0,0.0,0.39976,51.485357,52.145147,50.8353,...,48.658263,51.401862,52.139138,50.811472,51.452832,0.470018,1.287118,2.147179,8.713057,0.389327
min,14.22,14.521,14.2,14.3475,17626000.0,0.0,0.0,14.3475,14.521,14.2,...,14.2,14.22,14.60398,14.28373,14.47814,0.0,-13.061165,-20.314995,-15.034347,0.0
25%,38.167749,38.372499,37.856501,38.04425,53590000.0,0.0,0.0,38.050249,38.381001,37.86125,...,36.834999,37.969,38.411712,37.957808,38.179547,0.0,-0.339357,-0.542999,0.006325,1.0
50%,88.269997,89.199997,87.280502,88.238503,68408000.0,0.0,0.0,88.25,89.208,87.283997,...,84.217003,88.0,88.9467,87.120952,88.170319,1.0,0.015275,0.053497,0.146144,1.0
75%,130.480003,132.040001,128.770004,130.096497,92683000.0,0.0,0.0,130.184998,132.060005,128.785004,...,123.660004,129.794502,132.456089,129.129889,130.625679,1.0,0.429735,0.739498,1.197022,1.0
max,187.199997,188.654007,184.839493,186.570496,477122000.0,0.0,20.0,186.570496,188.654007,184.839493,...,174.957993,187.199997,187.695899,183.747294,184.996922,1.0,8.12134,18.793991,265.337501,1.0


In [60]:
bars_df["Action"] = np.where(
    bars_df["Next Close"] > bars_df["Close"], 1, -1
)

In [61]:
display(bars_df.head())
display(bars_df.tail())

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,...,Open - 12,Predicted High,Predicted Low,Predicted Close,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success,Action
11,2014-03-14 00:00:00-04:00,18.639999,18.928499,18.577499,18.687,88044000,0.0,0.0,18.752001,18.942499,...,17.861,18.842525,18.531815,18.532646,1,-0.154355,0.065001,-0.010033,0,1
12,2014-03-17 00:00:00-04:00,18.785999,18.942499,18.743999,18.752001,46060000,0.0,0.0,18.938499,18.950001,...,18.030001,18.911505,18.536985,18.557695,0,-0.194305,0.186499,-0.036238,0,1
13,2014-03-18 00:00:00-04:00,18.865999,18.950001,18.75,18.938499,49670000,0.0,0.0,18.661501,18.950001,...,17.937,18.933375,18.437915,18.578446,1,-0.360054,-0.276999,0.099734,1,-1
14,2014-03-19 00:00:00-04:00,18.938499,18.950001,18.471001,18.661501,52934000,0.0,0.0,18.4485,18.65,...,18.195,18.803415,18.37345,18.53727,1,-0.124231,-0.213001,0.026461,1,-1
15,2014-03-20 00:00:00-04:00,18.532,18.65,18.311001,18.4485,51170000,0.0,0.0,18.031,18.642,...,18.206499,18.722435,18.2171,18.46362,0,0.01512,-0.4175,-0.006313,0,-1


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,...,Open - 12,Predicted High,Predicted Low,Predicted Close,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success,Action
2509,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,...,157.0,170.839184,167.451289,169.639031,1,-0.160972,-0.290009,0.046683,1,-1
2510,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,...,155.869995,170.953929,168.126636,169.807379,0,0.297384,-2.429993,-0.722641,0,-1
2511,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,...,169.190002,170.062708,166.355223,169.478789,1,2.398787,1.509995,3.622156,1,1
2512,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,...,170.199997,169.999821,167.040342,169.111503,0,0.521507,5.990005,3.12383,1,1
2513,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0,174.990005,175.75,...,169.389999,177.706371,172.267678,174.09582,1,-0.484182,0.410004,-0.198516,0,1


In [62]:
current_position = 0
starting_cash = 1000
current_cash = starting_cash
max_position = 50
for index, row in bars_df.iterrows():
    # Get the current action
    close = row["Close"]
    action = row["Action"]

    # Take the action if possible
    if action > 0:
        if action + current_position <= max_position and action * close < current_cash:
            current_position += action
            current_cash -= action*close
        else:
            action = 0
    elif action < 0:
        if action + current_position >= 0:
            current_position += action
            current_cash += -action*close
        else:
            action = 0

    # Update enabled and position
    bars_df.loc[index, "Position"] = current_position
    bars_df.loc[index, "Cash"] = current_cash

In [63]:
# Compute Holdings
bars_df["Holdings"] = bars_df["Close"] * bars_df["Position"]

# Compute Profit
bars_df["Strategy Value"] = bars_df["Holdings"] + bars_df["Cash"]
starting_close = bars_df.iloc[0]["Close"] 
display(f"starting_close {starting_close}")
starting_shares = math.floor(starting_cash / starting_close)
display(f"starting_shares {starting_shares}")
bars_df["Stock Value"] =  bars_df["Close"] * starting_shares

# Compute Returns
bars_df["Stock Returns"] = bars_df["Close"].pct_change()
bars_df["Strategy Returns"] = bars_df["Strategy Value"].pct_change()

# Compute Cumulative Daily Returns
bars_df["Stock Cumulative Returns"] = (bars_df["Stock Returns"] + 1).cumprod()
bars_df["Strategy Cumulative Returns"] = (bars_df["Strategy Returns"] + 1).cumprod()

'starting_close 18.687000274658203'

'starting_shares 53'

In [64]:
bars_df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,...,Action,Position,Cash,Holdings,Strategy Value,Stock Value,Stock Returns,Strategy Returns,Stock Cumulative Returns,Strategy Cumulative Returns
2509,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,...,-1,46.0,1949.953305,7810.80014,9760.753446,8999.400162,-0.006901,-0.00565,9.086531,9.760753
2510,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,...,-1,45.0,2119.4633,7627.949753,9747.413053,8984.029709,-0.001708,-0.001367,9.071012,9.747413
2511,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,...,1,46.0,1952.383298,7685.680084,9638.063382,8855.240097,-0.014335,-0.011218,8.940975,9.638063
2512,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,...,1,47.0,1783.793302,7923.729828,9707.523129,8935.269806,0.009038,0.007207,9.02178,9.707523
2513,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0,174.990005,175.75,...,1,48.0,1609.2133,8379.840088,9989.053388,9252.740097,0.03553,0.029001,9.342324,9.989053


In [73]:
returns = bars_df.hvplot.line(
    x="Date",
    y=["Strategy Cumulative Returns", "Stock Cumulative Returns"],
).opts(
    title=f"Stock vs Strategy Returns for {symbol} using Random Forest Regressor",
    ylabel="Percent Returns"
)
returns

In [74]:
portfolio = bars_df.hvplot.line(
    x="Date",
    y=["Cash", "Holdings"],
).opts(
    title=f"Cash vs Holdings for {symbol} using Random Forest Regressor",
    ylabel="Dollar Value"
)
portfolio

In [76]:
position = bars_df.hvplot.line(
    x="Date",
    y=["Position"],
).opts(
    title=f"Number of shares of {symbol} using Random Forest Regressor",
    ylabel="Dollar Value"
)
position