In [None]:
import numpy as np
import os
import pandas as pd

import math

from datetime import datetime, timedelta
from dotenv import load_dotenv
 
import yfinance as yf

import hvplot.pandas

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load settings from a local .env file, if one exists.
# NOTE(review): no cell visible in this notebook reads an environment variable
# afterwards — confirm whether this call is still needed.
load_dotenv()

In [None]:
# Placeholder for the price-history frame. The configuration cell that follows
# assigns None to the same name again, so this cell is redundant on a full run.
source_df = None

In [None]:
# Ticker to analyse and the CSV file used to cache its price history.
symbol = 'amzn'
file_name = symbol + ".csv"

# No data yet: the following cells fill this from the cache file or a download.
source_df = None

In [None]:
# Re-use the cached price history when a previous run saved one.
if os.path.exists(file_name):
    # Restore "Date" as the index so a cached frame has the same layout as a
    # freshly downloaded one. Reading it back as an ordinary column leaves a
    # RangeIndex behind, which `reset_index()` later turns into an extra
    # "index" column in the feature matrix.
    source_df = pd.read_csv(file_name, index_col="Date")
    # utc=True parses timestamps that carry differing UTC offsets into one
    # timezone-aware DatetimeIndex instead of an object index.
    source_df.index = pd.to_datetime(source_df.index, utc=True)

In [None]:
# Nothing cached: download roughly ten years of history and cache it.
if source_df is None:
    display("download")
    ticker = yf.Ticker(symbol)
    # Sample the clock once so both ends of the range share the same instant.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; switching
    # to an aware datetime needs a check of how yfinance treats tz-aware inputs.
    now = datetime.utcnow()
    start = now - timedelta(days=10*365)
    end = now - timedelta(days=1)
    source_df = ticker.history(start=start, end=end)
    # Fail loudly instead of caching an empty frame: an empty CSV would be
    # picked up by every later run and break the cells further down.
    if source_df.empty:
        raise ValueError(f"No price history returned for {symbol!r}")
    # Write to the same path the cache-loading cell reads from.
    source_df.to_csv(file_name)

In [None]:
# Sanity check: show the last rows of the loaded or downloaded price history.
display(source_df.tail())

In [None]:
# Work on a new frame and attach the following bar's close/high/low to every
# row (shift(-1) pulls the next row's value up); these are the model targets.
# The final row has no following bar, so its "Next ..." values are NaN.
bars_df = source_df.assign(**{
    "Next Close": source_df["Close"].shift(-1),
    "Next High": source_df["High"].shift(-1),
    "Next Low": source_df["Low"].shift(-1),
})

In [None]:
# Add look-back features for each window length. Windows are counted in bars
# and `period` scales the number used in the column name (3 and 12 here).
period = 1
windows = [3, 12]

for window in windows:
    wp = window * period
    # High/low are the extremes over the last `window` bars; the window's open
    # is the open of its oldest bar. The close needs no extra column because it
    # is the current bar's close for every window.
    bars_df = bars_df.assign(**{
        f"High - {wp}": bars_df["High"].rolling(window=window).max(),
        f"Low - {wp}": bars_df["Low"].rolling(window=window).min(),
        f"Open - {wp}": bars_df["Open"].shift(periods=window - 1),
    })

# Drop rows with incomplete windows or a missing next-bar target.
bars_df = bars_df.dropna()
display(bars_df)

In [None]:
# Flatten the index so feature rows and targets line up positionally.
df = bars_df.reset_index()

# One regression target per predicted column name.
ys = {
    "Predicted High": df["Next High"],
    "Predicted Low": df["Next Low"],
    "Predicted Close": df["Next Close"],
}

# Everything except the targets and the timestamp is a feature.
non_feature_columns = ["Next Close", "Next Low", "Next High", "Date"]
# When bars_df carries a default integer index (history read from the CSV
# cache), reset_index() adds an "index" column. It is only a row number, so keep
# it out of the features; otherwise the feature set differs between the cached
# and the freshly downloaded data.
if "index" in df.columns:
    non_feature_columns.append("index")

X = df.drop(columns=non_feature_columns)
display(X)

In [None]:
# Fit one random forest per target and store its predictions on bars_df.
for name, y in ys.items():
    # shuffle=False holds out the final 25% of rows instead of a random sample.
    # The features are rolling windows over neighbouring bars, so a shuffled
    # split trains on bars that overlap the held-out ones.
    # Assumes the rows are in chronological order — TODO confirm for cached data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

    # Seed the forest so a Restart & Run All reproduces the same predictions.
    model = RandomForestRegressor(random_state=1)
    model.fit(X_train, y_train)

    # Report the fit on the rows the model has not seen.
    display(f"{name}: R^2 on held-out rows = {model.score(X_test, y_test):.4f}")

    # Predict every row so the column covers the whole frame. Only the held-out
    # tail is out-of-sample; earlier rows were part of the training data.
    bars_df[name] = model.predict(X)

In [None]:
# Show the last rows, now including the three "Predicted ..." columns.
display(bars_df.tail())

In [None]:
# 1 when the actual next close fell inside the predicted [low, high] band
# (both bounds inclusive), 0 otherwise.
bars_df["High/Low Success"] = np.where(
    (bars_df["Next Close"] >= bars_df["Predicted Low"])
    & (bars_df["Next Close"] <= bars_df["Predicted High"]),
    1,
    0,
)

In [None]:
# Predicted and realised move from the current close to the next close.
bars_df["Predicted Close Delta"] = bars_df["Predicted Close"].sub(bars_df["Close"])
bars_df["Next Close Delta"] = bars_df["Next Close"].sub(bars_df["Close"])

# The product is non-negative when both moves have the same sign or either is
# zero; that case is counted as a directional hit.
bars_df["Close Product"] = bars_df["Next Close Delta"].mul(bars_df["Predicted Close Delta"])
bars_df["Close Success"] = np.where(bars_df["Close Product"].ge(0), 1, 0)

In [None]:
# Line plot of the actual next-bar close over time.
actual_close = bars_df.hvplot.line(x="Date", y="Next Close")

In [None]:
# Line plot of the model's predicted next-bar high over time.
predicted_high = bars_df.hvplot.line(x="Date", y="Predicted High")

In [None]:
# Line plot of the model's predicted next-bar low over time.
predicted_low = bars_df.hvplot.line(x="Date", y="Predicted Low")

In [None]:
# Line plot of the model's predicted next-bar close over time.
predicted_close = bars_df.hvplot.line(x="Date", y="Predicted Close")

In [None]:
# Combine the four line plots with `*` so the actual close can be compared
# against the predicted high, low and close; the result is the cell's output.
actual_close * predicted_high * predicted_low * predicted_close

In [None]:
# Summary statistics for every numeric column, including the success flags.
bars_df.describe()

In [None]:
# Trading signal: +1 (buy one share) when the model predicts a higher next
# close than the current close, otherwise -1 (sell one share).
# The signal has to come from "Predicted Close". Comparing against the realised
# "Next Close" trades on information that is not known at decision time, which
# makes the backtest a perfect-foresight result that ignores the model.
bars_df["Action"] = np.where(
    bars_df["Predicted Close"] > bars_df["Close"], 1, -1
)

In [None]:
# Show the first and last rows of the frame, one table after the other.
display(bars_df.head(), bars_df.tail())

In [None]:
# Simulate the strategy bar by bar: start flat with `starting_cash`, trade at
# the bar's close, and never hold more than `max_position` shares.
current_position = 0
starting_cash = 1000
current_cash = starting_cash
max_position = 50

# Collect the per-bar state in lists and attach it once after the loop. Writing
# single cells with .loc inside iterrows is slow and label-based, so duplicate
# index labels would overwrite more than one row.
positions = []
cash_balances = []
for close, action in zip(bars_df["Close"], bars_df["Action"]):
    if action > 0:
        # Buy only if the position cap allows it and the cash covers the cost.
        if action + current_position <= max_position and action * close < current_cash:
            current_position += action
            current_cash -= action * close
    elif action < 0:
        # Sell only shares that are actually held (no short selling).
        if action + current_position >= 0:
            current_position += action
            current_cash += -action * close

    # Record the state after this bar, whether or not a trade happened.
    positions.append(current_position)
    cash_balances.append(current_cash)

# Stored as floats, matching the dtype the per-row .loc writes produced.
bars_df["Position"] = np.asarray(positions, dtype=float)
bars_df["Cash"] = np.asarray(cash_balances, dtype=float)

In [None]:
# Market value of the shares held at each bar.
bars_df["Holdings"] = bars_df["Position"] * bars_df["Close"]

# Total strategy value is cash plus holdings.
bars_df["Strategy Value"] = bars_df["Cash"] + bars_df["Holdings"]

# Buy-and-hold benchmark: as many whole shares as the starting cash buys at
# the first close, held for the whole period.
starting_close = bars_df.iloc[0]["Close"]
display(f"starting_close {starting_close}")
starting_shares = math.floor(starting_cash / starting_close)
display(f"starting_shares {starting_shares}")
bars_df["Stock Value"] = starting_shares * bars_df["Close"]

# Bar-over-bar returns of the stock and of the strategy's total value.
bars_df["Stock Returns"] = bars_df["Close"].pct_change()
bars_df["Strategy Returns"] = bars_df["Strategy Value"].pct_change()

# Cumulative growth factors (1.0 = unchanged): running product of 1 + return.
bars_df["Stock Cumulative Returns"] = bars_df["Stock Returns"].add(1).cumprod()
bars_df["Strategy Cumulative Returns"] = bars_df["Strategy Returns"].add(1).cumprod()

In [None]:
# Inspect the final rows, now including the position, cash and return columns.
bars_df.tail()

In [None]:
# Compare the strategy's cumulative return with buy-and-hold. The plotted
# columns are cumulative growth factors built with cumprod over (1 + return).
returns_title = f"Stock vs Strategy Returns for {symbol} using Random Forest Regressor"
returns = bars_df.hvplot.line(
    x="Date", y=["Strategy Cumulative Returns", "Stock Cumulative Returns"]
).opts(title=returns_title, ylabel="Percent Returns")
returns

In [None]:
# Show how the portfolio is divided between cash and share holdings over time.
portfolio_title = f"Cash vs Holdings for {symbol} using Random Forest Regressor"
portfolio = bars_df.hvplot.line(x="Date", y=["Cash", "Holdings"]).opts(
    title=portfolio_title, ylabel="Dollar Value"
)
portfolio

In [None]:
# Plot the number of shares held after each bar.
position_title = f"Number of shares of {symbol} using Random Forest Regressor"
position = bars_df.hvplot.line(x="Date", y=["Position"]).opts(
    title=position_title, ylabel="Number of Shares"
)
position