In [None]:
import numpy as np
import os
import pandas as pd

from datetime import datetime, timedelta
from dotenv import load_dotenv
 
from alpaca.data.requests import StockBarsRequest
from alpaca.data.historical.stock import StockHistoricalDataClient
from  alpaca.data.timeframe import TimeFrame, TimeFrameUnit

import hvplot.pandas

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from sklearn.linear_model import LinearRegression

In [None]:
# Load variables from a .env file into the process environment; the Alpaca
# credentials are read back later with os.getenv.
load_dotenv()

In [None]:
# Holder for the raw price bars. It stays None until a later cell fills it,
# either from the CSV cache or from an Alpaca download.
source_df = None

In [None]:
# Ticker to model.
symbol = 'TSLA'
# Nothing loaded yet; the cache check and the download cell key off this being None.
source_df = None
# CSV file in the working directory used to cache the downloaded bars.
file_name = f"{symbol}.csv"

In [None]:
# Reuse a previously saved copy of the bars when one exists on disk.
if os.path.exists(file_name):
    source_df = pd.read_csv(file_name)
    # to_csv flattened the (symbol, timestamp) index into ordinary columns and
    # stored the timestamps as text. Rebuild both so the cached frame has the
    # same shape as a fresh download; otherwise the later reset_index calls
    # add stray integer index columns that leak into the feature matrix.
    # utc=True handles the mixed UTC offsets that daylight saving produces in
    # the saved New York timestamps.
    source_df["timestamp"] = pd.to_datetime(
        source_df["timestamp"], utc=True
    ).dt.tz_convert("America/New_York")
    source_df = source_df.set_index(["symbol", "timestamp"])

In [None]:
# No cached copy was loaded: request about ten years of daily bars from Alpaca
# and save them so later runs can skip the download.
if source_df is None:
    display("download")
    # Credentials come from the environment. Note the secret is read from the
    # ALPACA_API_SECRET variable even though it is stored as ALPACA_SECRET_KEY.
    ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
    ALPACA_SECRET_KEY = os.getenv("ALPACA_API_SECRET")
    client = StockHistoricalDataClient(ALPACA_API_KEY, ALPACA_SECRET_KEY)
    timeframe = TimeFrame(1, TimeFrameUnit.Day)
    # Take a single clock reading so both ends of the range share one reference.
    now = datetime.utcnow()
    start = now - timedelta(days=10*365)
    end = now - timedelta(days=1)
    request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)
    # Index level 1 holds the bar timestamps; convert them to New York time.
    source_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)
    # Write to the same path the cache check reads, not a re-derived name.
    source_df.to_csv(file_name)

In [None]:
# Show the last few rows of the raw bars as a quick check that loading worked.
display(source_df.tail())

In [None]:
# Work on a copy of the raw bars. Attach the following row's close, high and
# low to each row (these become the regression targets), then move the symbol
# index level into a column and discard it.
bars_df = (
    source_df.copy()
    .assign(
        **{
            "next close": lambda frame: frame["close"].shift(-1),
            "next high": lambda frame: frame["high"].shift(-1),
            "next low": lambda frame: frame["low"].shift(-1),
        }
    )
    .reset_index(level=[0])
    .drop(columns=["symbol"])
)
display(bars_df)

In [None]:
# Offset of each bar's high, low and open from its own close, computed as
# (value - close) / close. The result is a signed fraction, not scaled by 100.
bars_df["high %"] = bars_df["high"].sub(bars_df["close"]).div(bars_df["close"])
bars_df["low %"] = bars_df["low"].sub(bars_df["close"]).div(bars_df["close"])
bars_df["open %"] = bars_df["open"].sub(bars_df["close"]).div(bars_df["close"])

In [None]:
# Inspect the first and last 100 rows after adding the per-bar ratios.
display(bars_df.head(100), bars_df.tail(100))

In [None]:
# Look-back windows, counted in bars, for the rolling high/low/open features.
# `period` is the multiplier used to label each window's columns (window * period).
# The bars requested from Alpaca are daily and `period` is 1, so these are
# 3-bar and 12-bar windows.
# NOTE(review): this comment previously described 15 minute and 60 minute
# windows, which does not match the daily timeframe - confirm the intent.
period = 1
windows = [3, 12]

In [None]:
# For each look-back window, add the window's high, low and open, followed by
# the same three values expressed relative to the current bar's close.
for window in windows:
    # Number used in the column labels for this window.
    wp = window*period
    # Highest high and lowest low over the last `window` rows, plus the open of
    # the oldest row in that window. The close is the current row's close for
    # every window, so it needs no extra column.
    bars_df[f"high - {wp}"] = bars_df["high"].rolling(window=window).max()
    bars_df[f"low - {wp}"] = bars_df["low"].rolling(window=window).min()
    bars_df[f"open - {wp}"] = bars_df["open"].shift(periods=window-1)
    # Window values as a signed fraction of the current close: (value - close) / close.
    bars_df[f"high % - {wp}"] = bars_df[f"high - {wp}"].sub(bars_df["close"]).div(bars_df["close"])
    bars_df[f"low % - {wp}"] = bars_df[f"low - {wp}"].sub(bars_df["close"]).div(bars_df["close"])
    bars_df[f"open % - {wp}"] = bars_df[f"open - {wp}"].sub(bars_df["close"]).div(bars_df["close"])

In [None]:
# Discard every row that has a missing value in any column, e.g. rows without
# a full look-back window or without a following bar, then inspect both ends
# of what is left.
bars_df = bars_df.dropna()
display(bars_df.head(100), bars_df.tail(100))

In [None]:
# Flatten the index into columns so the model inputs are positionally indexed.
df = bars_df.reset_index()

# Feature matrix: every column except the three targets and the timestamp.
non_feature_columns = ["next close", "next low", "next high", "timestamp"]
X = df.drop(columns=non_feature_columns)

# Regression targets, keyed by the name of the prediction column each one will
# produce in bars_df.
ys = {
    "predicted high": df["next high"],
    "predicted low": df["next low"],
    "predicted close": df["next close"],
}

display(X)

In [None]:
# Fit one linear regression per target and store its predictions for every row.
for name, y in ys.items():
    # The notebook treats row order as time order (the targets are built with
    # shift(-1)), so split chronologically: with shuffle=False the model is
    # fitted on the leading 75% of rows and the trailing 25% stay unseen.
    # A shuffled split would train on bars that come after the held-out ones.
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)
    model = LinearRegression()
    model.fit(X_train, y_train)
    # Predictions cover all rows; those inside the training span are in-sample,
    # so only the trailing rows measure out-of-sample behaviour.
    predicted_y_values = model.predict(X)
    # X was derived from bars_df row for row, so the array lines up positionally.
    bars_df[name] = predicted_y_values

In [None]:
# Show the last few rows, which now include the predicted high/low/close columns.
display(bars_df.tail())

In [None]:
# Flag rows where the actual next close fell inside the predicted band:
# 1 when predicted low <= next close <= predicted high, otherwise 0.
bars_df["high/low success"] = np.where(
    (bars_df["next close"] >= bars_df["predicted low"])
    & (bars_df["next close"] <= bars_df["predicted high"]),
    1,
    0,
)

In [None]:
# Direction check: compare the predicted move from the current close with the
# move that actually happened.
bars_df["predicted close delta"] = bars_df["predicted close"].sub(bars_df["close"])
bars_df["next close delta"] = bars_df["next close"].sub(bars_df["close"])
# The product is non-negative when both moves share a sign, or when either is zero.
bars_df["close product"] = bars_df["next close delta"].mul(bars_df["predicted close delta"])
# 1 for a non-negative product, 0 otherwise.
bars_df["close success"] = np.where(bars_df["close product"] >= 0, 1, 0)

In [None]:
# Line chart of the actual close against the timestamp.
actual_close = bars_df.hvplot.line(
    x="timestamp",
    y="close",
    title="Actual Close"
)
# Bare name as the last expression so the notebook renders the chart.
actual_close

In [None]:
# Line chart of the model's predicted next-bar high against the timestamp.
predicted_high = bars_df.hvplot.line(
    x="timestamp",
    y="predicted high",
    title="Predicted High"
)
# Bare name as the last expression so the notebook renders the chart.
predicted_high

In [None]:
# Line chart of the model's predicted next-bar low against the timestamp.
predicted_low = bars_df.hvplot.line(
    x="timestamp",
    y="predicted low",
    title="Predicted Low"
)
# Bare name as the last expression so the notebook renders the chart.
predicted_low

In [None]:
# Combine the three line charts with `*` (the HoloViews overlay operator) so
# the actual close and the predicted high/low can be compared in one figure.
actual_close * predicted_high * predicted_low

In [None]:
# Summary statistics for bars_df, which by now also holds the engineered
# features, the predictions and the two success flags.
bars_df.describe()