In [45]:
#imports
import numpy as np
import os
import pandas as pd

import math

from datetime import datetime, timedelta
from dotenv import load_dotenv
 
import yfinance as yf

import hvplot.pandas

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from sklearn.linear_model import ElasticNet




In [46]:
load_dotenv()

True

In [47]:
source_df = None

In [48]:
symbol = 'AMZN'
file_name = f"{symbol}.csv"
source_df = None

In [49]:
if os.path.exists(file_name):
    source_df = pd.read_csv(file_name)

In [50]:

if source_df is None:
    display("download")
    ticker = yf.Ticker(symbol)
    start = datetime.utcnow() - timedelta(days=10*365)
    end=datetime.utcnow() - timedelta(days=1)
    source_df = ticker.history(start=start, end=end)
    source_df.to_csv(f"{symbol}.csv")

In [51]:
display(source_df.tail())

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
2508,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0
2509,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0
2510,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0
2511,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0
2512,2024-02-23 00:00:00-05:00,174.279999,175.75,173.699997,174.990005,59662900,0.0,0.0


In [52]:
bars_df = source_df.copy()
bars_df["Next Close"] = bars_df["Close"].shift(-1)
bars_df["Next High"] = bars_df["High"].shift(-1)
bars_df["Next Low"] = bars_df["Low"].shift(-1)

#15 and 60 minute window for high,low and open
period = 1
windows = [3, 12]

for window in windows:
    wp = window*period
    bars_df[f"High - {wp}"] = bars_df["High"].rolling(window=window).max()
    bars_df[f"Low - {wp}"] = bars_df["Low"].rolling(window=window).min()
    bars_df[f"Open - {wp}"] = bars_df["Open"].shift(periods=window-1)
    
bars_df = bars_df.dropna()
display(bars_df)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,Next Low,High - 3,Low - 3,Open - 3,High - 12,Low - 12,Open - 12
11,2014-03-18 00:00:00-04:00,18.865999,18.950001,18.750000,18.938499,49670000,0.0,0.0,18.661501,18.950001,18.471001,18.950001,18.577499,18.639999,19.155500,17.724001,17.937000
12,2014-03-19 00:00:00-04:00,18.938499,18.950001,18.471001,18.661501,52934000,0.0,0.0,18.448500,18.650000,18.311001,18.950001,18.471001,18.785999,19.155500,18.122999,18.195000
13,2014-03-20 00:00:00-04:00,18.532000,18.650000,18.311001,18.448500,51170000,0.0,0.0,18.031000,18.642000,17.920000,18.950001,18.311001,18.865999,19.155500,18.180500,18.206499
14,2014-03-21 00:00:00-04:00,18.549999,18.642000,17.920000,18.031000,108282000,0.0,0.0,17.592501,18.075001,17.430000,18.950001,17.920000,18.938499,19.155500,17.920000,18.702499
15,2014-03-24 00:00:00-04:00,18.004499,18.075001,17.430000,17.592501,97470000,0.0,0.0,17.735500,17.948500,17.441999,18.650000,17.430000,18.532000,19.155500,17.430000,18.729000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2507,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,167.169998,171.210007,165.750000,167.729996,175.389999,154.809998,157.000000
2508,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,165.740005,171.210007,167.169998,169.210007,175.389999,155.619995,155.869995
2509,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,167.139999,171.169998,165.740005,170.580002,175.389999,165.740005,169.190002
2510,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,171.770004,170.419998,165.740005,168.740005,175.389999,165.740005,170.199997


In [53]:
df = bars_df.reset_index()
ys = {
    "Predicted High": df["Next High"], 
    "Predicted Low": df["Next Low"], 
    "Predicted Close": df["Next Close"],
}

X = df.drop(columns=["Next Close", "Next Low", "Next High", "Date"])
display(X)

Unnamed: 0,index,Open,High,Low,Close,Volume,Dividends,Stock Splits,High - 3,Low - 3,Open - 3,High - 12,Low - 12,Open - 12
0,11,18.865999,18.950001,18.750000,18.938499,49670000,0.0,0.0,18.950001,18.577499,18.639999,19.155500,17.724001,17.937000
1,12,18.938499,18.950001,18.471001,18.661501,52934000,0.0,0.0,18.950001,18.471001,18.785999,19.155500,18.122999,18.195000
2,13,18.532000,18.650000,18.311001,18.448500,51170000,0.0,0.0,18.950001,18.311001,18.865999,19.155500,18.180500,18.206499
3,14,18.549999,18.642000,17.920000,18.031000,108282000,0.0,0.0,18.950001,17.920000,18.938499,19.155500,17.920000,18.702499
4,15,18.004499,18.075001,17.430000,17.592501,97470000,0.0,0.0,18.650000,17.430000,18.532000,19.155500,17.430000,18.729000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2496,2507,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,171.210007,165.750000,167.729996,175.389999,154.809998,157.000000
2497,2508,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,171.210007,167.169998,169.210007,175.389999,155.619995,155.869995
2498,2509,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,171.169998,165.740005,170.580002,175.389999,165.740005,169.190002
2499,2510,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,170.419998,165.740005,168.740005,175.389999,165.740005,170.199997


In [54]:
for name, y in ys.items():
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    model = ElasticNet()
    model.fit(X_train, y_train)
    predicted_y_values = model.predict(X)
    # create copy of original data
    bars_df[name] = predicted_y_values
    bars_df

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [55]:
display(bars_df.tail())

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,Next Low,High - 3,Low - 3,Open - 3,High - 12,Low - 12,Open - 12,Predicted High,Predicted Low,Predicted Close
2507,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,167.169998,171.210007,165.75,167.729996,175.389999,154.809998,157.0,171.791441,168.015597,169.857266
2508,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,165.740005,171.210007,167.169998,169.210007,175.389999,155.619995,155.869995,170.771779,167.091065,169.024534
2509,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,167.139999,171.169998,165.740005,170.580002,175.389999,165.740005,169.190002,169.020139,165.2015,167.241072
2510,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,171.770004,170.419998,165.740005,168.740005,175.389999,165.740005,170.199997,170.548265,166.881521,168.760008
2511,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0,174.990005,175.75,173.699997,174.800003,165.740005,167.830002,175.389999,165.740005,169.389999,175.852239,171.895185,173.64042


In [56]:
bars_df["High/Low Success"] = np.where(
    (bars_df["Next Close"] >= bars_df["Predicted Low"]),
    np.where(
        bars_df["Next Close"] <= bars_df["Predicted High"], 1, 0
    ), 0
)

In [57]:
bars_df["Predicted Close Delta"] = bars_df["Predicted Close"] - bars_df["Close"]
bars_df["Next Close Delta"] = bars_df["Next Close"] - bars_df["Close"]
bars_df["Close Product"] = bars_df["Next Close Delta"] * bars_df["Predicted Close Delta"]
bars_df["Close Success"] = np.where(
    bars_df["Close Product"] >= 0, 1, 0
)

In [58]:
actual_close = bars_df.hvplot.line(
    x="Date",
    y="Next Close",
)

In [59]:
predicted_high = bars_df.hvplot.line(
    x="Date",
    y="Predicted High",
)

In [60]:
predicted_low = bars_df.hvplot.line(
    x="Date",
    y="Predicted Low",
)

In [61]:
predicted_close = bars_df.hvplot.line(
    x="Date",
    y="Predicted Close",
)

In [62]:
actual_close * predicted_high * predicted_low * predicted_close

In [63]:
bars_df.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,Next Low,...,Low - 12,Open - 12,Predicted High,Predicted Low,Predicted Close,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
count,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,...,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0,2501.0
mean,86.860588,87.866244,85.76735,86.838953,79825060.0,0.0,0.007997,86.901348,87.928939,85.829305,...,81.869991,86.193883,87.905722,85.802499,86.882463,0.457817,0.04351,0.062396,0.029751,0.495802
std,51.487107,52.118155,50.807534,51.45775,41363240.0,0.0,0.39992,51.469993,52.129529,50.820252,...,48.643926,51.386203,52.069047,50.75815,51.397291,0.498317,0.803654,2.148036,2.576632,0.500082
min,14.22,14.521,14.2,14.3475,17626000.0,0.0,0.0,14.3475,14.521,14.2,...,14.2,14.22,14.263755,12.376203,13.43428,0.0,-4.316161,-20.314995,-25.309275,0.0
25%,38.190498,38.387001,37.862499,38.050499,53594000.0,0.0,0.0,38.054501,38.404499,37.879002,...,36.834999,38.002499,38.543303,37.901636,38.305076,0.0,-0.289048,-0.544998,-0.178776,0.0
50%,88.339996,89.208,87.283997,88.25,68408000.0,0.0,0.0,88.2565,89.293999,87.290001,...,84.252998,88.0,89.3882,87.147353,88.280948,0.0,0.083824,0.052498,-0.00045,0.0
75%,130.570007,132.050003,128.770004,130.149994,92710000.0,0.0,0.0,130.220001,132.070007,128.800003,...,123.660004,129.848999,131.910028,128.724672,130.367348,1.0,0.299889,0.739998,0.169546,1.0
max,187.199997,188.654007,184.839493,186.570496,477122000.0,0.0,20.0,186.570496,188.654007,184.839493,...,174.957993,187.199997,188.22141,184.660555,186.203357,1.0,4.917222,18.793991,29.698221,1.0


In [64]:
bars_df["Action"] = np.where(
    bars_df["Next Close"] > bars_df["Close"], 1, -1
)

In [65]:
display(bars_df.head())
display(bars_df.tail())

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,...,Open - 12,Predicted High,Predicted Low,Predicted Close,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success,Action
11,2014-03-18 00:00:00-04:00,18.865999,18.950001,18.75,18.938499,49670000,0.0,0.0,18.661501,18.950001,...,17.937,19.00811,19.07317,19.034744,0,0.096244,-0.276999,-0.026659,0,-1
12,2014-03-19 00:00:00-04:00,18.938499,18.950001,18.471001,18.661501,52934000,0.0,0.0,18.4485,18.65,...,18.195,18.921737,18.938262,18.922481,0,0.26098,-0.213001,-0.055589,0,-1
13,2014-03-20 00:00:00-04:00,18.532,18.65,18.311001,18.4485,51170000,0.0,0.0,18.031,18.642,...,18.206499,18.623439,18.629348,18.634537,0,0.186038,-0.4175,-0.077671,0,-1
14,2014-03-21 00:00:00-04:00,18.549999,18.642,17.92,18.031,108282000,0.0,0.0,17.592501,18.075001,...,18.702499,18.439341,18.100165,18.278244,0,0.247244,-0.438499,-0.108416,0,-1
15,2014-03-24 00:00:00-04:00,18.004499,18.075001,17.43,17.592501,97470000,0.0,0.0,17.7355,17.9485,...,18.729,17.928969,17.607864,17.786696,1,0.194195,0.143,0.02777,1,1


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,...,Open - 12,Predicted High,Predicted Low,Predicted Close,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success,Action
2507,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,...,157.0,171.791441,168.015597,169.857266,1,0.057262,-0.290009,-0.016607,0,-1
2508,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,...,155.869995,170.771779,167.091065,169.024534,0,-0.48546,-2.429993,1.179665,1,-1
2509,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,...,169.190002,169.020139,165.2015,167.241072,1,0.161071,1.509995,0.243216,1,1
2510,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,...,170.199997,170.548265,166.881521,168.760008,0,0.170012,5.990005,1.01837,1,1
2511,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0,174.990005,175.75,...,169.389999,175.852239,171.895185,173.64042,1,-0.939581,0.410004,-0.385232,0,1


In [66]:
current_position = 0
starting_cash = 1000
current_cash = starting_cash
max_position = 50
for index, row in bars_df.iterrows():
   
    close = row["Close"]
    action = row["Action"]

    if action > 0:
        if action + current_position <= max_position and action * close < current_cash:
            current_position += action
            current_cash -= action*close
        else:
            action = 0
    elif action < 0:
        if action + current_position >= 0:
            current_position += action
            current_cash += -action*close
        else:
            action = 0

    
    bars_df.loc[index, "Position"] = current_position
    bars_df.loc[index, "Cash"] = current_cash

In [67]:
bars_df["Holdings"] = bars_df["Close"] * bars_df["Position"]

# profit
bars_df["Strategy Value"] = bars_df["Holdings"] + bars_df["Cash"]
starting_close = bars_df.iloc[0]["Close"] 
display(f"starting_close {starting_close}")
starting_shares = math.floor(starting_cash / starting_close)
display(f"starting_shares {starting_shares}")
bars_df["Stock Value"] =  bars_df["Close"] * starting_shares

# returns
bars_df["Stock Returns"] = bars_df["Close"].pct_change()
bars_df["Strategy Returns"] = bars_df["Strategy Value"].pct_change()

# cumulative daily returns
bars_df["Stock Cumulative Returns"] = (bars_df["Stock Returns"] + 1).cumprod()
bars_df["Strategy Cumulative Returns"] = (bars_df["Strategy Returns"] + 1).cumprod()

'starting_close 18.938499450683597'

'starting_shares 52'

In [68]:
bars_df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Next Close,Next High,...,Action,Position,Cash,Holdings,Strategy Value,Stock Value,Stock Returns,Strategy Returns,Stock Cumulative Returns,Strategy Cumulative Returns
2507,2024-02-15 00:00:00-05:00,170.580002,171.169998,167.589996,169.800003,49855200,0.0,0.0,169.509995,170.419998,...,-1,46.0,1949.792306,7810.80014,9760.592446,8829.600159,-0.006901,-0.00565,8.965864,9.760592
2508,2024-02-16 00:00:00-05:00,168.740005,170.419998,167.169998,169.509995,48074600,0.0,0.0,167.080002,168.710007,...,-1,45.0,2119.3023,7627.949753,9747.252053,8814.519714,-0.001708,-0.001367,8.95055,9.747252
2509,2024-02-20 00:00:00-05:00,167.830002,168.710007,165.740005,167.080002,41980300,0.0,0.0,168.589996,170.229996,...,1,46.0,1952.222299,7685.680084,9637.902383,8688.160095,-0.014335,-0.011219,8.822241,9.637902
2510,2024-02-21 00:00:00-05:00,168.940002,170.229996,167.139999,168.589996,44575600,0.0,0.0,174.580002,174.800003,...,1,47.0,1783.632302,7923.729828,9707.36213,8766.67981,0.009038,0.007207,8.901972,9.707362
2511,2024-02-22 00:00:00-05:00,173.100006,174.800003,171.770004,174.580002,55392400,0.0,0.0,174.990005,175.75,...,1,48.0,1609.0523,8379.840088,9988.892388,9078.160095,0.03553,0.029002,9.218259,9.988892


In [69]:
stock_cumulative_returns = bars_df.hvplot.line(
    x="Date",
    y="Stock Cumulative Returns",
)

In [70]:
strategy_cumulative_returns = bars_df.hvplot.line(
    x="Date",
    y="Strategy Cumulative Returns",
)

In [71]:
stock_cumulative_returns * strategy_cumulative_returns

In [72]:
cash = bars_df.hvplot.line(
    x="Date",
    y="Cash",
)

In [73]:
holdings = bars_df.hvplot.line(
    x="Date",
    y="Holdings",
)

In [74]:
cash*holdings

In [75]:
position = bars_df.hvplot.line(
    x="Date",
    y="Position",
)
position