In [1]:
# Import standard library modules
import sys

# Set the relative path to the project root directory
relative_path_to_root = "../../../"

# Add the project root to the system path for importing in-house modules
sys.path.append(relative_path_to_root)

# Import in-house modules from the 'utilities' package
from utilities import calculate_bollinger_bands, calculate_rsi
from utilities import print_title, print_label, print_footer

In [2]:
# Import libraries for data analysis and visualization
import pandas as pd
import numpy as np
import yfinance as yf # Yahoo Finance data retrieval

# Import libraries for signal processing and peak detection
from scipy.signal import find_peaks

# Import libraries for plotting and visualization
import matplotlib.pyplot as plt
import hvplot.pandas
import holoviews as hv
hv.extension('bokeh') # Use Bokeh as the backend for Holoviews

# Import libraries for data storage and retrieval
from pathlib import Path

In [3]:
# Set display options for Pandas (optional, but often helpful)
pd.set_option('display.float_format', lambda x: '%.3f' % x) # Suppress scientific notation

In [5]:
data = pd.read_csv(
    Path("../../../data/raw_data/sp500_adj_close_2000-01-01_2024-10-14.csv"),
    index_col='Date',
    parse_dates=True
)

print("Shape:", data.shape)
data

Shape: (6234, 501)


Unnamed: 0_level_0,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03 00:00:00+00:00,43.463,0.844,0.000,0.000,8.288,1.278,0.000,16.275,28.215,6.307,...,0.000,11.353,0.000,6.848,18.036,0.000,4.634,0.000,25.028,0.000
2000-01-04 00:00:00+00:00,40.143,0.773,0.000,0.000,8.051,1.271,0.000,14.909,26.787,6.242,...,0.000,10.926,0.000,7.006,17.690,0.000,4.541,0.000,24.667,0.000
2000-01-05 00:00:00+00:00,37.653,0.784,0.000,0.000,8.037,1.389,0.000,15.204,27.178,6.143,...,0.000,11.505,0.000,7.276,18.655,0.000,4.564,0.000,25.139,0.000
2000-01-06 00:00:00+00:00,36.219,0.716,0.000,0.000,8.318,1.375,0.000,15.328,26.435,6.176,...,0.000,12.043,0.000,7.209,19.619,0.000,4.526,0.000,23.778,0.000
2000-01-07 00:00:00+00:00,39.237,0.750,0.000,0.000,8.407,1.451,0.000,16.073,27.178,6.274,...,0.000,11.647,0.000,7.209,19.562,0.000,4.425,0.000,23.514,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-07 00:00:00+00:00,143.890,221.690,192.076,130.230,112.828,107.790,355.354,487.300,226.510,58.860,...,287.990,32.850,106.410,63.010,125.370,134.030,135.880,102.280,365.240,187.280
2024-10-08 00:00:00+00:00,144.610,225.770,191.481,132.500,114.092,109.910,358.919,496.240,229.400,57.550,...,292.940,33.110,102.880,62.930,122.040,134.200,134.720,101.770,368.970,187.370
2024-10-09 00:00:00+00:00,144.800,229.540,193.207,135.490,115.406,113.090,363.600,494.080,233.240,56.970,...,298.050,33.060,104.150,62.580,122.090,136.290,134.760,103.460,373.500,190.170
2024-10-10 00:00:00+00:00,142.740,229.040,192.810,132.810,115.078,113.900,361.070,503.570,230.700,57.290,...,290.240,32.630,103.700,62.160,123.140,134.170,133.270,102.820,369.240,190.280


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6234 entries, 2000-01-03 00:00:00+00:00 to 2024-10-11 00:00:00+00:00
Columns: 501 entries, A to ZTS
dtypes: float64(501)
memory usage: 23.9 MB


### Data Preprocessing
___

In [126]:
data.isnull().sum().sum()

0

In [150]:
def predict_signals(today_to_tomorrow, yesterday_to_today):
    match (today_to_tomorrow, yesterday_to_today):
        case (1, -1):
            return "buy"
        case (1, 1):
            return "hold"
        case (-1, -1):
            return "short"
        case (-1, 1):
            return "sell"
        case _:
            return pd.NA

In [151]:
def process_ticker(df, ticker):
    new_cols = pd.DataFrame(index=df.index)
    new_cols[f"{ticker}_next"] = df[ticker].shift(-1)
    new_cols[f"{ticker}_today_to_tomorrow"] = np.sign(new_cols[f"{ticker}_next"] - df[ticker])
    new_cols[f"{ticker}_prev"] = df[ticker].shift(1)
    new_cols[f"{ticker}_yesterday_to_today"] = np.sign(df[ticker] - new_cols[f"{ticker}_prev"])
    new_cols[f"{ticker}_action"] = new_cols.apply(
        lambda x: predict_signals(x[f"{ticker}_today_to_tomorrow"], x[f"{ticker}_yesterday_to_today"]), axis=1
    )
    return new_cols[f"{ticker}_action"]

In [205]:
test_df = data.copy()
test_df

tickers = test_df.columns.to_numpy()
tickers

# Replace ticker values with action signals

for ticker in tickers:
    test_df[ticker] = process_ticker(test_df, ticker)

test_df.head(20)


KeyboardInterrupt: 

In [200]:
def predict_signals(today_to_tomorrow, yesterday_to_today, prev_action):
    # If difference of today to tomorrow or yesterday to today is NaN, return NA
    if pd.isna(today_to_tomorrow) or pd.isna(yesterday_to_today):
        return pd.NA
    
    match (today_to_tomorrow, yesterday_to_today):
        # Buy signal: today to tomorrow (+) and yesterday to today (-)
        case (1, -1):
            return "buy"
        # Hold signal: today to tomorrow (+) and yesterday to today (+)
        case (1, 1):
            return "hold"
        # Short signal: today to tomorrow (-) and yesterday to today (-)
        case (-1, -1):
            return "short"
        # Sell signal: today to tomorrow (-) and yesterday to today (+)
        case (-1, 1):
            return "sell"
        
        # Special cases for when the difference is zero (Neutral signal)

        # Hold signal (No change: Today to Tomorrow): today to tomorrow (0) and yesterday to today (+)
        case (0, 1):
            return "hold"
        # Short signal (No change: Today to Tomorrow): today to tomorrow (0) and yesterday to today (-)
        case (0, -1):
            return "short"
        
        # Hold signal (No change: Yesterday to Today): today to tomorrow (+) and yesterday to today (0)
        case (1, 0):
            return "hold"
        
        # Short signal (No change: Yesterday to Today): today to tomorrow (-) and yesterday to today (0)
        case (-1, 0):
            return "short"
        
        # Hold or Short signal (No change: Both): today to tomorrow (0) and yesterday to today (0)
        case (0, 0):
            if not pd.isna(prev_action) and prev_action in ["buy", "hold"]:
                return "hold"
            elif not pd.isna(prev_action) and prev_action in ["sell", "short"]:
                return "short"
            else:
                return pd.NA
        
        case _:
            return pd.NA

In [195]:
def process_ticker(df, ticker):
    new_cols = pd.DataFrame(index=df.index)
    new_cols[f"{ticker}_next"] = df[ticker].shift(-1)
    new_cols[f"{ticker}_today_to_tomorrow"] = np.sign(new_cols[f"{ticker}_next"] - df[ticker])
    new_cols[f"{ticker}_prev"] = df[ticker].shift(1)
    new_cols[f"{ticker}_yesterday_to_today"] = np.sign(df[ticker] - new_cols[f"{ticker}_prev"])
    new_cols[f"{ticker}_action"] = new_cols.apply(
        lambda x: predict_signals(x[f"{ticker}_today_to_tomorrow"], x[f"{ticker}_yesterday_to_today"]), axis=1
    )
    return new_cols[f"{ticker}_action"]

In [None]:
def process_ticker(df, ticker):
    new_cols = pd.DataFrame(index=df.index)
    new_cols[f"{ticker}_next"] = df[ticker].shift(-1)
    new_cols[f"{ticker}_today_to_tomorrow"] = np.sign(new_cols[f"{ticker}_next"] - df[ticker])
    new_cols[f"{ticker}_prev"] = df[ticker].shift(1)
    new_cols[f"{ticker}_yesterday_to_today"] = np.sign(df[ticker] - new_cols[f"{ticker}_prev"])
    
    # Initialize the action column with NA
    new_cols[f"{ticker}_action"] = pd.NA
    
    # Iterate over the rows to apply the predict_signals function
    for i in range(1, len(new_cols) + 1):
        prev_action = new_cols[f"{ticker}_action"].iloc[i-1] if i > 0 else pd.NA
        new_cols.loc[new_cols.index[i], f"{ticker}_action"] = predict_signals(
            new_cols[f"{ticker}_today_to_tomorrow"].iloc[i],
            new_cols[f"{ticker}_yesterday_to_today"].iloc[i],
            prev_action
        )
    
    return new_cols[f"{ticker}_action"]

In [203]:
def process_ticker(df, ticker):
    new_cols = pd.DataFrame(index=df.index)
    new_cols[f"{ticker}_next"] = df[ticker].shift(-1)
    new_cols[f"{ticker}_today_to_tomorrow"] = np.sign(new_cols[f"{ticker}_next"] - df[ticker])
    new_cols[f"{ticker}_prev"] = df[ticker].shift(1)
    new_cols[f"{ticker}_yesterday_to_today"] = np.sign(df[ticker] - new_cols[f"{ticker}_prev"])
    
    # Initialize the action column with NA
    new_cols[f"{ticker}_action"] = pd.NA
    
    # Iterate over the rows to apply the predict_signals function
    for i in range(1, len(new_cols)):
        prev_action = new_cols[f"{ticker}_action"].iloc[i-1] if i > 0 else pd.NA
        new_cols.loc[new_cols.index[i], f"{ticker}_action"] = predict_signals(
            new_cols[f"{ticker}_today_to_tomorrow"].iloc[i],
            new_cols[f"{ticker}_yesterday_to_today"].iloc[i],
            prev_action
        )
    
    return new_cols[f"{ticker}_action"]

In [206]:
# Assuming test_df is your DataFrame
test_df = data[["ACGL"]].copy()

# Process the ticker data
test_df["ACGL_action"] = process_ticker(test_df, "ACGL")

# Display the DataFrame with the action signals
test_df.head(30)

Unnamed: 0_level_0,ACGL,ACGL_action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-03 00:00:00+00:00,1.278,
2000-01-04 00:00:00+00:00,1.271,buy
2000-01-05 00:00:00+00:00,1.389,sell
2000-01-06 00:00:00+00:00,1.375,buy
2000-01-07 00:00:00+00:00,1.451,hold
2000-01-10 00:00:00+00:00,1.5,hold
2000-01-11 00:00:00+00:00,1.514,hold
2000-01-12 00:00:00+00:00,1.514,hold
2000-01-13 00:00:00+00:00,1.583,hold
2000-01-14 00:00:00+00:00,1.653,hold


___

In [209]:
def predict_signal(today_to_tomorrow, yesterday_to_today, prev_action):

    # If either `today_to_tomorrow` or `yesterday_to_today` difference is NaN, return NA
    if pd.isna(today_to_tomorrow) or pd.isna(yesterday_to_today):
        return pd.NA
    
    # Define a signal map to determine the action based on the difference values
    signal_map = {
        (1, -1): "buy",    # Buy signal: rising tomorrow, falling today
        (1, 1): "hold",    # Hold signal: rising tomorrow and today
        (0, 1): "hold",    # Hold signal: no change tomorrow, rising today
        (1, 0): "hold",    # Hold signal: rising tomorrow, no change today
        (-1, 1): "sell",   # Sell signal: falling tomorrow, rising today
        (-1, -1): "short", # Short signal: falling tomorrow and today
        (0, -1): "short",  # Short signal: no change tomorrow, falling today
        (-1, 0): "short",  # Short signal: falling tomorrow, no change today
    }

    # If the difference values are in the signal map...
    # Return the corresponding signal from the map: [buy, hold, short, sell]
    if (today_to_tomorrow, yesterday_to_today) in signal_map:
        return signal_map[(today_to_tomorrow, yesterday_to_today)]
    
    # If the difference values are both zero...
    # Handle neutral signals (when both today-to-tomorrow and yesterday-to-today) are zero
    if (today_to_tomorrow, yesterday_to_today) == (0, 0):
        # If there is a previous action...
        # return hold if the previous action was `buy` or `hold`, otherwise return `short`
        if not pd.isna(prev_action):
            return "hold" if prev_action in ["buy", "hold"] else "short"
        return pd.NA

    # Return NA for any other cases
    return pd.NA


def generate_trading_signals (df, ticker):
    next_day = df[ticker].shift(-1)  # Next day's price data
    prev_day = df[ticker].shift(1)  # Previous day's price data

    # Calculate the difference between today's and tomorrow's prices and yesterday's and today's prices
    # (+1: rise, -1: fall, 0: no change)
    today_to_tomorrow = np.sign(next_day - df[ticker])
    yesterday_to_today = np.sign(df[ticker] - prev_day)

    # Initialize the action column with NA
    # This column will store the action signals for each day: [buy, hold, short, sell]
    actions = pd.Series(
        pd.NA,
        index=df.index
    )

    # Iterate over the rows and apply the predict_signal function
    for i in range(1, len(df)):
        prev_action = actions.iloc[i - 1] if i > 0 else pd.NA
        actions.iloc[i] = predict_signal(
            today_to_tomorrow.iloc[i], yesterday_to_today.iloc[i], prev_action)

    return actions

In [210]:
# Assuming test_df is your DataFrame
test_df = data[["ACGL"]].copy()

# Process the ticker data
test_df["ACGL_action"] = process_ticker(test_df, "ACGL")

# Display the DataFrame with the action signals
test_df.head(30)

Unnamed: 0_level_0,ACGL,ACGL_action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-03 00:00:00+00:00,1.278,
2000-01-04 00:00:00+00:00,1.271,buy
2000-01-05 00:00:00+00:00,1.389,sell
2000-01-06 00:00:00+00:00,1.375,buy
2000-01-07 00:00:00+00:00,1.451,hold
2000-01-10 00:00:00+00:00,1.5,hold
2000-01-11 00:00:00+00:00,1.514,hold
2000-01-12 00:00:00+00:00,1.514,hold
2000-01-13 00:00:00+00:00,1.583,hold
2000-01-14 00:00:00+00:00,1.653,hold


___

In [208]:
new_df = pd.DataFrame()

new_df["ACGL"] = data["ACGL"].copy()
new_df["ACGL_action"] = test_df["ACGL"].copy()

new_df.head(30)

Unnamed: 0_level_0,ACGL,ACGL_action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-03 00:00:00+00:00,1.278,1.278
2000-01-04 00:00:00+00:00,1.271,1.271
2000-01-05 00:00:00+00:00,1.389,1.389
2000-01-06 00:00:00+00:00,1.375,1.375
2000-01-07 00:00:00+00:00,1.451,1.451
2000-01-10 00:00:00+00:00,1.5,1.5
2000-01-11 00:00:00+00:00,1.514,1.514
2000-01-12 00:00:00+00:00,1.514,1.514
2000-01-13 00:00:00+00:00,1.583,1.583
2000-01-14 00:00:00+00:00,1.653,1.653


In [147]:
test_df.head(20)

Unnamed: 0_level_0,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03 00:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2000-01-04 00:00:00+00:00,short,buy,,,short,buy,,buy,buy,short,...,,buy,,hold,buy,,buy,,buy,
2000-01-05 00:00:00+00:00,short,sell,,,buy,sell,,hold,sell,buy,...,,hold,,sell,hold,,sell,,sell,
2000-01-06 00:00:00+00:00,buy,buy,,,hold,buy,,hold,buy,hold,...,,sell,,,sell,,short,,short,
2000-01-07 00:00:00+00:00,hold,sell,,,sell,hold,,hold,hold,,...,,short,,,short,,buy,,buy,
2000-01-10 00:00:00+00:00,sell,short,,,short,hold,,sell,sell,,...,,short,,,buy,,sell,,hold,
2000-01-11 00:00:00+00:00,short,short,,,short,,,short,buy,buy,...,,short,,,sell,,short,,sell,
2000-01-12 00:00:00+00:00,buy,buy,,,short,,,buy,hold,sell,...,,short,,sell,buy,,buy,,buy,
2000-01-13 00:00:00+00:00,hold,hold,,,buy,hold,,hold,hold,buy,...,,buy,,buy,sell,,sell,,hold,
2000-01-14 00:00:00+00:00,hold,hold,,,sell,hold,,sell,sell,hold,...,,sell,,sell,buy,,short,,sell,


In [133]:
ex = data[["AAPL"]].copy()

ex = process_ticker(ex, "AAPL")

ex

Unnamed: 0_level_0,AAPL,AAPL_next,AAPL_today_to_tomorrow,AAPL_prev,AAPL_yesterday_to_today,AAPL_action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03 00:00:00+00:00,0.844,0.773,-1.000,,,
2000-01-04 00:00:00+00:00,0.773,0.784,1.000,0.844,-1.000,buy
2000-01-05 00:00:00+00:00,0.784,0.716,-1.000,0.773,1.000,sell
2000-01-06 00:00:00+00:00,0.716,0.750,1.000,0.784,-1.000,buy
2000-01-07 00:00:00+00:00,0.750,0.737,-1.000,0.716,1.000,sell
...,...,...,...,...,...,...
2024-10-07 00:00:00+00:00,221.690,225.770,1.000,226.800,-1.000,buy
2024-10-08 00:00:00+00:00,225.770,229.540,1.000,221.690,1.000,hold
2024-10-09 00:00:00+00:00,229.540,229.040,-1.000,225.770,1.000,sell
2024-10-10 00:00:00+00:00,229.040,227.550,-1.000,229.540,-1.000,short


In [125]:
ex = data[["AAPL"]].copy()

ex["AAPL_next"] = ex["AAPL"].shift(-1)

ex["AAPL_tod_to_tomorrow"] = ex["AAPL_next"] - ex["AAPL"]
ex["AAPL_tod_to_tomorrow"] = np.sign(ex["AAPL_tod_to_tomorrow"])

ex["AAPL_prev"] = ex["AAPL"].shift(1)

ex["AAPL_yes_to_today"] = ex["AAPL"] - ex["AAPL_prev"]
ex["AAPL_yes_to_today"] = np.sign(ex["AAPL_yes_to_today"])

display(ex[["AAPL", "AAPL_next", "AAPL_tod_to_tomorrow", "AAPL_prev", "AAPL_yes_to_today"]].head(20))

# =============================================================================

def predict_signals(ticker_col):
    match (today_to_tomorrow, yesterday_to_today):
        case (1, -1):
            return "buy"
        case (1, 1):
            return "hold"
        case (-1, -1):
            return "short"
        case (-1, 1):
            return "sell"
        case _:
            return pd.NA

# =============================================================================
ex["action"] = ex.apply(lambda x: predict_signals(x["AAPL_tod_to_tomorrow"], x["AAPL_yes_to_today"]), axis=1)

display(ex[["AAPL", "action"]].head(20))
# display(ex[["AAPL", "action"]].tail(20))

Unnamed: 0_level_0,AAPL,AAPL_next,AAPL_tod_to_tomorrow,AAPL_prev,AAPL_yes_to_today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-03 00:00:00+00:00,0.844,0.773,-1.0,,
2000-01-04 00:00:00+00:00,0.773,0.784,1.0,0.844,-1.0
2000-01-05 00:00:00+00:00,0.784,0.716,-1.0,0.773,1.0
2000-01-06 00:00:00+00:00,0.716,0.75,1.0,0.784,-1.0
2000-01-07 00:00:00+00:00,0.75,0.737,-1.0,0.716,1.0
2000-01-10 00:00:00+00:00,0.737,0.699,-1.0,0.75,-1.0
2000-01-11 00:00:00+00:00,0.699,0.657,-1.0,0.737,-1.0
2000-01-12 00:00:00+00:00,0.657,0.729,1.0,0.699,-1.0
2000-01-13 00:00:00+00:00,0.729,0.757,1.0,0.657,1.0
2000-01-14 00:00:00+00:00,0.757,0.784,1.0,0.729,1.0


Unnamed: 0_level_0,AAPL,action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-03 00:00:00+00:00,0.844,
2000-01-04 00:00:00+00:00,0.773,buy
2000-01-05 00:00:00+00:00,0.784,sell
2000-01-06 00:00:00+00:00,0.716,buy
2000-01-07 00:00:00+00:00,0.75,sell
2000-01-10 00:00:00+00:00,0.737,short
2000-01-11 00:00:00+00:00,0.699,short
2000-01-12 00:00:00+00:00,0.657,buy
2000-01-13 00:00:00+00:00,0.729,hold
2000-01-14 00:00:00+00:00,0.757,hold


In [116]:
def determine_action(current_diff, next_diff):
    if pd.isna(current_diff) or pd.isna(next_diff):
        return np.nan
    elif current_diff > 0 and (pd.isna(next_diff) or next_diff < 0):
        return 'buy'
    elif current_diff > 0 and next_diff > 0:
        return 'hold'
    elif current_diff < 0 and next_diff > 0:
        return 'sell'
    else:
        return 'hold'

# Apply the determine_action function to create the action column
ex['action'] = ex.apply(
    lambda row: determine_action(row['AAPL_diff'], row['AAPL_next_diff']), axis=1
)


display(ex[['AAPL', 'action']].head(10))
display(ex[['AAPL', 'action']].tail(10))

KeyError: 'AAPL_diff'

In [61]:
# Create a difference column for each ticker
new_columns = {}

for ticker in tickers:
    new_columns[f"{ticker}_diff"] = test_df[f"{ticker}_next"] - test_df[ticker]

test_df = pd.concat([test_df, pd.DataFrame(new_columns)], axis=1)

test_df

Unnamed: 0_level_0,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW_diff,WY_diff,WYNN_diff,XEL_diff,XOM_diff,XYL_diff,YUM_diff,ZBH_diff,ZBRA_diff,ZTS_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03 00:00:00+00:00,43.463,0.844,0.000,0.000,8.288,1.278,0.000,16.275,28.215,6.307,...,0.000,-0.426,0.000,0.158,-0.345,0.000,-0.093,0.000,-0.361,0.000
2000-01-04 00:00:00+00:00,40.143,0.773,0.000,0.000,8.051,1.271,0.000,14.909,26.787,6.242,...,0.000,0.579,0.000,0.270,0.964,0.000,0.023,0.000,0.472,0.000
2000-01-05 00:00:00+00:00,37.653,0.784,0.000,0.000,8.037,1.389,0.000,15.204,27.178,6.143,...,0.000,0.538,0.000,-0.068,0.964,0.000,-0.039,0.000,-1.361,0.000
2000-01-06 00:00:00+00:00,36.219,0.716,0.000,0.000,8.318,1.375,0.000,15.328,26.435,6.176,...,0.000,-0.396,0.000,0.000,-0.058,0.000,-0.101,0.000,-0.264,0.000
2000-01-07 00:00:00+00:00,39.237,0.750,0.000,0.000,8.407,1.451,0.000,16.073,27.178,6.274,...,0.000,-0.051,0.000,0.000,-0.273,0.000,0.179,0.000,0.792,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-07 00:00:00+00:00,143.890,221.690,192.076,130.230,112.828,107.790,355.354,487.300,226.510,58.860,...,4.950,0.260,-3.530,-0.080,-3.330,0.170,-1.160,-0.510,3.730,0.090
2024-10-08 00:00:00+00:00,144.610,225.770,191.481,132.500,114.092,109.910,358.919,496.240,229.400,57.550,...,5.110,-0.050,1.270,-0.350,0.050,2.090,0.040,1.690,4.530,2.800
2024-10-09 00:00:00+00:00,144.800,229.540,193.207,135.490,115.406,113.090,363.600,494.080,233.240,56.970,...,-7.810,-0.430,-0.450,-0.420,1.050,-2.120,-1.490,-0.640,-4.260,0.110
2024-10-10 00:00:00+00:00,142.740,229.040,192.810,132.810,115.078,113.900,361.070,503.570,230.700,57.290,...,-0.210,0.400,1.050,0.150,0.470,2.690,0.750,0.680,5.780,-0.200


In [71]:
ex = test_df[["AAPL", "AAPL_next", "AAPL_diff"]].copy()

ex["AAPL_next_diff"] = ex["AAPL_diff"].shift(-1)

ex

Unnamed: 0_level_0,AAPL,AAPL_next,AAPL_diff,AAPL_next_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03 00:00:00+00:00,0.844,0.773,-0.071,0.011
2000-01-04 00:00:00+00:00,0.773,0.784,0.011,-0.068
2000-01-05 00:00:00+00:00,0.784,0.716,-0.068,0.034
2000-01-06 00:00:00+00:00,0.716,0.750,0.034,-0.013
2000-01-07 00:00:00+00:00,0.750,0.737,-0.013,-0.038
...,...,...,...,...
2024-10-07 00:00:00+00:00,221.690,225.770,4.080,3.770
2024-10-08 00:00:00+00:00,225.770,229.540,3.770,-0.500
2024-10-09 00:00:00+00:00,229.540,229.040,-0.500,-1.490
2024-10-10 00:00:00+00:00,229.040,227.550,-1.490,


In [72]:
# def determine_action(current_price, next_price):

#     if pd.isna(current_price) or pd.isna(next_price):
#         return np.nan
    
#     difference = next_price - current_price

#     if difference > 0:
#         return "buy"
    
#     elif difference < 0:
#         return "sell"
    
#     else:

def determine_action(current_diff, next_diff):
    if pd.isna(current_diff):
        return np.nan
    elif current_diff > 0 and (pd.isna(next_diff) or next_diff < 0):
        return 'buy'
    elif current_diff > 0 and next_diff > 0:
        return 'hold'
    elif current_diff < 0 and next_diff > 0:
        return 'sell'
    else:
        return 'hold'
        

In [73]:
ex["action"] = ex.apply(lambda x: determine_action(x["AAPL_diff"], x["AAPL_next_diff"]), axis=1)

In [77]:
display(ex.head(10))
display(ex[["AAPL", "action"]].tail(10))

Unnamed: 0_level_0,AAPL,AAPL_next,AAPL_diff,AAPL_next_diff,action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-03 00:00:00+00:00,0.844,0.773,-0.071,0.011,sell
2000-01-04 00:00:00+00:00,0.773,0.784,0.011,-0.068,buy
2000-01-05 00:00:00+00:00,0.784,0.716,-0.068,0.034,sell
2000-01-06 00:00:00+00:00,0.716,0.75,0.034,-0.013,buy
2000-01-07 00:00:00+00:00,0.75,0.737,-0.013,-0.038,hold
2000-01-10 00:00:00+00:00,0.737,0.699,-0.038,-0.042,hold
2000-01-11 00:00:00+00:00,0.699,0.657,-0.042,0.072,sell
2000-01-12 00:00:00+00:00,0.657,0.729,0.072,0.028,hold
2000-01-13 00:00:00+00:00,0.729,0.757,0.028,0.026,hold
2000-01-14 00:00:00+00:00,0.757,0.784,0.026,0.02,hold


Unnamed: 0_level_0,AAPL,action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-09-30 00:00:00+00:00,233.0,sell
2024-10-01 00:00:00+00:00,226.21,buy
2024-10-02 00:00:00+00:00,226.78,sell
2024-10-03 00:00:00+00:00,225.67,buy
2024-10-04 00:00:00+00:00,226.8,sell
2024-10-07 00:00:00+00:00,221.69,hold
2024-10-08 00:00:00+00:00,225.77,buy
2024-10-09 00:00:00+00:00,229.54,hold
2024-10-10 00:00:00+00:00,229.04,hold
2024-10-11 00:00:00+00:00,227.55,


In [44]:
new_columns = {}

for ticker in tickers:
    # Create a new column for the next day's price
    next_day_column = f"{ticker}_next"
    test_df[next_day_column] = test_df[ticker].shift(-1)
    
    # Apply the determine_action function to create the action column
    new_columns[f"{ticker}_action"] = test_df.apply(
        lambda row: determine_action(row[ticker], row[next_day_column]), axis=1
    )

# Concatenate the new columns to the original DataFrame
test_df = pd.concat([test_df, pd.DataFrame(new_columns)], axis=1)

In [45]:
test_df[["AAPL", "AAPL_next", "AAPL_action"]]

Unnamed: 0_level_0,AAPL,AAPL_next,AAPL_action,AAPL_action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03 00:00:00+00:00,0.844,0.773,sell,sell
2000-01-04 00:00:00+00:00,0.773,0.784,buy,buy
2000-01-05 00:00:00+00:00,0.784,0.716,sell,sell
2000-01-06 00:00:00+00:00,0.716,0.750,buy,buy
2000-01-07 00:00:00+00:00,0.750,0.737,sell,sell
...,...,...,...,...
2024-10-07 00:00:00+00:00,221.690,225.770,buy,buy
2024-10-08 00:00:00+00:00,225.770,229.540,buy,buy
2024-10-09 00:00:00+00:00,229.540,229.040,sell,sell
2024-10-10 00:00:00+00:00,229.040,227.550,sell,sell


___

In [6]:
# Calculate various indicators
volatility = data.pct_change()

# Handle inf values in the volatility DataFrame
volatility.replace([np.inf, -np.inf], np.nan, inplace=True)

# Fill missing values
volatility.ffill(inplace=True)
volatility.bfill(inplace=True)

rsi = data.apply(calculate_rsi)

sma_50 = data.rolling(window=50).mean()
sma_100 = data.rolling(window=100).mean()
sma_200 = data.rolling(window=200).mean()

upper_band, lower_band = calculate_bollinger_bands(data)

support = data.rolling(window=50).min()

resistance = data.rolling(window=50).max()

In [7]:
# Align data lengths by filling forward and backward
volatility.ffill(inplace=True)
volatility.bfill(inplace=True)
rsi.ffill(inplace=True)
rsi.bfill(inplace=True)
sma_50.ffill(inplace=True)
sma_50.bfill(inplace=True)
sma_100.ffill(inplace=True)
sma_100.bfill(inplace=True)
sma_200.ffill(inplace=True)
sma_200.bfill(inplace=True)
upper_band.ffill(inplace=True)
upper_band.bfill(inplace=True)
lower_band.ffill(inplace=True)
lower_band.bfill(inplace=True)
support.ffill(inplace=True)
support.bfill(inplace=True)
resistance.ffill(inplace=True)
resistance.bfill(inplace=True)

In [None]:
print(f"Volatility shape: {volatility.shape}")
print(volatility.head())

print(f"RSI shape: {rsi.shape}")
print(rsi.head())

print(f"SMA_50 shape: {sma_50.shape}")
print(sma_50.head())

print(f"SMA_100 shape: {sma_100.shape}")
print(sma_100.head())

print(f"SMA_200 shape: {sma_200.shape}")
print(sma_200.head())

print(f"Upper Band shape: {upper_band.shape}")
print(upper_band.head())

print(f"Lower Band shape: {lower_band.shape}")
print(lower_band.head())

print(f"Support shape: {support.shape}")
print(support.head())

print(f"Resistance shape: {resistance.shape}")
print(resistance.head())

In [None]:
# Compile indicators into a single DataFrame
length = len(data) * len(data.columns)

indicators_df = pd.DataFrame({
    'Date': np.repeat(data.index, len(data.columns)),
    'Ticker': np.tile(data.columns, len(data)),
    'Adjusted Close': data.values.flatten(),
    'Volatility': volatility.values.flatten(),
    'RSI': rsi.values.flatten(),
    'SMA_50': sma_50.values.flatten(),
    'SMA_100': sma_100.values.flatten(),
    'SMA_200': sma_200.values.flatten(),
    'Upper Band': upper_band.values.flatten(),
    'Lower Band': lower_band.values.flatten(),
    'Support': support.values.flatten(),
    'Resistance': resistance.values.flatten(),
})

# Drop NaN values
indicators_df.dropna(subset=['Adjusted Close', 'Volatility', 'RSI', 'SMA_50', 'SMA_100', 'SMA_200', 'Upper Band', 'Lower Band', 'Support', 'Resistance'], inplace=True)

# Display DataFrame
display(indicators_df.head())

In [None]:
# Plot RSI and Bollinger Bands for a sample stock (e.g., AAPL)
ticker_sample = 'AAPL'
plt.figure(figsize=(14, 7))
plt.subplot(2, 1, 1)
plt.plot(data[ticker_sample], label=f'{ticker_sample} Price')
plt.plot(sma_50[ticker_sample], label='50-Day SMA', linestyle='--')
plt.plot(upper_band[ticker_sample], label='Upper Bollinger Band', linestyle='--', color='orange')
plt.plot(lower_band[ticker_sample], label='Lower Bollinger Band', linestyle='--', color='orange')
plt.fill_between(data.index, lower_band[ticker_sample], upper_band[ticker_sample], color='lightgray')
plt.legend()
plt.title(f'{ticker_sample} Price with Bollinger Bands')
plt.subplot(2, 1, 2)
plt.plot(rsi[ticker_sample], label='RSI', color='purple')
plt.axhline(70, color='red', linestyle='--')
plt.axhline(30, color='green', linestyle='--')
plt.legend()
plt.title(f'{ticker_sample} RSI')
plt.tight_layout()
plt.show()

In [None]:
# Prepare data for plotting
ticker_sample = 'AAPL'
data_sample = data[[ticker_sample]].copy()
data_sample['SMA_50'] = sma_50[ticker_sample]
data_sample['SMA_100'] = sma_100[ticker_sample]
data_sample['SMA_200'] = sma_200[ticker_sample]
data_sample['RSI'] = rsi[ticker_sample]
data_sample['Support'] = support[ticker_sample]
data_sample['Resistance'] = resistance[ticker_sample]

# Plot
price_plot = data_sample.hvplot.line(y=[ticker_sample, 'SMA_50', 'SMA_100', 'SMA_200'], title=f'{ticker_sample} Price with Bollinger Bands, SMA, and Resistance Levels', ylabel='Price')
support_resistance_plot = data_sample.hvplot.line(y=['Support', 'Resistance'], line_dash='dashed', color=['green', 'red'])
rsi_plot = data_sample.hvplot.line(y='RSI', title=f'{ticker_sample} RSI', ylabel='RSI').opts(yformatter='%.0f') * hv.HLine(70).opts(color='red', line_dash='dashed') * hv.HLine(30).opts(color='green', line_dash='dashed')

(price_plot * support_resistance_plot + rsi_plot).cols(1)
