In [64]:
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go



In [2]:
def download_data(stocks):
    stocks_str = " ".join(stocks)
    return yf.Tickers(stocks_str)

In [21]:
russell_table = pd.read_html("https://en.wikipedia.org/wiki/Russell_1000_Index")
russell_list = list(russell_table[2]["Ticker"])

ticker_object = download_data(russell_list)


In [195]:
def add_moving_average(df):
    df["interday_chg"] = (df["Close"] - df["Open"]) / df["Open"]
    df["day_change"] = df["Close"].pct_change()

    for i in [3, 5, 10, 20, 60]:
        df[f"ma{i}"] = df["Close"].rolling(i).mean()
        df[f"ema{i}"] = df["Close"].ewm(span=i).mean()

    return df

def add_volatility(df):
    if "day_change" not in df.columns:
        df["day_change"] = df["Close"].pct_change()

    for i in [5, 20, 60]:
        df[f"volatility{i}"] = df["day_change"].rolling(i).std()

    return df

def add_norm_volume(df):
    for i in [5, 10, 20]:
        df[f"vol{i}"] = df["Volume"].rolling(i).mean() / df["Volume"]
        
    return df

def add_bollinger_band(df):
    if "ma20" not in df.columns:
        df["ma20"] = df["Close"].rolling(20).mean()
    std = df["Close"].rolling(20).std()

    df["upper_bb"] = (df["ma20"] + 2 * std)
    df["lower_bb"] = (df["ma20"] - 2 * std)

    return df

def add_macd(df):
    ema12 = df["Close"].ewm(span=12).mean()
    ema26 = df["Close"].ewm(span=26).mean()
    macd = (ema12 - ema26) / ema26
    macd_signal = macd.ewm(span=9).mean()

    df["macd_hist"] = macd - macd_signal
    df["macd_ma3"] = df["macd_hist"].rolling(3).mean() / df["macd_hist"]
    df["macd_ma5"] = df["macd_hist"].rolling(5).mean() / df["macd_hist"]

    for i in [1, 2, 3]:
        df[f"macd_{i}d_diff"] = df["macd_hist"] - df["macd_hist"].shift(i)

    return df

def add_psar(df, af_step=0.02, af_max=0.2):
    # Initialize columns
    psar_array = np.zeros(len(df))
    af_array = np.full(len(df), af_step)

    # Initial values
    bull = True
    psar = df.iloc[0]["Low"]
    ep = df.iloc[0]["High"]

    for i in range(1, len(df)):
        if bull:
            psar = psar + af_array[i - 1] * (ep - psar)
            if df.iloc[i]["Low"] < psar:
                bull = False
                psar = ep
                ep = df.iloc[i]["Low"]
                af_array[i] = af_step
            else:
                if df.iloc[i]["High"] > ep:
                    ep = df.iloc[i]["High"]
                    af_array[i] = min(af_array[i - 1] + af_step, af_max)
                else:
                    af_array[i] = af_array[i - 1]
        else:
            psar = psar - af_array[i - 1] * (psar - ep)
            if df.iloc[i]["High"] > psar:
                bull = True
                psar = ep
                ep = df.iloc[i]["High"]
                af_array[i] = af_step
            else:
                if df.iloc[i]["Low"] < ep:
                    ep = df.iloc[i]["Low"]
                    af_array[i] = min(af_array[i - 1] + af_step, af_max)
                else:
                    af_array[i] = af_array[i - 1]

        psar_array[i] = psar

    psar_array[0] = df.iloc[0]["Close"]

    df["psar"] = psar_array

    return df

def add_stochastic_oscillator(df):
    for i in [7, 23]:
        low = df["Low"].rolling(window=i).min()
        high = df["High"].rolling(window=i).max()
        df[f"os_k{i}"] = (df["Close"] - low) / (high - low)
        df[f"os_d{i}"] = df[f"os_k{i}"].rolling(window=3).mean()

        df[f"os_kd{i}"] = df[f"os_k{i}"] - df[f"os_d{i}"]
        df[f"os_kd{i}_1d"] = df[f"os_kd{i}"] - df[f"os_kd{i}"].shift(1)
        df[f"os_kd{i}_2d"] = df[f"os_kd{i}"] - df[f"os_kd{i}"].shift(2)

    return df


def add_target(df):
    df["ema3"] = df["Close"].ewm(span=3).mean()

    df["short_target"] = (df["ema3"].shift(-2) - df["Close"]) / df["Close"]
    df["short_target"] = pd.cut(df["short_target"], bins=[-1, -0.03, 0.03, 1], labels=[-1, 0, 1], right=False)

    df["long_target"] = (df["ema3"].shift(-10) - df["Close"]) / df["Close"]
    df["long_target"] = pd.cut(df["long_target"], bins=[-1, -0.05, 0.05, 1], labels=[-1, 0, 1], right=False)

    target = ["short_target", "long_target"]
        
    return df, target

In [207]:
data_set = pd.DataFrame()
count = 0

for ticker in russell_list:
    # ticker = "TSLA"
    print(ticker)
    
    stock_data = ticker_object.tickers[ticker].history(period="300d")
    stock_data, target = add_target(stock_data)
    old_columns = stock_data.columns

    # Un-normalized features
    stock_data = add_moving_average(stock_data)
    stock_data = add_bollinger_band(stock_data)
    stock_data = add_psar(stock_data, af_step=0.02, af_max=0.2)

    unnorm_features = list(set(stock_data.columns) - set(old_columns))
    stock_data[unnorm_features] = stock_data[unnorm_features].div(stock_data["Close"], axis=0)

    # Normalized features
    stock_data["ticker"] = ticker
    stock_data = add_macd(stock_data)
    stock_data = add_norm_volume(stock_data)
    stock_data = add_stochastic_oscillator(stock_data)
    stock_data = add_volatility(stock_data)

    train_features = list(set(stock_data.columns) - set(old_columns))

    
    stock_data = stock_data.dropna()

    data_set = pd.concat([data_set, stock_data[train_features + target]])

    count += 1
    if count < 3:
        break





TXG


In [209]:
data_set.tail(10).columns

Index(['os_kd23_1d', 'os_d7', 'ema20', 'os_kd7', 'os_k23', 'ema60', 'ma3',
       'os_d23', 'ticker', 'macd_ma3', 'vol5', 'os_kd23', 'vol20',
       'macd_3d_diff', 'ema5', 'os_kd7_2d', 'volatility5', 'psar',
       'macd_1d_diff', 'lower_bb', 'ma60', 'ma5', 'ma20', 'macd_hist',
       'macd_2d_diff', 'os_kd23_2d', 'interday_chg', 'day_change', 'vol10',
       'ma10', 'upper_bb', 'os_k7', 'os_kd7_1d', 'ema10', 'volatility20',
       'macd_ma5', 'volatility60', 'short_target', 'long_target'],
      dtype='object')

In [191]:
(data_set.head(40)[["ticker"] + target])

Unnamed: 0_level_0,ticker,short_target,long_target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-05-19 00:00:00-04:00,TXG,0,0
2023-05-22 00:00:00-04:00,TXG,-1,0
2023-05-23 00:00:00-04:00,TXG,0,0
2023-05-24 00:00:00-04:00,TXG,0,0
2023-05-25 00:00:00-04:00,TXG,1,1
2023-05-26 00:00:00-04:00,TXG,0,0
2023-05-30 00:00:00-04:00,TXG,0,0
2023-05-31 00:00:00-04:00,TXG,0,1
2023-06-01 00:00:00-04:00,TXG,0,1
2023-06-02 00:00:00-04:00,TXG,0,1


In [150]:
train_features

['macd_hist',
 'macd_ma3',
 'macd_ma5',
 'macd_1d_diff',
 'macd_2d_diff',
 'macd_3d_diff',
 'vol5',
 'vol10',
 'vol20',
 'ma3',
 'ema3',
 'ma5',
 'ema5',
 'ma10',
 'ema10',
 'ma20',
 'ema20',
 'ma60',
 'ema60',
 'upper_bb',
 'lower_bb',
 'psar']

In [140]:
stock_data.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits',
       'macd_hist', 'macd_ma3', 'macd_ma5', 'macd_1d_diff', 'macd_2d_diff',
       'macd_3d_diff', 'vol5', 'vol10', 'vol20', 'ma3', 'ema3', 'ma5', 'ema5',
       'ma10', 'ema10', 'ma20', 'ema20', 'ma60', 'ema60', 'upper_bb',
       'lower_bb', 'psar', 'short_target', 'long_target'],
      dtype='object')

In [144]:
stock_data.tail(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,macd_hist,macd_ma3,macd_ma5,...,ema10,ma20,ema20,ma60,ema60,upper_bb,lower_bb,psar,short_target,long_target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-04-12 00:00:00-04:00,259.420013,259.970001,244.309998,245.75,8951800,0.0,0.0,-0.016521,1.048023,1.145964,...,1.023115,1.028395,1.006002,0.804115,0.86025,1.12113,0.935659,1.145576,-1,-1
2024-04-15 00:00:00-04:00,247.940002,249.740005,222.139999,223.410004,11239800,0.0,0.0,-0.022616,0.803094,0.853412,...,1.102618,1.126988,1.096446,0.891914,0.948032,1.243187,1.01079,1.255885,0,0
2024-04-16 00:00:00-04:00,223.0,223.0,205.669998,218.830002,16472100,0.0,0.0,-0.026959,0.817245,0.753136,...,1.102841,1.145826,1.108023,0.917747,0.968928,1.280367,1.011284,1.271488,0,0
2024-04-17 00:00:00-04:00,222.039993,224.869995,205.899994,213.779999,9440200,0.0,0.0,-0.030076,0.882769,0.741606,...,1.105458,1.169071,1.121417,0.946097,0.992084,1.321516,1.016626,1.281156,0,0
2024-04-18 00:00:00-04:00,215.570007,228.050003,213.25,218.080002,9403900,0.0,0.0,-0.029691,0.97364,0.847809,...,1.06845,1.137124,1.089847,0.934618,0.973424,1.299215,0.975033,1.237127,0,0


In [70]:
px.line(stock_data[["Close", "ma20", "ma60", "upper_bb", "lower_bb"]])

In [78]:
px.line(stock_data[["ma20", "ma60", "upper_bb", "lower_bb"]])