In [53]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import tabulate as tb
import yfinance as yf

In [54]:
# Download settings shared by the cells below.
TICKER = "^NDX"
TIMEFRAME = "4h"  # bar interval; alternatives noted by the author: 1d, 1h, 15m, 5m

# Date window; END_DATE is forwarded unchanged as the `end` argument of the download call.
START_DATE, END_DATE = "2023-12-01", None

# CSV destination, keyed by ticker symbol and bar interval.
PATH = "./../data/{}_{}.csv".format(TICKER, TIMEFRAME)

In [55]:
# Fetch price bars for TICKER at the configured interval and date window.
# auto_adjust is passed explicitly: recent yfinance releases changed its default to
# True and warn when it is omitted (the truncated warning captured in this cell's
# saved output appears to be that FutureWarning). True keeps the columns seen in
# the printed table (no separate "Adj Close").
df = yf.download(
    TICKER,
    start=START_DATE,
    end=END_DATE,
    interval=TIMEFRAME,
    auto_adjust=True,
    progress=False,
)

# Fail loudly here instead of letting an empty frame surface as an obscure error
# in the indicator cell (bad symbol, unsupported interval/date range, network issue).
if df is None or df.empty:
    raise ValueError(
        f"No data returned for {TICKER!r} (interval={TIMEFRAME!r}, "
        f"start={START_DATE!r}, end={END_DATE!r})"
    )

# When the columns come back as a MultiIndex, keep only the first level
# so the columns are plain labels such as "Close" and "High".
if isinstance(df.columns, pd.MultiIndex):
    df.columns = [col[0] for col in df.columns]

# Drop incomplete bars; plain assignment rather than inplace mutation.
df = df.dropna()
print(tb.tabulate(df.tail(), headers='keys', tablefmt='psql'))

  df = yf.download(


+---------------------------+---------+---------+---------+---------+-------------+
| Datetime                  |   Close |    High |     Low |    Open |      Volume |
|---------------------------+---------+---------+---------+---------+-------------|
| 2025-11-19 18:30:00+00:00 | 24637.9 | 24745.6 | 24495.1 | 24512   | 3.45301e+08 |
| 2025-11-20 14:30:00+00:00 | 24423.7 | 25221.7 | 24234.9 | 25119.9 | 6.06885e+08 |
| 2025-11-20 18:30:00+00:00 | 24064   | 24438.7 | 24021.4 | 24421.2 | 4.27383e+08 |
| 2025-11-21 14:30:00+00:00 | 24191.9 | 24337.9 | 23857.4 | 24150.1 | 5.65109e+08 |
| 2025-11-21 18:30:00+00:00 | 24239.8 | 24521.8 | 24181.8 | 24184.7 | 4.63769e+08 |
+---------------------------+---------+---------+---------+---------+-------------+


In [56]:
# Build the technical-indicator feature set on top of the downloaded bars and
# persist it to PATH.
# NOTE: this cell overwrites `df` and drops the indicator warm-up rows, so
# re-running it without re-running the download cell recomputes every indicator
# on the already-trimmed frame and trims it again.

# RSI over three look-back lengths (column order kept: 14, 28, 7).
for rsi_length in (14, 28, 7):
    df[f"rsi_{rsi_length}"] = ta.rsi(df["Close"], length=rsi_length)

# MACD with the library defaults; only the MACD_12_26_9 column of the returned
# frame is kept.
macd = ta.macd(df["Close"])
df["macd"] = macd["MACD_12_26_9"]

# Exponential moving averages of the close.
df["ema_20"] = ta.ema(df["Close"], length=20)
df["ema_50"] = ta.ema(df["Close"], length=50)

# Stochastic Oscillator (%K and %D columns as named by pandas_ta defaults).
stoch = ta.stoch(df["High"], df["Low"], df["Close"])
df["stoch_k"] = stoch["STOCHk_14_3_3"]
df["stoch_d"] = stoch["STOCHd_14_3_3"]

# Rate of change of the close over 10 bars.
df["roc"] = ta.roc(close=df["Close"], length=10)

# ADX together with the two directional-movement columns (DMP_14 / DMN_14).
adx = ta.adx(
    high=df["High"],
    low=df["Low"],
    close=df["Close"],
    length=14
)
df["adx"] = adx["ADX_14"]
df["di_plus"] = adx["DMP_14"]
df["di_minus"] = adx["DMN_14"]

# Average true range over 14 bars.
df["atr"] = ta.atr(
    high=df["High"],
    low=df["Low"],
    close=df["Close"],
    length=14
)

# Candle-shape ratios. A bar with High == Low has a zero range; mask it to NaN so
# the divisions below can never yield +/-inf. Such bars become NaN and are removed
# by the dropna further down rather than leaking inf into the CSV.
bar_range = (df["High"] - df["Low"]).replace(0, np.nan)

# Where the close sits inside the bar: 0 at the low, 1 at the high.
df["close_pos"] = (df["Close"] - df["Low"]) / bar_range

# Share of the bar's range covered by the open-to-close body.
df["body_range_ratio"] = (df["Close"] - df["Open"]).abs() / bar_range

# Bar-over-bar return and its sign as a binary flag (1 = up, 0 = flat or down).
df["returns"] = df["Close"].pct_change()
df['direction'] = np.where(df['returns'] > 0, 1, 0)

# Remove every row with a missing value (indicator warm-up rows, zero-range bars).
df = df.dropna()

print(tb.tabulate(df.head(), headers='keys', tablefmt='psql'))
print(tb.tabulate(df.tail(), headers='keys', tablefmt='psql'))

# Make sure the target directory exists so the export works on a fresh checkout.
Path(PATH).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(PATH)

+---------------------------+---------+---------+---------+---------+-------------+----------+----------+---------+-----------+----------+----------+-----------+-----------+-----------+---------+-----------+------------+---------+-------------+--------------------+--------------+-------------+
| Datetime                  |   Close |    High |     Low |    Open |      Volume |   rsi_14 |   rsi_28 |   rsi_7 |      macd |   ema_20 |   ema_50 |   stoch_k |   stoch_d |       roc |     adx |   di_plus |   di_minus |     atr |   close_pos |   body_range_ratio |      returns |   direction |
|---------------------------+---------+---------+---------+---------+-------------+----------+----------+---------+-----------+----------+----------+-----------+-----------+-----------+---------+-----------+------------+---------+-------------+--------------------+--------------+-------------|
| 2024-01-08 18:30:00+00:00 | 16645.6 | 16655   | 16555.2 | 16555.9 | 2.56931e+08 |  55.4875 |  58.1485 | 62.3088 |