In [85]:
import numpy as np
import pandas as pd
import yfinance as yf
import pandas_ta as ta
import matplotlib.pyplot as plt
import tabulate as tb

In [86]:
# --- Run configuration ---------------------------------------------------
# Instrument to download. Alternatives noted by the author: ^GSPC, ^NDX, AAPL, GOOGL.
TICKER = "^GSPC"

# Bar size, forwarded as the download `interval`. Alternatives: 1d, 1h, 15m, 5m.
TIMEFRAME = "1h"

# Download window, forwarded as `start` / `end`.
# END_DATE = None is passed through unchanged -- presumably "up to the latest
# available bar"; confirm against the yfinance documentation.
START_DATE = "2023-12-01"
END_DATE = None

# Destination CSV for the engineered feature table (relative to the notebook).
PATH = "./../data/{}_{}.csv".format(TICKER, TIMEFRAME)

In [87]:
# Download OHLCV bars for TICKER over the configured window and bar size.
# `auto_adjust` is passed explicitly instead of relying on the library default:
# the captured output has no "Adj Close" column, which is consistent with
# auto_adjust=True, so this pins the existing behaviour and avoids the warning
# yfinance emits when the argument is left implicit.
df = yf.download(
    TICKER,
    start=START_DATE,
    end=END_DATE,
    interval=TIMEFRAME,
    auto_adjust=True,
    progress=False,
)

# Fail fast with a readable message: without this, an empty result only
# surfaces later as an obscure error in the indicator cell.
if df is None or df.empty:
    raise ValueError(
        f"No data returned for {TICKER!r} "
        f"(interval={TIMEFRAME!r}, start={START_DATE!r}, end={END_DATE!r})."
    )

# The columns may come back as a MultiIndex; keep only the first level
# (the field name: Close/High/Low/Open/Volume in the captured output).
if isinstance(df.columns, pd.MultiIndex):
    df.columns = [col[0] for col in df.columns]

# Drop bars with any missing field, then preview the most recent rows.
df.dropna(inplace=True)
print(tb.tabulate(df.tail(), headers='keys', tablefmt='psql'))

  df = yf.download(


+---------------------------+---------+---------+---------+---------+-------------+
| Datetime                  |   Close |    High |     Low |    Open |      Volume |
|---------------------------+---------+---------+---------+---------+-------------|
| 2025-11-21 16:30:00+00:00 | 6617.38 | 6626.33 | 6567.49 | 6573.49 | 4.5586e+08  |
| 2025-11-21 17:30:00+00:00 | 6597.96 | 6629.22 | 6583.68 | 6617.91 | 3.16002e+08 |
| 2025-11-21 18:30:00+00:00 | 6657.44 | 6660.05 | 6595.05 | 6597.94 | 3.62949e+08 |
| 2025-11-21 19:30:00+00:00 | 6611.4  | 6659.62 | 6611.38 | 6657.3  | 3.78135e+08 |
| 2025-11-21 20:30:00+00:00 | 6603.8  | 6626.58 | 6596.99 | 6611.12 | 5.33559e+08 |
+---------------------------+---------+---------+---------+---------+-------------+


In [88]:
# Feature engineering: append technical indicators to the OHLCV frame `df`,
# drop the warm-up rows that contain NaNs, preview the result and save it to PATH.
#
# NOTE: `df` is extended and truncated in place. Re-running this cell without
# re-running the download cell recomputes every indicator on the already
# truncated frame, so rerun the download cell first.

# --- RSI at several lookbacks (tuple order sets the resulting column order) ---
for period in (14, 28, 50, 7):
    df[f"rsi_{period}"] = ta.rsi(df["Close"], length=period)

# --- MACD: only the MACD line (12/26/9) is kept ---
macd = ta.macd(df["Close"])
df["macd"] = macd["MACD_12_26_9"]

# --- Exponential moving averages ---
# Built in a loop so the column name and the lookback cannot drift apart.
# (ema_200 was previously computed with length=50, duplicating ema_50.)
# The longer 200-bar lookback will likely widen the NaN warm-up removed by
# dropna() below.
for period in (10, 20, 50, 100, 200):
    df[f"ema_{period}"] = ta.ema(df["Close"], length=period)

# --- Stochastic Oscillator (14/3/3) ---
stoch = ta.stoch(df["High"], df["Low"], df["Close"])
df["stoch_k"] = stoch["STOCHk_14_3_3"]
df["stoch_d"] = stoch["STOCHd_14_3_3"]

# --- Rate of change over 10 bars ---
df["roc"] = ta.roc(close=df["Close"], length=10)

# --- ADX(14) with its +DI / -DI components ---
adx = ta.adx(
    high=df["High"],
    low=df["Low"],
    close=df["Close"],
    length=14,
)
df["adx"] = adx["ADX_14"]
df["di_plus"] = adx["DMP_14"]
df["di_minus"] = adx["DMN_14"]

# --- Average True Range ---
for period in (14, 20):
    df[f"atr_{period}"] = ta.atr(
        high=df["High"],
        low=df["Low"],
        close=df["Close"],
        length=period,
    )

# --- Candle-shape features ---
# Both ratios divide by the bar's High-Low range; a bar with High == Low
# divides by zero and yields NaN (or inf if the numerator is non-zero).
hl_range = df["High"] - df["Low"]
# Where the close sits inside the bar's range.
df["close_pos"] = (df["Close"] - df["Low"]) / hl_range
# Absolute candle body relative to the bar's range.
df["body_range_ratio"] = (df["Close"] - df["Open"]).abs() / hl_range

# --- Volume Z-Score(50) ---
vol_roll = df["Volume"].rolling(50)
df["volume_zscore_50"] = (df["Volume"] - vol_roll.mean()) / vol_roll.std()

# --- Bollinger Bands(20) ---
bb = ta.bbands(df["Close"], length=20, std=2)

# Look the band columns up by prefix rather than by exact name, so the
# lookup does not depend on the suffix of the generated column names.
col_lower = next(c for c in bb.columns if c.startswith("BBL_20"))
col_middle = next(c for c in bb.columns if c.startswith("BBM_20"))
col_upper = next(c for c in bb.columns if c.startswith("BBU_20"))

df["bb_lower_20"] = bb[col_lower]
df["bb_middle_20"] = bb[col_middle]
df["bb_upper_20"] = bb[col_upper]

# --- Bollinger Bands Width (20): band spread relative to the middle band ---
df["bb_width_20"] = (df["bb_upper_20"] - df["bb_lower_20"]) / df["bb_middle_20"]

# --- On-Balance Volume (OBV) ---
df["obv"] = ta.obv(df["Close"], df["Volume"])

# --- Returns and direction label ---
df["returns"] = df["Close"].pct_change()
# 1 when this bar's own return is positive, otherwise 0. This labels the
# current bar, not the following one.
df['direction'] = np.where(df['returns'] > 0, 1, 0)

# Remove every row that still has a NaN in any column.
df.dropna(inplace=True)

print(tb.tabulate(df.head(), headers='keys', tablefmt='psql'))
print(tb.tabulate(df.tail(), headers='keys', tablefmt='psql'))

df.to_csv(PATH)

+---------------------------+---------+---------+---------+---------+-------------+----------+----------+----------+---------+-----------+----------+----------+----------+-----------+-----------+-----------+-----------+-----------+---------+-----------+------------+----------+----------+-------------+--------------------+--------------------+---------------+----------------+---------------+---------------+-------------+--------------+-------------+
| Datetime                  |   Close |    High |     Low |    Open |      Volume |   rsi_14 |   rsi_28 |   rsi_50 |   rsi_7 |      macd |   ema_10 |   ema_20 |   ema_50 |   ema_100 |   ema_200 |   stoch_k |   stoch_d |       roc |     adx |   di_plus |   di_minus |   atr_14 |   atr_20 |   close_pos |   body_range_ratio |   volume_zscore_50 |   bb_lower_20 |   bb_middle_20 |   bb_upper_20 |   bb_width_20 |         obv |      returns |   direction |
|---------------------------+---------+---------+---------+---------+-------------+----------+