In [14]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import tabulate as tb
import yfinance as yf

In [15]:
# Run configuration: the values below drive the download and the export cells.
# Symbol passed to yf.download; also used as the name of the exported CSV file.
TICKER = "GOOGL"
# Start of the requested history, forwarded as `start=` to yf.download.
START_DATE = "2000-01-01"
# Forwarded as `end=` to yf.download; None leaves the end of the range open
# (presumably "up to the latest available bar" -- confirm against yfinance docs).
END_DATE = None
# Bar interval forwarded as `interval=` to yf.download.
# NOTE(review): the intraday values listed here are likely limited by Yahoo to a
# short recent window, so they may not work with START_DATE above -- confirm.
TIMEFRAME = "1d"    # candidate values: 1d, 1h, 15m, 5m

In [16]:
# Download the OHLCV history for TICKER.
# `auto_adjust` is spelled out on purpose: recent yfinance releases default it
# to True and warn when it is left implicit. Passing it explicitly keeps the
# split/dividend-adjusted prices and silences that warning.
df = yf.download(
    TICKER,
    start=START_DATE,
    end=END_DATE,
    interval=TIMEFRAME,
    auto_adjust=True,
    progress=False,
)

# For a single ticker yfinance may still return (field, ticker) MultiIndex
# columns; keep only the field name so columns are addressed as df["Close"].
if isinstance(df.columns, pd.MultiIndex):
    df.columns = [col[0] for col in df.columns]

# Drop incomplete bars (rebinding instead of mutating in place).
df = df.dropna()

# A failed or out-of-range request comes back as an empty frame rather than an
# exception; stop here with a clear message instead of failing later inside the
# indicator calculations.
if df.empty:
    raise ValueError(
        f"No price data returned for {TICKER!r} "
        f"(interval={TIMEFRAME!r}, start={START_DATE!r}, end={END_DATE!r})"
    )

print(tb.tabulate(df.tail(), headers='keys', tablefmt='psql'))

  df = yf.download(


+---------------------+---------+--------+--------+--------+-------------+
| Date                |   Close |   High |    Low |   Open |      Volume |
|---------------------+---------+--------+--------+--------+-------------|
| 2025-11-10 00:00:00 |  290.1  | 290.8  | 282.86 | 284.42 | 2.95573e+07 |
| 2025-11-11 00:00:00 |  291.31 | 291.92 | 287.32 | 287.75 | 1.98421e+07 |
| 2025-11-12 00:00:00 |  286.71 | 292.01 | 283.69 | 291.68 | 2.48299e+07 |
| 2025-11-13 00:00:00 |  278.57 | 282.84 | 277.24 | 282.34 | 2.9494e+07  |
| 2025-11-14 00:00:00 |  276.41 | 278.56 | 270.7  | 271.41 | 2.82468e+07 |
+---------------------+---------+--------+--------+--------+-------------+


In [17]:
# Feature engineering: add momentum, trend, volatility and candle-shape columns
# to `df`, drop rows with missing values, preview the result and export it to
# ../data/<TICKER>.csv.

# RSI of the close over three look-back windows.
df["rsi_14"] = ta.rsi(df["Close"], length=14)
df["rsi_28"] = ta.rsi(df["Close"], length=28)
df["rsi_7"] = ta.rsi(df["Close"], length=7)

# MACD with pandas_ta defaults; only the MACD line itself is kept.
macd = ta.macd(df["Close"])
df["macd"] = macd["MACD_12_26_9"]

# Exponential moving averages of the close.
df["ema_20"] = ta.ema(df["Close"], length=20)
df["ema_50"] = ta.ema(df["Close"], length=50)

# Stochastic Oscillator (pandas_ta defaults, %K and %D lines).
stoch = ta.stoch(df["High"], df["Low"], df["Close"])
df["stoch_k"] = stoch["STOCHk_14_3_3"]
df["stoch_d"] = stoch["STOCHd_14_3_3"]

# 10-bar rate of change of the close.
df["roc"] = ta.roc(close=df["Close"], length=10)

# ADX together with its two directional-movement components.
adx = ta.adx(
    high=df["High"],
    low=df["Low"],
    close=df["Close"],
    length=14
)
df["adx"] = adx["ADX_14"]
df["di_plus"] = adx["DMP_14"]
df["di_minus"] = adx["DMN_14"]

# Average true range.
df["atr"] = ta.atr(
    high=df["High"],
    low=df["Low"],
    close=df["Close"],
    length=14
)

# Candle-shape ratios share the High-Low range as denominator. A bar with
# High == Low would divide by zero; a non-zero numerator then yields +/-inf,
# which dropna() does NOT remove and which would end up in the CSV. Masking the
# zero range to NaN makes such bars fall out with the other incomplete rows.
candle_range = (df["High"] - df["Low"]).replace(0, np.nan)

# Position of the close inside the bar: 0 = at the low, 1 = at the high.
df["close_pos"] = (df["Close"] - df["Low"]) / candle_range

# Share of the bar's range covered by the open-to-close body.
df["body_range_ratio"] = (df["Close"] - df["Open"]).abs() / candle_range

# Simple close-to-close return and its sign: 1 for a positive return, else 0.
# NOTE(review): `direction` describes the current bar, not the next one; shift
# it if it is meant to be a prediction target -- confirm intended use.
df["returns"] = df["Close"].pct_change()
df['direction'] = np.where(df['returns'] > 0, 1, 0)

# Remove rows with any missing value (rebinding instead of mutating in place).
df = df.dropna()

print(tb.tabulate(df.head(), headers='keys', tablefmt='psql'))
print(tb.tabulate(df.tail(), headers='keys', tablefmt='psql'))

# Export; create the target directory first so a fresh checkout does not fail.
out_path = Path("../data") / f"{TICKER}.csv"
out_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(out_path)

+---------------------+---------+---------+---------+---------+-------------+----------+----------+---------+----------+----------+----------+-----------+-----------+---------+---------+-----------+------------+----------+-------------+--------------------+-------------+-------------+
| Date                |   Close |    High |     Low |    Open |      Volume |   rsi_14 |   rsi_28 |   rsi_7 |     macd |   ema_20 |   ema_50 |   stoch_k |   stoch_d |     roc |     adx |   di_plus |   di_minus |      atr |   close_pos |   body_range_ratio |     returns |   direction |
|---------------------+---------+---------+---------+---------+-------------+----------+----------+---------+----------+----------+----------+-----------+-----------+---------+---------+-----------+------------+----------+-------------+--------------------+-------------+-------------|
| 2004-10-28 00:00:00 | 4.8042  | 4.83129 | 4.61283 | 4.63967 | 5.93278e+08 |  82.5206 |  85.5229 | 83.4517 | 0.421614 |  3.86713 |  3.15757 |