# Data Extraction from `yfinance`

In [1]:
import datetime
import yfinance as yf

In [2]:
# !pip install --upgrade yfinance

In [3]:
# Download window and the tickers to fetch.
start_date = datetime.datetime(2022, 10, 29)
end_date = datetime.datetime(2025, 10, 29)
tickers = ["AAPL", "META", "NDAQ", "SPY"]

# auto_adjust is pinned explicitly: yfinance changed its default to True and
# emits a FutureWarning when the argument is omitted. Passing it keeps the
# adjusted prices while silencing the warning and making the choice visible.
data = yf.download(
    tickers,
    start=start_date,
    end=end_date,
    auto_adjust=True,
    progress=False,
)

# Move the 'Ticker' column level into the rows so each row is one
# (Date, Ticker) observation, then flatten the index into regular columns.
data = data.stack(level='Ticker', future_stack=True).reset_index()
data.columns.name = None  # drop the leftover name of the columns axis
print(data.head(10))

  data = yf.download(["AAPL", "META", "NDAQ", "SPY",], start=start_date, end=end_date, progress=False)


        Date Ticker       Close        High         Low        Open     Volume
0 2022-10-31   AAPL  150.957062  151.843085  149.559131  150.779866   97943200
1 2022-10-31   META   92.589218   98.711472   92.032644   97.618213  121361400
2 2022-10-31   NDAQ   59.641731   59.910041   59.268009   59.411749    2623800
3 2022-10-31    SPY  370.645203  372.746945  369.733507  370.865944   96631300
4 2022-11-01   AAPL  148.308868  153.034279  146.812500  152.670033   80379300
5 2022-11-01   META   94.616714   96.892685   92.976830   93.752050  110189600
6 2022-11-01   NDAQ   59.747139   60.341259   59.220100   59.957956    1724500
7 2022-11-01    SPY  369.023438  374.656895  367.843027  374.416970   85407600
8 2022-11-02   AAPL  142.776215  149.805258  142.746682  146.635296   93604600
9 2022-11-02   META   89.985275   94.616720   89.925645   93.632788   71821100


In [4]:
# Function to calculate RSI
def calculate_rsi(series, window=14):
    """Compute the Relative Strength Index (RSI) of a price series.

    Gains and losses are averaged with a simple rolling mean over ``window``
    consecutive price changes.

    Parameters
    ----------
    series : pandas.Series
        Prices in chronological order.
    window : int, default 14
        Number of consecutive price changes averaged for each RSI value.

    Returns
    -------
    pandas.Series
        RSI values on the same index as ``series``. The first ``window``
        entries are NaN because fewer than ``window`` price changes are
        available. A window containing gains but no losses yields 100; a
        window with neither gains nor losses yields NaN (0 / 0).
    """
    delta = series.diff()
    # clip() keeps the leading NaN produced by diff(), so the undefined first
    # change is not counted as a zero-sized move in the first rolling window.
    gain = delta.clip(lower=0).rolling(window=window).mean()
    loss = (-delta).clip(lower=0).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

In [5]:
# Calculate all indicators per ticker.
# `data` is only extended in place in this cell (never rebound), so `grouped`
# keeps pointing at the live frame and every result aligns with it by index.
grouped = data.groupby('Ticker')
close_by_ticker = grouped['Close']
volume_by_ticker = grouped['Volume']

# RSI
data['RSI'] = close_by_ticker.transform(calculate_rsi)

# MACD: difference of the 12- and 26-span EMAs, its 9-span EMA as the signal
# line, and their difference as the histogram. The EMAs stay local Series so
# no temporary columns have to be added to and dropped from `data`.
ema_12 = close_by_ticker.transform(lambda x: x.ewm(span=12, adjust=False).mean())
ema_26 = close_by_ticker.transform(lambda x: x.ewm(span=26, adjust=False).mean())
data['MACD'] = ema_12 - ema_26
# Group from `data` again so the freshly added MACD column is looked up
# explicitly instead of through the groupby object created before it existed.
data['MACD_Signal'] = data.groupby('Ticker')['MACD'].transform(
    lambda x: x.ewm(span=9, adjust=False).mean()
)
data['MACD_Histogram'] = data['MACD'] - data['MACD_Signal']

# Rate of Change (10-day), in percent
data['ROC'] = close_by_ticker.transform(lambda x: x.pct_change(periods=10) * 100)

# Simple Moving Averages
for sma_window in (10, 20, 50, 100):
    # `w=sma_window` binds the current window size to this lambda
    data[f'SMA_{sma_window}'] = close_by_ticker.transform(
        lambda x, w=sma_window: x.rolling(window=w).mean()
    )

# Close X days ago
for days in [1, 2, 3, 4, 5]:
    data[f'Close_{days}d_ago'] = close_by_ticker.shift(days)

# Close change since yesterday (absolute and in percent)
data['Close_Change'] = close_by_ticker.diff()
data['Close_Change_Pct'] = close_by_ticker.transform(lambda x: x.pct_change() * 100)

# Volume 1 day ago
for days in [1]:
    data[f'Volume_{days}d_ago'] = volume_by_ticker.shift(days)

# Volume change since yesterday (absolute and in percent)
data['Volume_Change'] = volume_by_ticker.diff()
data['Volume_Change_Pct'] = volume_by_ticker.transform(lambda x: x.pct_change() * 100)

In [6]:
# Preview: positions 30-39 of the AAPL rows, restricted to a few key columns
print("Sample data with indicators:")
print(
    data.loc[
        data['Ticker'] == 'AAPL',
        ['Date', 'Ticker', 'Close', 'RSI', 'MACD', 'SMA_20', 'Close_Change_Pct', 'Volume_Change_Pct'],
    ].iloc[30:40]
)

# Every column name currently on the frame
print("\n\nAll columns:")
print(list(data.columns))

Sample data with indicators:
          Date Ticker       Close        RSI      MACD      SMA_20  \
120 2022-12-13   AAPL  143.446960  42.356358 -1.263578  144.610531   
124 2022-12-14   AAPL  141.218369  37.787404 -1.297654  144.273781   
128 2022-12-15   AAPL  134.601700  33.843551 -1.837389  143.667828   
132 2022-12-16   AAPL  132.639374  35.733153 -2.395858  142.868600   
136 2022-12-19   AAPL  130.529114  36.714881 -2.974441  141.935757   
140 2022-12-20   AAPL  130.460098  20.129093 -3.399356  141.161181   
144 2022-12-21   AAPL  133.566269  27.979335 -3.445742  140.434922   
148 2022-12-22   AAPL  130.391068  25.595147 -3.696109  139.506023   
152 2022-12-23   AAPL  130.026215  26.261550 -3.879250  138.704823   
156 2022-12-27   AAPL  128.221649  27.960191 -4.122482  138.005190   

     Close_Change_Pct  Volume_Change_Pct  
120          0.678257          33.242411  
124         -1.553600         -12.350058  
128         -4.685416          20.221725  
132         -1.457876       

In [7]:
# Write the frame with all indicator columns to CSV, omitting the row index
data.to_csv(path_or_buf="TADASI_yhfinance.csv", index=False)
print("\n✓ Saved to 'TADASI_yhfinance.csv'")


✓ Saved to 'TADASI_yhfinance.csv'


In [8]:
# Reference only (not executed): fetching a single ticker through a curl_cffi session.
# See https://github.com/ranaroussi/yfinance/issues/2469
# import curl_cffi
# session = curl_cffi.Session(impersonate="chrome", timeout=5)
# ticker = yf.Ticker('GBPEUR=X', session=session)
# data = ticker.history(start='2025-05-05', end='2025-05-07')