# Data Extraction from `yfinance`

In [1]:
import datetime
import yfinance as yf
import pandas as pd

In [2]:
# !pip install --upgrade yfinance

In [3]:
# Ticker symbols to look up.
ticker_list = ["AAPL", "META", "NDAQ", "SPY",]
company_list = []

# Resolve each ticker's long company name. The lookup is best-effort: a failure
# is reported and recorded as "Error" so that company_list always ends up the
# same length as ticker_list, which the DataFrame constructor below requires.
for ticker_symbol in ticker_list:
    try:
        stock = yf.Ticker(ticker_symbol)
        # 'N/A' is used when the info mapping has no 'longName' entry.
        company_name = stock.info.get('longName', 'N/A')
    except Exception as e:
        # Report the symbol that actually failed. The loop variable is
        # `ticker_symbol`; formatting any other name here would raise NameError
        # from inside the handler and abort the whole loop.
        print(f"Error fetching {ticker_symbol}: {e}")
        company_name = "Error"
    company_list.append(company_name)

# Two-column lookup table: ticker symbol and its company name.
tick_comp_df = pd.DataFrame({
    'Ticker': ticker_list,
    'Company': company_list
})

In [4]:
# Date window passed to the download call.
start_date = datetime.datetime(2022, 10, 29)
end_date = datetime.datetime(2025, 10, 29)

# auto_adjust is pinned explicitly instead of relying on the library default,
# so the price columns do not depend on the installed yfinance version.
# True matches the columns shown in this cell's output (no separate
# 'Adj Close' column).
data = yf.download(
    ticker_list,
    start=start_date,
    end=end_date,
    auto_adjust=True,
    progress=False,
)

# Move the 'Ticker' column level into the rows so each row is one
# (Date, Ticker) observation, then turn the index back into ordinary columns.
data = data.stack(level='Ticker', future_stack=True).reset_index()
data.columns.name = None  # drop the leftover name of the column index
print(data.head(10))

  data = yf.download(ticker_list, start=start_date, end=end_date, progress=False)


        Date Ticker       Close        High         Low        Open     Volume
0 2022-10-31   AAPL  150.957092  151.843115  149.559161  150.779897   97943200
1 2022-10-31   META   92.589218   98.711472   92.032644   97.618213  121361400
2 2022-10-31   NDAQ   59.641735   59.910045   59.268013   59.411752    2623800
3 2022-10-31    SPY  370.645264  372.747006  369.733567  370.866005   96631300
4 2022-11-01   AAPL  148.308884  153.034294  146.812515  152.670049   80379300
5 2022-11-01   META   94.616699   96.892669   92.976815   93.752035  110189600
6 2022-11-01   NDAQ   59.747131   60.341251   59.220092   59.957948    1724500
7 2022-11-01    SPY  369.023407  374.656864  367.842996  374.416939   85407600
8 2022-11-02   AAPL  142.776230  149.805274  142.746697  146.635311   93604600
9 2022-11-02   META   89.985260   94.616704   89.925630   93.632772   71821100


In [5]:
# Function to calculate RSI
def calculate_rsi(series, window=14):
    """Relative Strength Index from simple rolling means of gains and losses.

    Parameters
    ----------
    series : pd.Series
        Ordered observations (e.g. closing prices) of a single instrument.
    window : int, default 14
        Number of consecutive changes averaged for each RSI value.

    Returns
    -------
    pd.Series
        RSI values aligned to the index of ``series``. Entries are NaN until
        ``window`` real changes are available (i.e. the first ``window``
        positions), and also where a window contains neither a gain nor a loss
        (0/0). A window with gains but no losses yields 100.
    """
    delta = series.diff()
    # clip() keeps NaN as NaN, so the undefined first difference (and any gap
    # in the input) is not counted as a zero move; the rolling mean then stays
    # NaN until the window holds `window` real changes.
    gain = delta.clip(lower=0).rolling(window=window).mean()
    loss = (-delta).clip(lower=0).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

In [6]:
# Calculate all indicators per ticker.
# The group-by handles are taken on the two raw input columns only. `data` is
# extended in place and never rebound in this cell, so the handles always refer
# to the frame that is being filled in.
close_by_ticker = data.groupby('Ticker')['Close']
volume_by_ticker = data.groupby('Ticker')['Volume']

# RSI
data['RSI'] = close_by_ticker.transform(calculate_rsi)

# MACD
# The two EMAs are kept as local Series rather than temporary columns, so there
# is nothing to drop afterwards.
ema_12 = close_by_ticker.transform(lambda x: x.ewm(span=12, adjust=False).mean())
ema_26 = close_by_ticker.transform(lambda x: x.ewm(span=26, adjust=False).mean())
data['MACD'] = ema_12 - ema_26
# This group-by is created only now, after the 'MACD' column exists.
data['MACD_Signal'] = data.groupby('Ticker')['MACD'].transform(
    lambda x: x.ewm(span=9, adjust=False).mean()
)
data['MACD_Histogram'] = data['MACD'] - data['MACD_Signal']

# Rate of Change (10-day), in percent
data['ROC'] = close_by_ticker.transform(lambda x: x.pct_change(periods=10) * 100)

# Simple Moving Averages by X days
for days in [10, 20, 50, 100, 150, 200, 250]:
    data[f'SMA_{days}'] = close_by_ticker.transform(lambda x: x.rolling(window=days).mean())

# Close X days ago
for days in [1, 2, 3, 4, 5]:
    data[f'Close_{days}days_ago'] = close_by_ticker.shift(days)

# Close change since the previous row of the same ticker (absolute and percent)
data['Close_Chg'] = close_by_ticker.diff()
data['Close_ChgPct'] = close_by_ticker.transform(lambda x: x.pct_change() * 100)

# Volume X days ago
for days in [1]:
    data[f'Volume_{days}d_ago'] = volume_by_ticker.shift(days)

# Volume change since the previous row of the same ticker (absolute and percent)
data['Volume_Chg'] = volume_by_ticker.diff()
data['Volume_ChgPct'] = volume_by_ticker.transform(lambda x: x.pct_change() * 100)

In [7]:
# Show ten AAPL rows (positions 30-39 within that ticker) for a handful of
# indicator columns, followed by the full list of column names.
print("Sample data with indicators:")
sample_columns = [
    'Date', 'Ticker', 'Close', 'RSI', 'MACD', 'SMA_20', 'Close_ChgPct', 'Volume_ChgPct'
]
aapl_rows = data[data['Ticker'] == 'AAPL']
print(aapl_rows.iloc[30:40][sample_columns])

print("\n\nAll columns:")
print(list(data.columns))

Sample data with indicators:
          Date Ticker       Close        RSI      MACD      SMA_20  \
120 2022-12-13   AAPL  143.446930  42.356427 -1.263585  144.610529   
124 2022-12-14   AAPL  141.218414  37.787450 -1.297656  144.273782   
128 2022-12-15   AAPL  134.601669  33.843592 -1.837393  143.667826   
132 2022-12-16   AAPL  132.639374  35.733205 -2.395861  142.868597   
136 2022-12-19   AAPL  130.529114  36.714948 -2.974444  141.935753   
140 2022-12-20   AAPL  130.460083  20.129234 -3.399360  141.161178   
144 2022-12-21   AAPL  133.566284  27.979466 -3.445744  140.434921   
148 2022-12-22   AAPL  130.391083  25.595278 -3.696109  139.506020   
152 2022-12-23   AAPL  130.026215  26.261655 -3.879250  138.704821   
156 2022-12-27   AAPL  128.221695  27.960352 -4.122479  138.005190   

     Close_ChgPct  Volume_ChgPct  
120      0.678236      33.242411  
124     -1.553547     -12.350058  
128     -4.685469      20.221725  
132     -1.457854      61.886004  
136     -1.590976     -50

In [8]:
# Write both tables to a single workbook, one sheet per frame.
excel_file = 'TADASI_yhfinance.xlsx'
sheets = {
    'Tickers': tick_comp_df,  # ticker symbol / company name lookup
    'OHLC': data,             # price rows with the computed indicators
}
with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
print(f"\n Saved to {excel_file}")


 Saved to TADASI_yhfinance.xlsx


In [9]:
# https://github.com/ranaroussi/yfinance/issues/2469
# import curl_cffi
# session = curl_cffi.Session(impersonate="chrome", timeout=5)
# ticker = yf.Ticker('GBPEUR=X', session=session)
# data = ticker.history(start='2025-05-05', end='2025-05-07')