# Data Extraction from `yhfinance`

In [1]:
import datetime
import yfinance as yf
import pandas as pd
import numpy as np
import requests
from io import StringIO

In [2]:
# !pip install --upgrade yfinance

In [3]:
def SAndP500_Wikipedia_Scrape():
    """
    Fetch S&P500 tickers and corresponding data from Wikipedia
    """
    print("Fetching from Wikipedia...")
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    # Download HTML, parse it to find all tables and create corresponding pandas DataFrames and return list of DataFrames
    response = requests.get(url, headers=headers)
    tables = pd.read_html(StringIO(response.text))
    sp500_table = tables[0]
    
    # Create DataFrame with relevant info
    df = pd.DataFrame({
        'Ticker': sp500_table['Symbol'].tolist(),
        'Company': sp500_table['Security'].tolist(),
        'Sector': sp500_table['GICS Sector'].tolist(),
        'Industry': sp500_table['GICS Sub-Industry'].tolist()
    })
    
    return df

In [4]:
def NASDAQ100_Wikipedia_Scrape():
    """
    Fetch NASDAQ100 tickers and corresponding data from Wikipedia
    """
    print("Fetching from Wikipedia...")
    url = 'https://en.wikipedia.org/wiki/Nasdaq-100'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    # Download HTML, parse it to find all tables and create corresponding pandas DataFrames and return list of DataFrames
    response = requests.get(url, headers=headers)
    tables = pd.read_html(StringIO(response.text))

    # Find NASDAQ table
    for index, table in enumerate(tables):
        if 'Ticker' in table.columns and len(table) > 90:
            ndaq100_table = tables[index]
    
    # Create DataFrame with relevant info
    df = pd.DataFrame({
        'Ticker': ndaq100_table['Ticker'].tolist(),
        'Company': ndaq100_table['Company'].tolist(),
        'Industry': ndaq100_table['ICB Industry'].tolist(),
        'Sector': ndaq100_table['ICB Subsector'].tolist(),
    })

    # Add NDAQ manually
    ndaq_row = pd.DataFrame({
        'Ticker': ["NDAQ"],
        'Company': ["Nasdaq, Inc."],
        'Industry': ["Stock Exchange"],
        'Sector': ["Financial Services"],
    })

    df = pd.concat([df, ndaq_row], ignore_index=True)
    
    return df

In [5]:
try:
    ndaq100df = NASDAQ100_Wikipedia_Scrape()
    ticker_list = ndaq100df['Ticker'].tolist()
    print(f" Successfully fetched {len(ndaq100df)} NASDAQ-100 tickers!")
    
    # Display summary
    print("\n" + "="*60)
    print(f"Total tickers: {len(ndaq100df)}")
    print("\nFirst 10 tickers:")
    print(ndaq100df.head(10).to_string(index=False))
    
    print("\n" + "="*60)
    print("Sector Distribution:")
    print("="*60)
    print(ndaq100df['Sector'].value_counts())
    
except Exception as e:
    print(f"Python scrape failed: {e}")

Fetching from Wikipedia...
 Successfully fetched 103 NASDAQ-100 tickers!

Total tickers: 103

First 10 tickers:
Ticker                 Company               Industry                          Sector
  ADBE              Adobe Inc.             Technology               Computer Software
   AMD  Advanced Micro Devices             Technology                  Semiconductors
  ABNB                  Airbnb Consumer Discretionary Diversified Commercial Services
 GOOGL Alphabet Inc. (Class A)             Technology               Computer Software
  GOOG Alphabet Inc. (Class C)             Technology               Computer Software
  AMZN                  Amazon Consumer Discretionary  Catalog/Specialty Distribution
   AEP American Electric Power              Utilities              Electric Utilities
  AMGN                   Amgen            Health Care                   Biotechnology
   ADI          Analog Devices             Technology                  Semiconductors
  AAPL              Apple In

In [6]:
# ticker_list = ["AAPL", "META", "NDAQ", "SPY",]
# company_list = []

# for ticker_symbol in ticker_list:
#     try: 
#         stock = yf.Ticker(ticker_symbol)
#         company_name = stock.info.get('longName', 'N/A')
#         company_list.append(company_name)
#     except Exception as e:
#         print(f"Error fetching {ticker}: {e}")
#         company_list.append("Error")

# tick_comp_df = pd.DataFrame({
#     'Ticker': ticker_list,
#     'Company': company_list
# })

In [7]:
start_date = datetime.datetime(2022, 10, 29)
end_date = datetime.datetime(2025, 10, 29)
data = yf.download(ticker_list, start=start_date, end=end_date, progress=False)
data = data.stack(level='Ticker', future_stack=True).reset_index()
data.columns.name = None
print(data.head(10))

  data = yf.download(ticker_list, start=start_date, end=end_date, progress=False)


        Date Ticker       Close        High         Low        Open  \
0 2022-10-31   AAPL  150.957092  151.843115  149.559161  150.779897   
1 2022-10-31   ABNB  106.910004  113.800003  106.669998  113.059998   
2 2022-10-31   ADBE  318.500000  325.579987  317.420013  323.489990   
3 2022-10-31    ADI  135.263535  136.458552  133.347738  136.392156   
4 2022-10-31    ADP  226.601654  227.248554  224.501590  225.560992   
5 2022-10-31   ADSK  214.300003  216.289993  214.000000  214.759995   
6 2022-10-31    AEP   78.322495   79.632028   77.823627   79.596393   
7 2022-10-31   AMAT   85.900398   86.980356   85.287452   86.454971   
8 2022-10-31    AMD   60.060001   61.860001   59.529999   60.750000   
9 2022-10-31   AMGN  245.694214  247.021055  243.558527  244.649099   

       Volume  
0  97943200.0  
1  10733800.0  
2   3253200.0  
3   3078300.0  
4   1711400.0  
5    965000.0  
6   4104000.0  
7   6875400.0  
8  73274100.0  
9   3033600.0  


In [8]:
# Function to calculate RSI
def calculate_rsi(series, window=14):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

# Function to calculate Aroon
def add_aroon(data, period=14, ticker_col=None):
    """
    Add Aroon Up and Aroon Down indicators to data
    
    Parameters:
        data (DataFrame): Must have 'High' and 'Low' columns
        period (int): Lookback period (default: 14)
        ticker_col (str): Column name for ticker if multi-ticker data
    
    Returns:
        DataFrame: Data with 'Aroon_Up' and 'Aroon_Down' columns added
        
    Note: Intermediate columns are automatically removed
    """
    
    df = data.copy()
    
    def calc_aroon(group):
        # Periods since highest high
        periods_high = group['High'].rolling(window=period).apply(
            lambda x: period - 1 - np.argmax(x), raw=True
        )
        
        # Periods since lowest low  
        periods_low = group['Low'].rolling(window=period).apply(
            lambda x: period - 1 - np.argmin(x), raw=True
        )
        
        # Calculate Aroon indicators
        group['Aroon_Up'] = ((period - periods_high) / period)
        group['Aroon_Down'] = ((period - periods_low) / period)
        
        return group
    
    # Apply to each ticker or entire dataset
    if ticker_col and ticker_col in df.columns:
        df = df.groupby(ticker_col, as_index=False, group_keys=False).apply(calc_aroon)
    else:
        df = calc_aroon(df)
    
    return df

In [9]:
# Function to detect RSI divergence
def rsi_divergence(group, lookback=14):
    """
    Detect RSI divergence for each row in a group
    Returns Series with divergence directions (1, -1, or 0)
    """
    result = pd.Series(index=group.index, dtype=object)
    
    for i in range(len(group)):
        # Need at least lookback periods
        if i < lookback:
            result.iloc[i] = None
            continue
        
        # Get the lookback window (including current point)
        start_idx = max(0, i - lookback)
        price_window = group['Close'].iloc[start_idx:i+1]
        rsi_window = group['RSI'].iloc[start_idx:i+1]
        
        # Current values
        current_price = group['Close'].iloc[i]
        current_rsi = group['RSI'].iloc[i]
        
        # Skip if RSI is NaN
        if pd.isna(current_rsi):
            result.iloc[i] = None
            continue
        
        # Find min and max in the window (excluding current point)
        price_window_prev = price_window.iloc[:-1]
        rsi_window_prev = rsi_window.iloc[:-1]
        
        if len(price_window_prev) == 0:
            result.iloc[i] = None
            continue
        
        # Get indices of min/max
        price_min_idx = price_window_prev.idxmin()
        price_max_idx = price_window_prev.idxmax()
        
        # Bullish Divergence: Price making lower lows, RSI making higher lows
        if price_min_idx != group.index[i]:  # Min is not at current point
            prev_price_low = price_window_prev.loc[price_min_idx]
            prev_rsi_at_price_low = group.loc[price_min_idx, 'RSI']
            
            if pd.notna(prev_rsi_at_price_low) and current_price < prev_price_low and current_rsi > prev_rsi_at_price_low:
                result.iloc[i] = 1
                continue
        
        # Bearish Divergence: Price making higher highs, RSI making lower highs
        if price_max_idx != group.index[i]:  # Max is not at current point
            prev_price_high = price_window_prev.loc[price_max_idx]
            prev_rsi_at_price_high = group.loc[price_max_idx, 'RSI']
            
            if pd.notna(prev_rsi_at_price_high) and current_price > prev_price_high and current_rsi < prev_rsi_at_price_high:
                result.iloc[i] = -1
                continue
        
        result.iloc[i] = 0
    
    return result

In [10]:
def calculate_drawdowns(df, lookback_period=100, date_col='Date', ticker_col='Ticker', close_col='Close'):
    """
    Calculate rolling drawdown from peak for each ticker at each date.
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with columns: Date, Ticker, Open, High, Low, Close
    lookback_period : int, default=100
        Number of periods to look back for peak calculation
    date_col : str, default='Date'
        Name of the date column
    ticker_col : str, default='Ticker'
        Name of the ticker column
    close_col : str, default='Close'
        Name of the close price column
    
    Returns:
    --------
    pd.DataFrame
        Original dataframe with added 'Drawdown_%' column
    
    Formula:
    --------
    Drawdown = ((Current Price - Peak Price in Window) / Peak Price) × 100
    
    Example:
    --------
    >>> df = pd.DataFrame({
    ...     'Date': pd.date_range('2024-01-01', periods=150, freq='D'),
    ...     'Ticker': ['AAPL']*150,
    ...     'Close': np.random.randn(150).cumsum() + 100
    ... })
    >>> df_with_dd = calculate_drawdowns(df, lookback_period=50)
    >>> print(df_with_dd[['Date', 'Ticker', 'Close', 'Drawdown_%']].tail())
    """
    
    # Create a copy to avoid modifying original
    df = df.copy()
    
    # Ensure date column is datetime
    df[date_col] = pd.to_datetime(df[date_col])
    
    # Sort by ticker and date
    df = df.sort_values([ticker_col, date_col]).reset_index(drop=True)
    
    # Calculate drawdown for each ticker separately
    def calc_dd_for_ticker(ticker_df):
        """Calculate drawdown for a single ticker"""
        # Get closing prices
        prices = ticker_df[close_col].values
        
        # Calculate rolling maximum (peak) within lookback window
        drawdowns = []
        for i in range(len(prices)):
            # Determine lookback window
            start_idx = max(0, i - lookback_period + 1)
            window_prices = prices[start_idx:i+1]
            
            # Find peak in window
            peak = np.max(window_prices)
            
            # Calculate drawdown
            current_price = prices[i]
            if peak > 0:
                dd = ((current_price - peak) / peak)
            else:
                dd = 0.0
            
            drawdowns.append(dd)
        
        ticker_df['Drawdown_%'] = drawdowns
        return ticker_df
    
    # Apply to each ticker
    df = df.groupby(ticker_col, group_keys=False).apply(calc_dd_for_ticker)
    
    return df

In [11]:
def calculate_relative_strength(df, benchmark_ticker, lookback_period=7, 
                                date_col='Date', ticker_col='Ticker', close_col='Close'):
    """
    Calculate Relative Strength of each ticker vs benchmark using ONLY historical data.
    
    CRITICAL: For each date, only uses data from EARLIER dates (no look-ahead bias).
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with columns: Date, Ticker, Open, High, Low, Close
    benchmark_ticker : str
        Ticker symbol to use as benchmark (e.g., 'NDAQ', 'SPY')
    lookback_period : int, default=7
        Number of HISTORICAL periods to calculate RS over
    date_col : str, default='Date'
        Name of the date column
    ticker_col : str, default='Ticker'
        Name of the ticker column
    close_col : str, default='Close'
        Name of the close price column
    
    Returns:
    --------
    pd.DataFrame
        Original dataframe with added 'RS_%' column
    
    Temporal Logic:
    ---------------
    For date T, RS calculation uses:
    - Current ratio: Asset[T] / Benchmark[T]
    - Historical ratio: Asset[T-lookback] / Benchmark[T-lookback]
    - Both Asset and Benchmark data must exist for dates T and T-lookback
    
    Formula:
    --------
    RS_T = ((Ratio_T - Ratio_(T-lookback)) / Ratio_(T-lookback)) × 100
    where Ratio_T = Asset_Price_T / Benchmark_Price_T
    
    Example Timeline:
    -----------------
    Date          Asset    Benchmark   Ratio    RS (7-day)
    2024-01-01    150      100         1.50     N/A (need 7 days history)
    2024-01-02    152      101         1.505    N/A
    ...
    2024-01-08    160      105         1.524    RS = ((1.524-1.50)/1.50)*100 = +1.6%
    
    Example:
    --------
    >>> df = pd.DataFrame({
    ...     'Date': pd.date_range('2024-01-01', periods=30, freq='D').tolist() * 2,
    ...     'Ticker': ['AAPL']*30 + ['NDAQ']*30,
    ...     'Close': [150 + i*0.5 for i in range(30)] + [100 + i*0.3 for i in range(30)]
    ... })
    >>> df_with_rs = calculate_relative_strength(df, benchmark_ticker='NDAQ', lookback_period=7)
    >>> # RS on day 10 uses data from day 10 and day 3 only (7 days back)
    """
    
    # Create a copy
    df = df.copy()
    
    # Ensure date column is datetime and sort chronologically
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.sort_values([ticker_col, date_col]).reset_index(drop=True)
    
    # Get unique dates in chronological order
    all_dates = sorted(df[date_col].unique())
    
    # Extract benchmark data and create lookup dictionary
    benchmark_df = df[df[ticker_col] == benchmark_ticker][[date_col, close_col]].copy()
    
    # Check if benchmark exists
    if len(benchmark_df) == 0:
        raise ValueError(f"Benchmark ticker '{benchmark_ticker}' not found in dataframe")
    
    # Create benchmark price lookup by date
    benchmark_prices = dict(zip(benchmark_df[date_col], benchmark_df[close_col]))
    
    # Calculate RS for each ticker separately
    def calc_rs_for_ticker(ticker_df):
        """Calculate RS for a single ticker using only historical data"""
        ticker_name = ticker_df[ticker_col].iloc[0]
        
        # Skip benchmark ticker itself
        if ticker_name == benchmark_ticker:
            ticker_df['RS_%'] = 0.0
            return ticker_df
        
        # Reset index for positional access
        ticker_df = ticker_df.reset_index(drop=True)
        
        rs_values = []
        
        for idx in range(len(ticker_df)):
            current_date = ticker_df[date_col].iloc[idx]
            current_price = ticker_df[close_col].iloc[idx]
            
            # Get current benchmark price for this date
            current_bench_price = benchmark_prices.get(current_date)
            
            if current_bench_price is None or current_bench_price == 0:
                rs_values.append(0.0)
                continue
            
            # Find historical date (lookback periods ago)
            # Must use only data from EARLIER dates
            if idx < lookback_period:
                # Not enough historical data available
                rs_values.append(0.0)
                continue
            
            # Get historical prices from lookback_period bars ago
            historical_idx = idx - lookback_period
            historical_date = ticker_df[date_col].iloc[historical_idx]
            historical_price = ticker_df[close_col].iloc[historical_idx]
            
            # Get historical benchmark price
            historical_bench_price = benchmark_prices.get(historical_date)
            
            if historical_bench_price is None or historical_bench_price == 0:
                rs_values.append(0.0)
                continue
            
            # Calculate ratios using ONLY historical data
            current_ratio = current_price / current_bench_price
            historical_ratio = historical_price / historical_bench_price
            
            # Calculate RS
            if historical_ratio > 0:
                rs = ((current_ratio - historical_ratio) / historical_ratio)
            else:
                rs = 0.0
            
            rs_values.append(rs)
        
        ticker_df['RS_%'] = rs_values
        
        return ticker_df
    
    # Apply to each ticker
    df = df.groupby(ticker_col, group_keys=False).apply(calc_rs_for_ticker)
    
    return df

In [12]:
def detect_outliers(df, benchmark_ticker = "NDAQ", 
                    coin_strength_threshold=-0.05,
                    bench_weakness_threshold=-0.1,
                    rs_threshold=0.05,
                    date_col='Date', ticker_col='Ticker'):
    """
    Detect outlier tickers: strong performance during benchmark weakness.
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with columns: Date, Ticker, Drawdown_%, RS_%
        (Must have already run calculate_drawdowns and calculate_relative_strength)
    benchmark_ticker : str
        Ticker symbol used as benchmark
    coin_strength_threshold : float, default=-5.0
        Ticker drawdown must be > this value (closer to peak)
        Example: -5.0 means ticker must be within 5% of its peak
    bench_weakness_threshold : float, default=-10.0
        Benchmark drawdown must be < this value (significant weakness)
        Example: -10.0 means benchmark must be down >10% from peak
    rs_threshold : float, default=5.0
        Ticker RS must be > this value (outperforming benchmark)
        Example: 5.0 means ticker must be +5% stronger than benchmark
    date_col : str, default='Date'
        Name of the date column
    ticker_col : str, default='Ticker'
        Name of the ticker column
    
    Returns:
    --------
    pd.DataFrame
        Original dataframe with added 'Outlier' boolean column
    
    Outlier Criteria (ALL must be true):
    ------------------------------------
    1. Ticker Strength: Drawdown_% > coin_strength_threshold
       → Ticker is near its recent peak (small drawdown)
    
    2. Benchmark Weakness: Benchmark Drawdown_% < bench_weakness_threshold
       → Market/Benchmark is in significant drawdown
    
    3. Relative Strength: RS_% > rs_threshold
       → Ticker is outperforming benchmark significantly
    
    Example:
    --------
    >>> # Assuming df already has Drawdown_% and RS_% columns
    >>> df_outliers = detect_outliers(df, benchmark_ticker='NDAQ',
    ...                               coin_strength_threshold=-5.0,
    ...                               bench_weakness_threshold=-10.0,
    ...                               rs_threshold=5.0)
    >>> print(df_outliers[df_outliers['Outlier']==True][['Date', 'Ticker', 'Drawdown_%', 'RS_%']])
    """
    
    # Create a copy
    df = df.copy()
    
    # Ensure date column is datetime
    df[date_col] = pd.to_datetime(df[date_col])
    
    # Check required columns exist
    required_cols = ['Drawdown_%', 'RS_%']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}. "
                        f"Run calculate_drawdowns() and calculate_relative_strength() first.")
    
    # Get benchmark drawdown for each date
    benchmark_df = df[df[ticker_col] == benchmark_ticker][[date_col, 'Drawdown_%']].copy()
    benchmark_df = benchmark_df.rename(columns={'Drawdown_%': 'Benchmark_DD_%'})
    
    # Check if benchmark exists
    if len(benchmark_df) == 0:
        raise ValueError(f"Benchmark ticker '{benchmark_ticker}' not found in dataframe")
    
    # Merge benchmark drawdown
    df = df.merge(benchmark_df, on=date_col, how='left')
    
    # Detect outliers
    def is_outlier_row(row):
        """Check if a single row meets outlier criteria"""
        # Skip benchmark ticker
        if row[ticker_col] == benchmark_ticker:
            return False
        
        # Criterion 1: Ticker is strong (near peak)
        coin_strong = row['Drawdown_%'] > coin_strength_threshold
        
        # Criterion 2: Benchmark is weak (significant drawdown)
        bench_weak = row['Benchmark_DD_%'] < bench_weakness_threshold
        
        # Criterion 3: Ticker has strong RS (outperforming)
        rs_strong = row['RS_%'] > rs_threshold
        
        # All criteria must be true
        return coin_strong and bench_weak and rs_strong
    
    # Apply outlier detection
    df['Outlier'] = df.apply(is_outlier_row, axis=1)
    
    return df

In [13]:
# Calculate all indicators using groupby
grouped = data.groupby('Ticker')

# RSI and change
data['RSI'] = grouped['Close'].transform(lambda x: calculate_rsi(x))
data['RSI_Chg'] = grouped['RSI'].diff()
data['RSI_Divergence'] = grouped.apply(
    lambda x: rsi_divergence(x, lookback=14), include_groups=False
).reset_index(level=0, drop=True)

# MACD
data['EMA_12'] = grouped['Close'].transform(lambda x: x.ewm(span=12, adjust=False).mean())
data['EMA_26'] = grouped['Close'].transform(lambda x: x.ewm(span=26, adjust=False).mean())
data['MACD'] = data['EMA_12'] - data['EMA_26']
data['MACD_Signal'] = grouped['MACD'].transform(lambda x: x.ewm(span=9, adjust=False).mean())
data['MACD_Histogram'] = data['MACD'] - data['MACD_Signal']
data['Prev_MACD'] = grouped['MACD'].transform(lambda x: x.shift(1))
data['Prev_MACD_Signal'] = grouped['MACD_Signal'].transform(lambda x: x.shift(1))
data['Prev_MACD_Histogram'] = grouped['MACD_Histogram'].transform(lambda x: x.shift(1))
data = data.drop(['EMA_12', 'EMA_26'], axis=1)  # Clean up intermediate columns

# Rate of Change (10-day)
data['ROC'] = grouped['Close'].transform(lambda x: x.pct_change(periods=10))

# Simple Moving Averages by X days
for days in [10, 20, 50, 100, 150, 200, 250]:
    data[f'SMA_{days}'] = grouped['Close'].transform(lambda x: x.rolling(window=days).mean())

# Awesome Oscillator
data['SMA_5'] = grouped['Close'].transform(lambda x: x.rolling(window=5).mean())
data['SMA_34'] = grouped['Close'].transform(lambda x: x.rolling(window=34).mean())
data['AO'] = data['SMA_5'] - data['SMA_34']
data['AO_Chg'] = data.groupby('Ticker')['AO'].diff()
data = data.drop(['SMA_5', 'SMA_34'], axis=1)  # Clean up intermediate columns

# Close X days ago
for days in [1, 2, 3, 4, 5]:
    data[f'Close_{days}days_ago'] = grouped['Close'].shift(days)

# Close change since yesterday
data['Close_Chg'] = grouped['Close'].diff()
data['Close_ChgPct'] = grouped['Close'].transform(lambda x: x.pct_change())

# Volume X days ago
for days in [1]:
    data[f'Volume_{days}d_ago'] = grouped['Volume'].shift(days)

# Volume change since yesterday
data['Volume_Chg'] = grouped['Volume'].diff()
data['Volume_ChgPct'] = grouped['Volume'].transform(lambda x: x.pct_change())

# Aroon Up and Down
data = add_aroon(data, period=14, ticker_col='Ticker')

  df = df.groupby(ticker_col, as_index=False, group_keys=False).apply(calc_aroon)


In [14]:
date_col = 'Date'
ticker_col = 'Ticker'
close_col = 'Close'

# Drawdowns
data = calculate_drawdowns(data, lookback_period=100, date_col=date_col, ticker_col=ticker_col, close_col=close_col)

# Relative Strength
data = calculate_relative_strength(data, benchmark_ticker = "NDAQ", lookback_period=7, 
                                date_col=date_col, ticker_col=ticker_col, close_col=close_col)

# Outliers
data = detect_outliers(data, benchmark_ticker = "NDAQ", date_col=date_col, ticker_col='Ticker')

  df = df.groupby(ticker_col, group_keys=False).apply(calc_dd_for_ticker)
  df = df.groupby(ticker_col, group_keys=False).apply(calc_rs_for_ticker)


In [15]:
# Display sample
print("Sample data with indicators:")
print(data[data['Ticker'] == 'AAPL'].iloc[30:40][
    ['Date', 'Ticker', 'Close', 'RSI', 'MACD', 'SMA_20', 'Close_ChgPct', 'Volume_ChgPct']
])

print("\n\nAll columns:")
print(data.columns.tolist())

Sample data with indicators:
         Date Ticker       Close        RSI      MACD      SMA_20  \
30 2022-12-13   AAPL  143.446945  42.356388 -1.263583  144.610532   
31 2022-12-14   AAPL  141.218369  37.787409 -1.297658  144.273782   
32 2022-12-15   AAPL  134.601669  33.843549 -1.837395  143.667827   
33 2022-12-16   AAPL  132.639374  35.733163 -2.395863  142.868600   
34 2022-12-19   AAPL  130.529129  36.714940 -2.974444  141.935755   
35 2022-12-20   AAPL  130.460129  20.129152 -3.399356  141.161180   
36 2022-12-21   AAPL  133.566284  27.979388 -3.445741  140.434923   
37 2022-12-22   AAPL  130.391083  25.595171 -3.696107  139.506023   
38 2022-12-23   AAPL  130.026230  26.261599 -3.879247  138.704824   
39 2022-12-27   AAPL  128.221680  27.960259 -4.122478  138.005192   

    Close_ChgPct  Volume_ChgPct  
30      0.006782       0.332424  
31     -0.015536      -0.123501  
32     -0.046854       0.202217  
33     -0.014579       0.618860  
34     -0.015910      -0.503034  
35     

In [16]:
# Data from last day
lastday = data.loc[data.groupby('Ticker')['Date'].idxmax()]

In [17]:
# Save
excel_file = 'TADASI_yhfinance.xlsx'
with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer:
    ndaq100df.to_excel(writer, sheet_name='Tickers', index=False)
    data.to_excel(writer, sheet_name='OHLC', index=False)
    lastday.to_excel(writer, sheet_name='Last_Day', index=False)
print(f"\n Saved to {excel_file}")


 Saved to TADASI_yhfinance.xlsx


In [18]:
# https://github.com/ranaroussi/yfinance/issues/2469
# import curl_cffi
# session = curl_cffi.Session(impersonate="chrome", timeout=5)
# ticker = yf.Ticker('GBPEUR=X', session=session)
# data = ticker.history(start='2025-05-05', end='2025-05-07')