In [1]:
!pip install yfinance



In [3]:
import pandas as pd
import yfinance as yf
import numpy as np
import asyncio
import time
from ib_async import IB, Stock
from ib_async import IB, ScannerSubscription
from ib_async import IB

### VALUE, QUALITY & GROWTH

In [176]:
# Step 1: Define Metrics and Parameters
# Description: Define value, quality, and growth metrics for stock screening,
# incorporating industry medians, historical ranges, and logical targets.

# Value Metrics (60% weight)
value_metrics = {
    'P/E': {
        'benchmark': {
            'industry_median': 'below_median',
            'historical_percentile': '<50th'  # Below the 5-year historical median
        },
        'target': 'below both benchmarks'
    },
    'P/FCF': {
        'benchmark': {
            'industry_median': 'below_median',
            'historical_range': '<50th'  # Below historical range (inverse high FCF Yield)
        },
        'target': 'below both benchmarks'
    },
    'EV/EBITDA': {
        'benchmark': {
            'industry_median': '<1.5x',  # EV/EBITDA less than 1.5x industry median (growth-adjusted)
            'historical_range': '<50th'  # Below historical median
        },
        'target': 'below both benchmarks'
    },
    'PEG': {
        'benchmark': {
            'industry_median': '<1.5',  # Adjusted for flexible interpretation
            'historical_median': 'below'  # Ensure it’s undervalued relative to its growth
        },
        'target': 'PEG < 1.5 & reasonable EPS growth'
    }
}

# Quality Metrics (20% weight)
quality_metrics = {
    'ROE': {
        'benchmark': {
            'industry_median': 'above_median OR 25th_percentile_cutoff',  # Flexible benchmark
            'historical_stability': 'consistent'  # Ensure stable trends over time
        },
        'target': 'Above industry OR in top 75% of stocks'
    },
    'FCF Yield': {
        'benchmark': {
            'industry_median': 'above_median'
        },
        'target': 'above benchmark'
    },
    'CFO-to-Net Income': {
        'benchmark': {
            'ratio': '>1.5'  # Ensuring high earnings quality
        },
        'target': 'above threshold'
    }
}

# Growth Metrics (20% weight)
growth_metrics = {
    'EBITDA Growth': {
        'benchmark': {
            'historical_cagr': 'above_median'  # CAGR greater than industry median
        },
        'target': 'above industry median'
    },
    'Revenue Growth Stability': {
        'benchmark': {
            'historical_stability': 'consistent YoY growth'
        },
        'target': 'positive YoY trend'
    }
}

# Print Confirmation
print("Metrics and parameters for Value, Quality, and Growth Factors successfully updated.")


Metrics and parameters for Value, Quality, and Growth Factors successfully updated.


In [178]:

# Step 2: Fetch Stock Universe
def fetch_stock_universe():
    print("Fetching stock universe from Wikipedia sources...")

    # Fetch S&P 500 tickers
    sp500_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    try:
        sp500_table = pd.read_html(sp500_url)[0]
        sp500_tickers = sp500_table['Symbol'].dropna().tolist()  # Convert to list
        print(f"Fetched {len(sp500_tickers)} S&P 500 stocks.")
    except Exception as e:
        print(f"Error fetching S&P 500 data: {e}")
        sp500_tickers = []

    # Fetch NASDAQ-100 tickers
    nasdaq_url = 'https://en.wikipedia.org/wiki/NASDAQ-100'
    try:
        nasdaq_table = pd.read_html(nasdaq_url)[4]
        nasdaq_tickers = nasdaq_table['Symbol'].dropna().tolist()  # Convert to list
        print(f"Fetched {len(nasdaq_tickers)} NASDAQ-100 stocks.")
    except Exception as e:
        print(f"Error fetching NASDAQ-100 data: {e}")
        nasdaq_tickers = []

    # Combine stock lists and explicitly remove duplicates
    stock_universe = list(set(sp500_tickers + nasdaq_tickers))  # Convert back to list
    stock_universe.sort()  # Ensure sorted, consistent order
    print(f"Final stock universe includes {len(stock_universe)} unique stocks.")

    return stock_universe

# Execute stock universe fetch
stock_universe = fetch_stock_universe()


Fetching stock universe from Wikipedia sources...
Fetched 503 S&P 500 stocks.
Fetched 101 NASDAQ-100 stocks.
Final stock universe includes 519 unique stocks.


In [144]:

# Step 3: Fetch Historical Data with Batching and Sleep (Includes EPS)
def fetch_historical_data(stock_list, batch_size=50, delay=1):
    print("Fetching historical data in batches...")
    historical_data = {}

    for i in range(0, len(stock_list), batch_size):
        batch = stock_list[i:i + batch_size]  # Get the current batch
        print(f"Processing batch {i // batch_size + 1} ({len(batch)} stocks)...")

        for symbol in batch:
            try:
                ticker = yf.Ticker(symbol)
                hist = ticker.history(period="max")
                financials = ticker.financials if ticker.financials is not None else pd.DataFrame()

                # Extract financial metrics safely
                net_income = financials.loc['Net Income', :] if 'Net Income' in financials.index else np.nan
                revenue = (
                    financials.loc['Total Revenue', :] if 'Total Revenue' in financials.index else
                    financials.loc['Revenue', :] if 'Revenue' in financials.index else np.nan
                )
                ebitda = financials.loc['EBITDA', :] if 'EBITDA' in financials.index else np.nan

                # Extract EPS Data (Historical)
                eps_from_financials = (
                    financials.loc["Diluted EPS", :].dropna().astype(float)
                    if "Diluted EPS" in financials.index else pd.Series(dtype=float)
                )

                # Store collected data
                historical_data[symbol] = {
                    "price_median": hist['Close'].median() if not hist.empty else np.nan,
                    "price_50th_percentile": np.percentile(hist['Close'], 50) if not hist.empty else np.nan,
                    "net_income": net_income,
                    "revenue": revenue,
                    "ebitda": ebitda,
                    "eps_historical": eps_from_financials  
                }
            except Exception as e:
                print(f"Error fetching data for {symbol}: {e}")

            time.sleep(delay)  
            
    historical_df = pd.DataFrame.from_dict(historical_data, orient='index')

    # Debug: Check if historical data is fetched correctly
    print("\nSample of Historical Data Fetched:")
    print(historical_df.head())

    return historical_df

# Example Execution (Uncomment to run)
# historical_data = fetch_historical_data(stock_universe)



In [146]:


# Step 4: Fetch Fundamental Data with Batching and Sleep (Includes EPS)
def fetch_fundamental_data(stock_list, batch_size=50, delay=1):
    print("Fetching fundamental data in batches...")
    fundamental_data = {}

    for i in range(0, len(stock_list), batch_size):
        batch = stock_list[i:i + batch_size]
        print(f"Processing batch {i // batch_size + 1} ({len(batch)} stocks)...")

        for symbol in batch:
            try:
                ticker = yf.Ticker(symbol)
                info = ticker.info if ticker.info else {}
                financials = ticker.financials if ticker.financials is not None else pd.DataFrame()

                # Fetch net income from financials (fallback to Yahoo API if missing)
                net_income = (
                    financials.loc['Net Income', :].iloc[-1] if 'Net Income' in financials.index else
                    financials.loc['Net Income Applicable to Common Shares', :].iloc[-1] if 'Net Income Applicable to Common Shares' in financials.index else
                    info.get("netIncome", np.nan)
                )

                # Fetch EPS values from Yahoo Finance API
                eps_trailing = info.get("trailingEps", np.nan)  # Trailing EPS
                eps_forward = info.get("forwardEps", np.nan)  # Forward EPS

                # Store collected data
                fundamental_data[symbol] = {
                    "forwardPE": info.get("forwardPE"),
                    "trailingPE": info.get("trailingPE"),
                    "enterpriseValue": info.get("enterpriseValue"),
                    "freeCashflow": info.get("freeCashflow"),
                    "returnOnEquity": info.get("returnOnEquity"),
                    "operatingCashflow": info.get("operatingCashflow"),
                    "netIncome": net_income,
                    "currentPrice": info.get("currentPrice"),
                    "industry": info.get("industry"),
                    "eps_trailing": eps_trailing,  # Added trailing EPS
                    "eps_forward": eps_forward,  # Added forward EPS
                }
            except Exception as e:
                print(f"Error fetching data for {symbol}: {e}")

            time.sleep(delay)  # Avoid rate-limiting

    fundamental_df = pd.DataFrame.from_dict(fundamental_data, orient='index')

    # Debug: Check if fundamental data is fetched correctly
    print("\nSample of Fundamental Data Fetched:")
    print(fundamental_df.head())

    return fundamental_df

# Example Execution (Uncomment to run)
# fundamental_data = fetch_fundamental_data(stock_universe)



In [148]:
# Main Execution: Fetching Data
if __name__ == "__main__":
    batch_size = 50  # Number of stocks per batch
    delay = 1        # Seconds to wait between requests
    
    # Fetch historical and fundamental data
    historical_data = fetch_historical_data(stock_universe, batch_size=batch_size, delay=delay)
    fundamental_data = fetch_fundamental_data(stock_universe, batch_size=batch_size, delay=delay)

    # Display fetched data summaries
    print("\nHistorical Data Summary:")
    print(historical_data.head())

    print("\nFundamental Data Summary:")
    print(fundamental_data.head())

🔍 Fetching historical data in batches...
Processing batch 1 (50 stocks)...
Processing batch 2 (50 stocks)...


$BF.B: possibly delisted; no price data found  (1d 1926-03-28 -> 2025-03-03)
$BRK.B: possibly delisted; no timezone found


Processing batch 3 (50 stocks)...
Processing batch 4 (50 stocks)...
Processing batch 5 (50 stocks)...
Processing batch 6 (50 stocks)...
Processing batch 7 (50 stocks)...
Processing batch 8 (50 stocks)...
Processing batch 9 (50 stocks)...
Processing batch 10 (50 stocks)...
Processing batch 11 (19 stocks)...

🔍 Sample of Historical Data Fetched:
      price_median  price_50th_percentile  \
A        30.398506              30.398506   
AAPL      0.451496               0.451496   
ABBV     68.121307              68.121307   
ABNB    138.990005             138.990005   
ABT      11.437333              11.437333   

                                             net_income  \
A     2024-10-31    1289000000.0
2023-10-31    12400...   
AAPL  2024-09-30    93736000000.0
2023-09-30    9699...   
ABBV  2024-12-31     4278000000.0
2023-12-31     486...   
ABNB  2024-12-31    2648000000.0
2023-12-31    47920...   
ABT   2024-12-31    13402000000.0
2023-12-31     572...   

                            

In [172]:
# Step 5: Calculate Metrics

def calculate_metrics(historical_data, fundamental_data):
    print("Calculating metrics based on methodology...\n")

    # Copy fundamental data for processing
    results = fundamental_data.copy()

    # Ensure numerical consistency for key financial columns
    numeric_columns = [
        "forwardPE", "trailingPE", "enterpriseValue",
        "freeCashflow", "returnOnEquity", "operatingCashflow", "netIncome"
    ]
    for col in numeric_columns:
        if col in results.columns:
            results[col] = pd.to_numeric(results[col], errors="coerce")

    # Initialize growth metrics storage
    growth_metrics = {}

    for symbol in historical_data.index:
        try:
            # Extract financial data
            revenue = historical_data.loc[symbol, "revenue"]
            ebitda = historical_data.loc[symbol, "ebitda"]
            eps_series = historical_data.loc[symbol, "eps_historical"] if "eps_historical" in historical_data.columns else None

            # Ensure revenue and EBITDA are time series
            revenue = revenue if isinstance(revenue, pd.Series) else pd.Series(dtype=float)
            ebitda = ebitda if isinstance(ebitda, pd.Series) else pd.Series(dtype=float)

            # Ensure EPS is in time series format
            if isinstance(eps_series, str):
                eps_series = pd.Series(eval(eps_series))  # Convert from string to Series
            elif not isinstance(eps_series, pd.Series):
                eps_series = pd.Series(dtype=float)

            # Convert to numeric and drop NaNs
            revenue = revenue.astype(float).dropna()
            ebitda = ebitda.astype(float).dropna()
            eps_series = eps_series.astype(float).dropna()

            # **Calculate Growth Metrics**
            revenue_growth = revenue.pct_change().mean() * 100 if len(revenue) > 1 else np.nan
            ebitda_growth = ebitda.pct_change().mean() * 100 if len(ebitda) > 1 else np.nan
            eps_growth = (eps_series.iloc[-1] / eps_series.iloc[-2] - 1) * 100 if len(eps_series) > 1 else np.nan

            # Store calculated growth metrics
            growth_metrics[symbol] = {
                "revenue_growth": revenue_growth,
                "ebitda_growth": ebitda_growth,
                "eps_growth": eps_growth
            }

        except Exception as e:
            print(f"Error calculating growth metrics for {symbol}: {e}")
            growth_metrics[symbol] = {
                "revenue_growth": np.nan,
                "ebitda_growth": np.nan,
                "eps_growth": np.nan
            }

    # Merge growth metrics into results
    growth_df = pd.DataFrame.from_dict(growth_metrics, orient="index")
    results = results.merge(growth_df, left_index=True, right_index=True, how="left")

    # **Industry-Level Benchmarks Calculation**
    industry_medians = (
        results.dropna(subset=["industry", "forwardPE", "returnOnEquity", "enterpriseValue", "freeCashflow"])
        .groupby("industry")[["forwardPE", "returnOnEquity", "enterpriseValue", "freeCashflow"]]
        .median()
        .add_suffix("_industry_median")
    )
    results = results.merge(industry_medians, left_on="industry", right_index=True, how="left")

    # **Value Metric Evaluations**
    results["P/E_eval"] = (
        (results["forwardPE"] < results["forwardPE_industry_median"]) &
        (results["forwardPE"] < historical_data["price_50th_percentile"])
    )
    results["P/FCF_eval"] = (
        (results["freeCashflow"] > results["freeCashflow_industry_median"]) &
        (results["freeCashflow"] > historical_data["price_median"])
    )
    results["EV/EBITDA_Growth"] = results["enterpriseValue"] / results["ebitda_growth"]
    results["EV/EBITDA_eval"] = (
        (results["enterpriseValue"] < results["enterpriseValue_industry_median"]) &
        (results["EV/EBITDA_Growth"] < 1.5)
    )
    results["PEG_eval"] = (
        (results["forwardPE"] / results["eps_growth"]) < 1.5  # Adjusted to avoid over-penalization
    )

    # **ROE Evaluation with Flexible Criteria**
    roe_25th_percentile = results["returnOnEquity"].quantile(0.25)  # Bottom 25% cutoff
    results["ROE_eval"] = (
        (results["returnOnEquity"] > results["returnOnEquity_industry_median"]) |
        (results["returnOnEquity"] > roe_25th_percentile)
    )

    # **Quality Metric Evaluations**
    results["FCF_Yield_eval"] = (
        (results["freeCashflow"].fillna(0) / results["enterpriseValue"].fillna(1)) >
        results["freeCashflow_industry_median"].fillna(0)
    )
    results["CFO_to_Net_Income_eval"] = (
        (results["operatingCashflow"].fillna(0) / results["netIncome"].replace({0: np.nan}).fillna(1)) > 1.5
    )

    # **Growth Metric Evaluations**
    results["EBITDA_Growth_eval"] = results["ebitda_growth"] > results["ebitda_growth"].median()
    
    # **Debugging print statements**
    print("\nChecking Count of Value, Quality, and Growth Metrics:")
    print(results[[
        "P/E_eval", "P/FCF_eval", "EV/EBITDA_eval", "PEG_eval",
        "ROE_eval", "FCF_Yield_eval", "CFO_to_Net_Income_eval",
        "EBITDA_Growth_eval"
    ]].sum())


    # **Composite Scoring Calculation**
    results["Value_Score"] = results[["P/E_eval", "P/FCF_eval", "EV/EBITDA_eval", "PEG_eval"]].sum(axis=1)
    results["Quality_Score"] = results[["ROE_eval", "FCF_Yield_eval", "CFO_to_Net_Income_eval"]].sum(axis=1)
    results["Growth_Score"] = results[["EBITDA_Growth_eval"]].sum(axis=1)

    results["Composite_Score"] = (
        (0.6 * results["Value_Score"]) +
        (0.2 * results["Quality_Score"]) +
        (0.2 * results["Growth_Score"])
    )

    return results


In [174]:
# Main Execution: Metric Calculation
if __name__ == "__main__":
    # Ensure data is not empty before processing
    if historical_data.empty or fundamental_data.empty:
        print("Error: One or both datasets are empty. Check data fetching process.")
    else:
        print("Data successfully fetched. Proceeding with metric calculations...\n")
        
        # Calculate metrics
        evaluated_results = calculate_metrics(historical_data, fundamental_data)

        # Display Summary
        print("\nEvaluated Metrics Summary (Top 10 Stocks):")
        print(evaluated_results.sort_values(by="Composite_Score", ascending=False).head(10))

        print("\nMetric calculation completed successfully.")


Data successfully fetched. Proceeding with metric calculations...

Calculating metrics based on methodology...


Checking Count of Value, Quality, and Growth Metrics:
P/E_eval                  136
P/FCF_eval                197
EV/EBITDA_eval            125
PEG_eval                  467
ROE_eval                  372
FCF_Yield_eval             37
CFO_to_Net_Income_eval    448
EBITDA_Growth_eval        234
dtype: int64

Evaluated Metrics Summary (Top 10 Stocks):
      forwardPE  trailingPE  enterpriseValue  freeCashflow  returnOnEquity  \
NXPI  16.495028   22.180040     6.287996e+10  1.545000e+09         0.27494   
TEL   17.133482   24.333334     4.903634e+10  2.194000e+09         0.14879   
JBL   15.476523   14.642722     1.810894e+10  2.205500e+09         0.62679   
EXPE  13.785516   22.118437     2.886155e+10  1.859250e+09         0.43832   
MLM   23.183302   14.902531     3.481705e+10  1.491875e+09         0.22822   
GPN    8.224999   17.090910     4.144414e+10  3.336414e+09         0

In [180]:
# Información de las Top 100 Acciones
# Ordenar por 'Composite_Score' en orden descendente
evaluated_results_sorted = evaluated_results.sort_values(by="Composite_Score", ascending=False)

# Guardar las primeras 100 filas en un nuevo DataFrame
top_100_results = evaluated_results_sorted.head(100)

# Mostrar los resultados ordenados
print("\nTop 100 Evaluated Metrics Summary:")
print(top_100_results[["Composite_Score", "Value_Score", "Quality_Score", "Growth_Score"]])


Top 100 Evaluated Metrics Summary:
      Composite_Score  Value_Score  Quality_Score  Growth_Score
NXPI              3.0            4              2             1
TEL               3.0            4              2             1
JBL               2.8            4              2             0
EXPE              2.8            4              2             0
MLM               2.8            4              2             0
...               ...          ...            ...           ...
GIS               1.8            2              2             1
CSX               1.8            2              2             1
KEYS              1.8            2              2             1
FSLR              1.8            2              2             1
MRK               1.8            2              2             1

[100 rows x 4 columns]


### Market Sentiment

## IBKR Market Scanner

In [None]:
# Initialize IB client
ib = IB()

try:
    # Connect to IBKR TWS/Gateway
    print("Connecting to IBKR...")
    ib.connect(host="127.0.0.1", port=7497, clientId=3)  # Synchronous connection
    print("Successfully connected to IBKR.")

    # Set market data type to delayed (avoiding real-time data fees)
    print("Setting market data type to delayed...")
    ib.reqMarketDataType(3)  # Synchronous call to set delayed data type
    print("Market data type set to delayed.")

    # Connection remains open for further tasks
    print("IBKR connection is active and ready for further operations.")

except Exception as e:
    print(f"An error occurred while connecting to IBKR: {e}")
    # If there's an error, attempt to disconnect gracefully
    try:
        ib.disconnect()
        print("Disconnected from IBKR due to an error.")
    except Exception as disconnect_error:
        print(f"Error during disconnection: {disconnect_error}")

In [None]:


async def fetch_market_scanners(ib, stock_universe):
    """
    Fetch market scanner data asynchronously for Top Gainers, Top Losers, and Unusual Volume.
    """
    print("Fetching market scanner data...")
    scanner_results = {
        "Top Gainers": [],
        "Top Losers": [],
        "Unusual Volume": []
    }
    
    try:
        # Define scanner subscriptions for each category
        scanner_subscriptions = {
            "Top Gainers": ScannerSubscription(instrument="STK", locationCode="STK.US.MAJOR", scanCode="TOP_PERC_GAIN"),
            "Top Losers": ScannerSubscription(instrument="STK", locationCode="STK.US.MAJOR", scanCode="TOP_PERC_LOSE"),
            "Unusual Volume": ScannerSubscription(instrument="STK", locationCode="STK.US.MAJOR", scanCode="HOT_BY_VOLUME")
        }
        
        # Fetch data for each scanner category
        for category, subscription in scanner_subscriptions.items():
            print(f"Fetching {category} asynchronously...")
            scan_results = await ib.reqScannerSubscriptionAsync(subscription)
            
            # Filter results to only include symbols from the stock universe
            scanner_results[category] = [
                result.contract.symbol for result in scan_results if result.contract.symbol in stock_universe
            ]
            print(f"{category} fetched successfully: {len(scanner_results[category])} stocks.")
    
    except Exception as e:
        print(f"Error fetching market scanner data: {e}")
    
    return scanner_results

async def integrate_scanner_with_filtered(top_100_stocks, scanner_results):
    """
    Map market scanner results to top 100 filtered stocks and assign Market Scanner Scores.
    """
    print("Integrating scanner results with filtered stocks...")
    
    # Initialize score columns
    top_100_stocks["Market_Scanner_Score"] = 0
    
    for index, row in top_100_stocks.iterrows():
        symbol = index  # Assuming the stock symbol is the index
        
        # Assign scores based on scanner results
        if symbol in scanner_results["Top Gainers"]:
            top_100_stocks.loc[index, "Market_Scanner_Score"] += 1
        if symbol in scanner_results["Top Losers"]:
            top_100_stocks.loc[index, "Market_Scanner_Score"] -= 1
        if symbol in scanner_results["Unusual Volume"]:
            top_100_stocks.loc[index, "Market_Scanner_Score"] += 1
    
    print("Integration completed.")
    return top_100_stocks

async def main():
    """
    Main asynchronous execution to fetch scanner data and update the top 100 filtered stocks.
    """
    # Ensure you have an active IBKR connection
    print("Using active IBKR connection for Market Scanner...")
    
    # Your existing 'evaluated_results_sorted' and 'top_100_results'
    # Ensure `top_100_results` is loaded with your previously filtered top 100 stocks
    top_100_stocks = top_100_results.copy()
    stock_universe = top_100_stocks.index.tolist()  # Universe is the top 100 stock symbols
    
    # Fetch scanner data for the top 100 stocks
    scanner_results = await fetch_market_scanners(ib, stock_universe)
    
    # Integrate scanner results with the filtered top 100 stocks
    updated_top_100 = await integrate_scanner_with_filtered(top_100_stocks, scanner_results)
    
    # Display updated results with Market Scanner Score
    print("\nUpdated Top 100 with Market Scanner Score:")
    print(updated_top_100[["Composite_Score", "Market_Scanner_Score"]].head())
    
    # Keep IBKR connection open for subsequent tasks
    print("IBKR connection remains open for further operations.")
    return updated_top_100

# Execution
if __name__ == "__main__":
    import asyncio
    
    
