In [1]:
import pandas as pd
import yfinance as yf
import numpy as np
import asyncio
import time
from ib_async import IB, Stock
from ib_async import IB, ScannerSubscription
from ib_async import IB

### VALUE & QUALITY

In [5]:
# Step 2: Define Metrics and Parameters
# Description: Define the value and quality metrics for stock screening, incorporating industry medians, historical ranges, or both.

# Value Metrics
value_metrics = {
    # Dual Benchmarking: Industry Median + Historical Range
    'P/E': {
        'benchmark': {
            'industry_median': 'below_median',
            'historical_percentile': '<50th'  # Below the 5-year historical median
        },
        'target': 'below both benchmarks'  # Must meet both conditions
    },
    # Dual Benchmarking: Industry Median + Historical Range
    'P/FCF': {
        'benchmark': {
            'industry_median': 'below_median',
            'historical_range': '<50th'  # Below historical range (inverse high FCF Yield)
        },
        'target': 'below both benchmarks'
    },
    # Industry Median + Growth-Adjusted Benchmark
    'EV/EBITDA': {
        'benchmark': {
            'industry_median': '<1.5',  # EV/EBITDA less than 1.5x industry median
            'historical_range': '<50th'  # Below 50th percentile of historical EV/EBITDA
        },
        'target': 'below both benchmarks'
    },
    # PEG Ratio: Company-Specific Target (Requires Earnings Growth Rate)
    'PEG': {
        'benchmark': None,  # Evaluated directly against a fixed target
        'target': '<1'  # Undervalued relative to growth
    }
}

# Quality Metrics
quality_metrics = {
    # Dual Benchmarking: Industry Median + Historical Stability
    'ROE': {
        'benchmark': {
            'industry_median': '>15%',
            'historical_stability': 'consistent'  # Stable ROE trends over time
        },
        'target': 'meets both benchmarks'
    },
    # Industry Median Only
    'FCF Yield': {
        'benchmark': {
            'industry_median': 'above_median'  # Stronger than peers
        },
        'target': 'above benchmark'
    },
    # Historical Growth Comparison (Requires Historical EBITDA Data)
    'EBITDA Growth': {
        'benchmark': {
            'historical_cagr': 'above_median'  # CAGR greater than historical median
        },
        'target': 'meets benchmark'
    },
    # Historical Stability Only (Requires Historical Revenue Data)
    'Revenue Growth Stability': {
        'benchmark': {
            'historical_stability': 'consistent YoY growth'  # Year-over-Year stability
        },
        'target': 'meets benchmark'
    }
}

# Print Confirmation
print("Metrics and parameters for Value and Quality Factors defined.")


Metrics and parameters for Value and Quality Factors defined.


In [4]:
# Step 1: Fetch Stock Universe
# S&P 500 Wikipedia
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
table_sp = pd.read_html(url)
sp500_table = table_sp[0]

sp500_tickers = sp500_table['Symbol'].tolist() # List of Tickers
   
# NASDAQ-100 Wikipedia
url = 'https://en.wikipedia.org/wiki/NASDAQ-100'
tables_nas = pd.read_html(url)
nasdaq_table = tables_nas[4]
nasdaq_tickers = nasdaq_table['Symbol'].tolist()  # List of Tickers

# Stock Universe
stock_universe = sp500_tickers + nasdaq_tickers
print(f"Stock universe includes {len(stock_universe)} stocks.")
    

Stock universe includes 604 stocks.


In [6]:
# Drop Duplicates
stock_universe = list(set(stock_universe))
len(stock_universe)

520

In [8]:
# Step 2: Fetch Historical Data with Batching and Sleep
def fetch_historical_data(stock_list, batch_size=50, delay=1):
    print("Fetching historical data in batches...")
    historical_data = {}
    
    # Process the stock list in batches
    for i in range(0, len(stock_list), batch_size):
        batch = stock_list[i:i + batch_size]  # Get the current batch
        print(f"Processing batch {i // batch_size + 1} ({len(batch)} stocks)...")
        
        for symbol in batch:
            try:
                ticker = yf.Ticker(symbol)
                hist = ticker.history(period="max")
                financials = ticker.financials

                # Fetch raw data without calculations
                revenue = financials.loc['Total Revenue', :] if 'Total Revenue' in financials.index else None
                ebitda = financials.loc['EBITDA', :] if 'EBITDA' in financials.index else None

                historical_data[symbol] = {
                    "price_median": hist['Close'].median() if not hist.empty else np.nan,
                    "price_50th_percentile": np.percentile(hist['Close'], 50) if not hist.empty else np.nan,
                    "revenue": revenue,
                    "ebitda": ebitda,
                }
            except Exception as e:
                print(f"Error fetching historical data for {symbol}: {e}")
            
            time.sleep(delay)  # Pause to avoid rate-limiting
            
    return pd.DataFrame.from_dict(historical_data, orient='index')

In [10]:
# Step 3: Fetch Fundamental Data with Batching and Sleep
def fetch_fundamental_data(stock_list, batch_size=50, delay=1):
    print("Fetching fundamental data in batches...")
    fundamental_data = {}
    
    # Process the stock list in batches
    for i in range(0, len(stock_list), batch_size):
        batch = stock_list[i:i + batch_size]  # Get the current batch
        print(f"Processing batch {i // batch_size + 1} ({len(batch)} stocks)...")
        
        for symbol in batch:
            try:
                ticker = yf.Ticker(symbol)
                info = ticker.info

                fundamental_data[symbol] = {
                    "forwardPE": info.get("forwardPE"),
                    "trailingPE": info.get("trailingPE"),
                    "enterpriseValue": info.get("enterpriseValue"),
                    "freeCashflow": info.get("freeCashflow"),
                    "returnOnEquity": info.get("returnOnEquity"),
                    "operatingCashflow": info.get("operatingCashflow"),
                    "currentPrice": info.get("currentPrice"),
                    "industry": info.get("industry"),
                }
            except Exception as e:
                print(f"Error fetching fundamental data for {symbol}: {e}")
            
            time.sleep(delay)  # Pause to avoid rate-limiting
    
    return pd.DataFrame.from_dict(fundamental_data, orient='index')


In [12]:
# Main Execution: Fetching Data
if __name__ == "__main__":
    batch_size = 50  # Number of stocks per batch
    delay = 1        # Seconds to wait between requests
    
    # Fetch historical and fundamental data
    historical_data = fetch_historical_data(stock_universe, batch_size=batch_size, delay=delay)
    fundamental_data = fetch_fundamental_data(stock_universe, batch_size=batch_size, delay=delay)

    # Display fetched data summaries
    print("\nHistorical Data Summary:")
    print(historical_data.head())

    print("\nFundamental Data Summary:")
    print(fundamental_data.head())

Fetching historical data in batches...
Processing batch 1 (50 stocks)...
Processing batch 2 (50 stocks)...
Processing batch 3 (50 stocks)...
Processing batch 4 (50 stocks)...
Processing batch 5 (50 stocks)...
Processing batch 6 (50 stocks)...
Processing batch 7 (50 stocks)...
Processing batch 8 (50 stocks)...
Processing batch 9 (50 stocks)...


$BF.B: possibly delisted; no price data found  (1d 1925-12-24 -> 2024-11-29)


Processing batch 10 (50 stocks)...
Processing batch 11 (20 stocks)...


$BRK.B: possibly delisted; no timezone found


Fetching fundamental data in batches...
Processing batch 1 (50 stocks)...
Processing batch 2 (50 stocks)...
Processing batch 3 (50 stocks)...
Processing batch 4 (50 stocks)...
Processing batch 5 (50 stocks)...
Processing batch 6 (50 stocks)...
Processing batch 7 (50 stocks)...
Processing batch 8 (50 stocks)...
Processing batch 9 (50 stocks)...
Processing batch 10 (50 stocks)...
Processing batch 11 (20 stocks)...

Historical Data Summary:
      price_median  price_50th_percentile  \
MU       12.987206              12.987206   
BRO       6.252452               6.252452   
ITW      17.168869              17.168869   
FTNT      8.013000               8.013000   
NTRS     28.407188              28.407188   

                                                revenue  \
MU    2024-08-31    25111000000.0
2023-08-31    1554...   
BRO   2023-12-31    4199400000.0
2022-12-31    35632...   
ITW   2023-12-31    16107000000.0
2022-12-31    1593...   
FTNT  2023-12-31    5304800000.0
2022-12-31    4417

In [14]:
# Step 4: Calculate Metrics
def calculate_metrics(historical_data, fundamental_data):
    print("Calculating metrics based on methodology...")
    results = fundamental_data.copy()

    # Ensure numerical consistency
    numeric_columns = ['forwardPE', 'trailingPE', 'enterpriseValue', 
                       'freeCashflow', 'returnOnEquity', 'operatingCashflow']
    results[numeric_columns] = results[numeric_columns].apply(pd.to_numeric, errors='coerce')

    # Revenue and EBITDA growth calculations
    growth_metrics = {}
    for symbol in historical_data.index:
        try:
            revenue = historical_data.loc[symbol, 'revenue']
            ebitda = historical_data.loc[symbol, 'ebitda']

            # Explicitly handle missing values and enforce float type
            if revenue is not None:
                revenue = revenue.dropna()
                revenue_growth = revenue.pct_change().mean() * 100
                revenue_growth = float(revenue_growth)  # Ensure float type
            else:
                revenue_growth = np.nan

            if ebitda is not None:
                ebitda = ebitda.dropna()
                ebitda_growth = ebitda.pct_change().mean() * 100
                ebitda_growth = float(ebitda_growth)  # Ensure float type
            else:
                ebitda_growth = np.nan

            growth_metrics[symbol] = {
                "revenue_growth": revenue_growth,
                "ebitda_growth": ebitda_growth,
            }
        except Exception as e:
            print(f"Error calculating growth metrics for {symbol}: {e}")
            growth_metrics[symbol] = {"revenue_growth": np.nan, "ebitda_growth": np.nan}
    
    growth_df = pd.DataFrame.from_dict(growth_metrics, orient='index')
    results = results.merge(growth_df, left_index=True, right_index=True, how='left')

    # Industry-level benchmarks
    industry_medians = (
        results.groupby("industry")[["forwardPE", "enterpriseValue", "freeCashflow", "returnOnEquity"]]
        .median()
        .add_suffix("_industry_median")
    )
    results = results.merge(industry_medians, left_on="industry", right_index=True, how="left")

    # Value Metric Evaluations
    results["P/E_eval"] = (
        (results["forwardPE"] < results["forwardPE_industry_median"]) &
        (results["forwardPE"] < historical_data["price_50th_percentile"])
    )
    results["P/FCF_eval"] = (
        (results["freeCashflow"] > results["freeCashflow_industry_median"]) &
        (results["freeCashflow"] > historical_data["price_median"])
    )
    results["EV/EBITDA_eval"] = (
        (results["enterpriseValue"] < results["enterpriseValue_industry_median"]) &
        (results["ebitda_growth"] > 0)  # Positive EBITDA growth
    )
    results["PEG_eval"] = results["forwardPE"] / results["revenue_growth"] < 1

    # Quality Metric Evaluations
    results["ROE_eval"] = results["returnOnEquity"] > 15
    results["FCF_Yield_eval"] = results["freeCashflow"] > results["freeCashflow_industry_median"]
    results["EBITDA_Growth_eval"] = results["ebitda_growth"] > results["ebitda_growth"].median()
    results["Revenue_Stability_eval"] = results["revenue_growth"].notnull()  # Check for non-missing stable growth

    # Composite Scoring
    results["Value_Score"] = results[["P/E_eval", "P/FCF_eval", "EV/EBITDA_eval", "PEG_eval"]].sum(axis=1)
    results["Quality_Score"] = results[["ROE_eval", "FCF_Yield_eval", "EBITDA_Growth_eval", "Revenue_Stability_eval"]].sum(axis=1)
    results["Composite_Score"] = 0.7 * results["Value_Score"] + 0.3 * results["Quality_Score"]

    return results

In [16]:
# Main Execution: Metric Calculation
if __name__ == "__main__":
    # Calculate metrics
    evaluated_results = calculate_metrics(historical_data, fundamental_data)

Calculating metrics based on methodology...


  revenue_growth = revenue.pct_change().mean() * 100
  ebitda_growth = ebitda.pct_change().mean() * 100


In [18]:
# Información de las Top 10 Acciones
# Ordenar por 'Composite_Score' en orden descendente
evaluated_results_sorted = evaluated_results.sort_values(by="Composite_Score", ascending=False)

# Guardar las primeras 10 filas en un nuevo DataFrame
top_100_results = evaluated_results_sorted.head(100)

# Mostrar los resultados ordenados
print("\nTop 100 Evaluated Metrics Summary:")
print(top_100_results[["Composite_Score", "Value_Score", "Quality_Score"]])


Top 100 Evaluated Metrics Summary:
      Composite_Score  Value_Score  Quality_Score
NOC               3.7            4              3
LYB               3.7            4              3
NRG               3.7            4              3
PARA              3.7            4              3
BBY               3.0            3              3
...               ...          ...            ...
CMI               2.3            2              3
F                 2.3            2              3
HCA               2.3            2              3
ABBV              2.3            2              3
UNP               2.3            2              3

[100 rows x 3 columns]


### Market Sentiment

## IBKR Market Scanner

In [12]:
# Initialize IB client
ib = IB()

try:
    # Connect to IBKR TWS/Gateway
    print("Connecting to IBKR...")
    ib.connect(host="127.0.0.1", port=7497, clientId=3)  # Synchronous connection
    print("Successfully connected to IBKR.")

    # Set market data type to delayed (avoiding real-time data fees)
    print("Setting market data type to delayed...")
    ib.reqMarketDataType(3)  # Synchronous call to set delayed data type
    print("Market data type set to delayed.")

    # Connection remains open for further tasks
    print("IBKR connection is active and ready for further operations.")

except Exception as e:
    print(f"An error occurred while connecting to IBKR: {e}")
    # If there's an error, attempt to disconnect gracefully
    try:
        ib.disconnect()
        print("Disconnected from IBKR due to an error.")
    except Exception as disconnect_error:
        print(f"Error during disconnection: {disconnect_error}")

Connecting to IBKR...
An error occurred while connecting to IBKR: This event loop is already running
Disconnected from IBKR due to an error.


In [None]:


async def fetch_market_scanners(ib, stock_universe):
    """
    Fetch market scanner data asynchronously for Top Gainers, Top Losers, and Unusual Volume.
    """
    print("Fetching market scanner data...")
    scanner_results = {
        "Top Gainers": [],
        "Top Losers": [],
        "Unusual Volume": []
    }
    
    try:
        # Define scanner subscriptions for each category
        scanner_subscriptions = {
            "Top Gainers": ScannerSubscription(instrument="STK", locationCode="STK.US.MAJOR", scanCode="TOP_PERC_GAIN"),
            "Top Losers": ScannerSubscription(instrument="STK", locationCode="STK.US.MAJOR", scanCode="TOP_PERC_LOSE"),
            "Unusual Volume": ScannerSubscription(instrument="STK", locationCode="STK.US.MAJOR", scanCode="HOT_BY_VOLUME")
        }
        
        # Fetch data for each scanner category
        for category, subscription in scanner_subscriptions.items():
            print(f"Fetching {category} asynchronously...")
            scan_results = await ib.reqScannerSubscriptionAsync(subscription)
            
            # Filter results to only include symbols from the stock universe
            scanner_results[category] = [
                result.contract.symbol for result in scan_results if result.contract.symbol in stock_universe
            ]
            print(f"{category} fetched successfully: {len(scanner_results[category])} stocks.")
    
    except Exception as e:
        print(f"Error fetching market scanner data: {e}")
    
    return scanner_results

async def integrate_scanner_with_filtered(top_100_stocks, scanner_results):
    """
    Map market scanner results to top 100 filtered stocks and assign Market Scanner Scores.
    """
    print("Integrating scanner results with filtered stocks...")
    
    # Initialize score columns
    top_100_stocks["Market_Scanner_Score"] = 0
    
    for index, row in top_100_stocks.iterrows():
        symbol = index  # Assuming the stock symbol is the index
        
        # Assign scores based on scanner results
        if symbol in scanner_results["Top Gainers"]:
            top_100_stocks.loc[index, "Market_Scanner_Score"] += 1
        if symbol in scanner_results["Top Losers"]:
            top_100_stocks.loc[index, "Market_Scanner_Score"] -= 1
        if symbol in scanner_results["Unusual Volume"]:
            top_100_stocks.loc[index, "Market_Scanner_Score"] += 1
    
    print("Integration completed.")
    return top_100_stocks

async def main():
    """
    Main asynchronous execution to fetch scanner data and update the top 100 filtered stocks.
    """
    # Ensure you have an active IBKR connection
    print("Using active IBKR connection for Market Scanner...")
    
    # Your existing 'evaluated_results_sorted' and 'top_100_results'
    # Ensure `top_100_results` is loaded with your previously filtered top 100 stocks
    top_100_stocks = top_100_results.copy()
    stock_universe = top_100_stocks.index.tolist()  # Universe is the top 100 stock symbols
    
    # Fetch scanner data for the top 100 stocks
    scanner_results = await fetch_market_scanners(ib, stock_universe)
    
    # Integrate scanner results with the filtered top 100 stocks
    updated_top_100 = await integrate_scanner_with_filtered(top_100_stocks, scanner_results)
    
    # Display updated results with Market Scanner Score
    print("\nUpdated Top 100 with Market Scanner Score:")
    print(updated_top_100[["Composite_Score", "Market_Scanner_Score"]].head())
    
    # Keep IBKR connection open for subsequent tasks
    print("IBKR connection remains open for further operations.")
    return updated_top_100

# Execution
if __name__ == "__main__":
    import asyncio
    
    
