<a href="https://colab.research.google.com/github/JerryChenz/Screener_Proc_v1/blob/master/Screener.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from IPython.display import display

# Map each region code to the raw GitHub URL of its cleaned screening CSV.
# NOTE: 'yourusername' and 'yourrepo' are placeholders; substitute your own GitHub details.
regions = {
    code: (
        'https://raw.githubusercontent.com/yourusername/yourrepo/main/'
        f'data/cleaned_data/{code}_screen_data.csv'
    )
    for code in ('us', 'cn', 'hk')
}

def screen_companies(df):
    """
    Screen companies based on a combined ranking using three financial metrics:
    1. EBIT/Market Cap (higher is better)
    2. ROIC (higher is better)
    3. Composite indicator of Dividend per share/Price (higher is better) and
       Total Debt/Common Equity (lower is better)

    The input frame is not modified; all derived columns are computed on a copy.

    Required input columns: 'Ticker', 'Company Name', 'Industry', 'Market Price',
    'Market Cap', 'Market Currency', 'Past Annual Sales', 'Past Annual Cogs',
    'Past Annual Opex', 'Latest Invested Capital', 'Past Financial Year Dividends',
    'Latest Total Debt' and 'Latest Common Equity'.

    A row is kept only if 'Market Price', 'Market Cap' and 'Latest Invested Capital'
    are strictly positive and EBIT, dividends, common equity and total debt are
    not missing.

    Args:
        df: DataFrame with one row per company and the columns listed above.

    Returns:
        DataFrame sorted by 'Combined_rank' ascending (lower is better) with the
        columns 'Ticker', 'Company Name', 'Industry', 'Market Price', 'Market Cap',
        'Market Currency', 'EBIT/Market Cap', 'ROIC', 'D/P',
        'Total Debt/Common Equity' and 'Combined_rank'. If no row survives the
        filter, a message is printed and an empty frame with those columns is
        returned.

    Raises:
        KeyError: if a required column is missing from ``df``.
    """
    output_columns = [
        'Ticker', 'Company Name', 'Industry', 'Market Price', 'Market Cap', 'Market Currency',
        'EBIT/Market Cap', 'ROIC', 'D/P', 'Total Debt/Common Equity', 'Combined_rank'
    ]
    leverage_rank_col = 'Total Debt/Common Equity_rank'

    # Work on a copy so the derived columns never leak into the caller's frame.
    df = df.copy()

    # EBIT = sales - cost of goods sold - operating expenses
    df['EBIT'] = df['Past Annual Sales'] - df['Past Annual Cogs'] - df['Past Annual Opex']

    # Ratios used for ranking
    df['EBIT/Market Cap'] = df['EBIT'] / df['Market Cap']
    df['ROIC'] = df['EBIT'] / df['Latest Invested Capital']
    df['D/P'] = df['Past Financial Year Dividends'] / df['Market Price']
    df['Total Debt/Common Equity'] = df['Latest Total Debt'] / df['Latest Common Equity']

    # Keep rows whose denominators are strictly positive and whose inputs are present
    valid_mask = (
        (df['Market Price'] > 0) &
        (df['Market Cap'] > 0) &
        (df['Latest Invested Capital'] > 0) &
        (df['EBIT'].notna()) &
        (df['Past Financial Year Dividends'].notna()) &
        (df['Latest Common Equity'].notna()) &
        (df['Latest Total Debt'].notna())
    )
    df_valid = df[valid_mask].copy()

    if df_valid.empty:
        print("No valid data remaining after filtering.")
        return pd.DataFrame(columns=output_columns)

    # Rank 1 is the highest value; ties share the lowest rank of their group
    df_valid['EBIT/Market Cap_rank'] = df_valid['EBIT/Market Cap'].rank(ascending=False, method='min')
    df_valid['ROIC_rank'] = df_valid['ROIC'].rank(ascending=False, method='min')
    df_valid['D/P_rank'] = df_valid['D/P'].rank(ascending=False, method='min')

    # The debt/equity ratio is only meaningful for positive equity. Those companies
    # are ranked by the ratio (lowest first); companies with zero or negative equity
    # are placed after all of them, ordered among themselves by total debt.
    positive_equity = df_valid['Latest Common Equity'] > 0
    positive_count = positive_equity.sum()
    df_valid[leverage_rank_col] = float('nan')
    df_valid.loc[positive_equity, leverage_rank_col] = (
        df_valid.loc[positive_equity, 'Total Debt/Common Equity'].rank(ascending=True, method='min')
    )
    non_positive_rank = df_valid.loc[~positive_equity, 'Latest Total Debt'].rank(ascending=True, method='min')
    df_valid.loc[~positive_equity, leverage_rank_col] = positive_count + non_positive_rank

    # Composite indicator: sum the dividend-yield and leverage ranks, then re-rank the
    # sum so it carries the same weight as each of the other two metrics
    df_valid['composite_score'] = df_valid['D/P_rank'] + df_valid[leverage_rank_col]
    df_valid['composite_rank'] = df_valid['composite_score'].rank(ascending=True, method='min')

    # Combined rank: lower is better
    df_valid['Combined_rank'] = (
        df_valid['EBIT/Market Cap_rank'] +
        df_valid['ROIC_rank'] +
        df_valid['composite_rank']
    )

    return df_valid.sort_values('Combined_rank')[output_columns]

# Screened results, keyed by region code
screened_dfs = {}

# Load, screen and preview every region. A failure in one region is reported
# and does not stop the remaining regions from being processed.
for region, url in regions.items():
    try:
        df = pd.read_csv(url)
        df_screened = screen_companies(df)
        screened_dfs[region] = df_screened
        if df_screened.empty:
            print(f"No screened companies for {region}.")
        else:
            print(f"Top 10 screened companies for {region}:")
            display(df_screened.head(10))
    except Exception as e:
        print(f"Error processing {region}: {e}")

In [None]:
from google.colab import files

# Write each non-empty screened result to '<region>_screened.csv' and pass the
# file to files.download; regions with no rows are only reported.
for region, df in screened_dfs.items():
    if df.empty:
        print(f"No data to download for {region}.")
        continue
    csv_filename = f"{region}_screened.csv"
    df.to_csv(csv_filename, index=False)
    files.download(csv_filename)