In [12]:
import os

# Store the FMP API key (placeholder text for now) in this process's
# environment so that code run later can look it up under "API_KEY".
os.environ.update({"API_KEY": "leaving out for now "})

# Echo the stored value back as a quick check that the assignment took effect.
print(os.environ.get("API_KEY"))


leaving out for now 


In [None]:
import requests
import pandas as pd
import numpy as np
import os

# FMP API key, read once from the environment; None when "API_KEY" is unset.
API_KEY = os.getenv("API_KEY")


# 1. Fetch S&P 500 Symbols
def get_sp500_symbols(timeout=10.0):
    """
    Fetch the list of S&P 500 constituent symbols from FMP's
    sp500_constituent endpoint.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of ticker symbols (strings). The list is empty when the request
        fails, the server answers with a non-200 status, or the payload is not
        a list of objects carrying a "symbol" field. Failures are reported on
        stdout rather than raised.
    """
    url = "https://financialmodelingprep.com/api/v3/sp500_constituent"
    try:
        # Without a timeout a stalled connection would block the run forever.
        resp = requests.get(url, params={"apikey": API_KEY}, timeout=timeout)
        if resp.status_code != 200:
            print("Error retrieving S&P 500 symbols: HTTP", resp.status_code)
            return []
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print("Error retrieving S&P 500 symbols:", e)
        return []

    if not isinstance(data, list):
        # Anything other than a list is not a constituent listing.
        print("Error retrieving S&P 500 symbols: unexpected payload")
        return []

    return [
        comp["symbol"]
        for comp in data
        if isinstance(comp, dict) and comp.get("symbol")
    ]

# Helpers and endpoint wrappers for fetching data from FMP
def safe_number(x, default=0.0):
    """
    Return float(x), or `default` when x cannot be converted.

    Any exception raised by the conversion (e.g. for None or a non-numeric
    string) results in `default` being returned unchanged.
    """
    try:
        converted = float(x)
    except Exception:
        return default
    else:
        return converted

def get_company_profile(symbol, timeout=10.0):
    """
    Get the company profile, which includes sector and industry info.
    Endpoint: /api/v3/profile/<symbol>

    Args:
        symbol: Ticker symbol to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the response list (a dict with keys like 'sector'
        and 'industry'), or {} when the request fails or the payload does not
        have that shape. Failures are reported on stdout rather than raised.
    """
    url = f"https://financialmodelingprep.com/api/v3/profile/{symbol}"
    try:
        # Without a timeout a stalled connection would block the run forever.
        resp = requests.get(url, params={"apikey": API_KEY}, timeout=timeout)
        if resp.status_code == 200:
            data = resp.json()
            if isinstance(data, list) and data and isinstance(data[0], dict):
                return data[0]
        else:
            print(f"Profile request for {symbol} failed: HTTP {resp.status_code}")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error retrieving profile for {symbol}: {e}")
    return {}

def get_ratios_ttm(symbol, timeout=10.0):
    """
    TTM Ratios: e.g., priceEarningsRatioTTM, returnOnEquityTTM, debtEquityRatioTTM, etc.
    /api/v3/ratios-ttm/<symbol>

    Args:
        symbol: Ticker symbol to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the response list as a dict, or {} when the request
        fails or the payload is not a non-empty list of dicts. Failures are
        reported on stdout rather than raised.
    """
    url = f"https://financialmodelingprep.com/api/v3/ratios-ttm/{symbol}"
    try:
        # Without a timeout a stalled connection would block the run forever.
        resp = requests.get(url, params={"apikey": API_KEY}, timeout=timeout)
        if resp.status_code == 200:
            data = resp.json()
            # Check the shape explicitly instead of relying on data[0] raising.
            if isinstance(data, list) and data and isinstance(data[0], dict):
                return data[0]
        else:
            print(f"Ratios request for {symbol} failed: HTTP {resp.status_code}")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error retrieving TTM ratios for {symbol}: {e}")
    return {}

def get_financial_growth(symbol, timeout=10.0):
    """
    Annual growth data, e.g., revenueGrowth, netIncomeGrowth
    /api/v3/financial-growth/<symbol>?period=annual&limit=1

    Args:
        symbol: Ticker symbol to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the response list as a dict, or {} when the request
        fails or the payload is not a non-empty list of dicts. Failures are
        reported on stdout rather than raised.
    """
    url = f"https://financialmodelingprep.com/api/v3/financial-growth/{symbol}"
    query = {"period": "annual", "limit": 1, "apikey": API_KEY}
    try:
        # Without a timeout a stalled connection would block the run forever.
        resp = requests.get(url, params=query, timeout=timeout)
        if resp.status_code == 200:
            data = resp.json()
            # Check the shape explicitly instead of relying on data[0] raising.
            if isinstance(data, list) and data and isinstance(data[0], dict):
                return data[0]
        else:
            print(f"Growth request for {symbol} failed: HTTP {resp.status_code}")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error retrieving financial growth for {symbol}: {e}")
    return {}

def get_company_rating(symbol, timeout=10.0):
    """
    FMP 'rating' data, which includes ratingScore among other fields.
    /api/v3/rating/<symbol>

    Args:
        symbol: Ticker symbol to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the response list as a dict, or {} when the request
        fails or the payload is not a non-empty list of dicts. Failures are
        reported on stdout rather than raised.
    """
    url = f"https://financialmodelingprep.com/api/v3/rating/{symbol}"
    try:
        # Without a timeout a stalled connection would block the run forever.
        r = requests.get(url, params={"apikey": API_KEY}, timeout=timeout)
        if r.status_code == 200:
            data = r.json()
            # Check the shape explicitly instead of relying on data[0] raising.
            if isinstance(data, list) and data and isinstance(data[0], dict):
                return data[0]
        else:
            print(f"Rating request for {symbol} failed: HTTP {r.status_code}")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error retrieving rating for {symbol}: {e}")
    return {}

def get_analyst_estimates(symbol, timeout=10.0):
    """
    Analyst estimates for future earnings/revenue, fetched as annual data
    with limit=1.
    /api/v3/analyst-estimates/<symbol>?period=annual&limit=1
    Callers may read 'estimatedEpsAvg' from the result.

    Args:
        symbol: Ticker symbol to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the response list as a dict, or {} when the request
        fails or the payload is not a non-empty list of dicts. Failures are
        reported on stdout rather than raised.
    """
    url = f"https://financialmodelingprep.com/api/v3/analyst-estimates/{symbol}"
    query = {"period": "annual", "limit": 1, "apikey": API_KEY}
    try:
        # Without a timeout a stalled connection would block the run forever.
        r = requests.get(url, params=query, timeout=timeout)
        if r.status_code == 200:
            data = r.json()
            if isinstance(data, list) and data and isinstance(data[0], dict):
                return data[0]
        else:
            print(f"Estimates request for {symbol} failed: HTTP {r.status_code}")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error retrieving analyst estimates for {symbol}: {e}")
    return {}

def get_stock_grade(symbol, limit=10, timeout=10.0):
    """
    /api/v3/grade/<symbol>?limit=<N>

    Args:
        symbol: Ticker symbol to look up.
        limit: Maximum number of grading events to request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of grading events (callers read 'newGrade' from each), or []
        when the request fails or the payload is not a list. Failures are
        reported on stdout rather than raised.
    """
    url = f"https://financialmodelingprep.com/api/v3/grade/{symbol}"
    query = {"limit": limit, "apikey": API_KEY}
    try:
        # Without a timeout a stalled connection would block the run forever.
        r = requests.get(url, params=query, timeout=timeout)
        if r.status_code == 200:
            data = r.json()
            # Only hand back a list: callers iterate the result and call
            # .get() on each element, which breaks on any other payload shape.
            if isinstance(data, list):
                return data
        else:
            print(f"Grade request for {symbol} failed: HTTP {r.status_code}")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error retrieving stock grades for {symbol}: {e}")
    return []

# Gathering All Data for All Symbols

# Grade labels and their scores, matched by case-insensitive substring.
# Ordered so that a more specific label is tested before a label it contains
# ("Strong Sell" before "Sell", "Strong Buy" before "Buy"); otherwise the
# shorter label would always win and the longer one could never be reached.
_GRADE_SCORES = (
    ("strong buy", 1.0),
    ("strong sell", 0.0),
    ("buy", 0.8),
    ("outperform", 0.6),
    ("neutral", 0.4),
    ("underperform", 0.2),
    ("sell", 0.1),
)
_NEUTRAL_GRADE_SCORE = 0.4  # used when a grade label matches nothing above


def _grade_to_score(grade):
    """Map one grade label to a score in [0, 1]; unknown or missing labels are neutral."""
    if not isinstance(grade, str):
        return _NEUTRAL_GRADE_SCORE
    lowered = grade.lower()
    for label, score in _GRADE_SCORES:
        if label in lowered:
            return score
    return _NEUTRAL_GRADE_SCORE


def _stock_grade_factor(grade_events):
    """Average the scores of the events' 'newGrade' labels; NaN when there are none."""
    if not isinstance(grade_events, list):
        return np.nan
    scores = [
        _grade_to_score(event.get("newGrade"))
        for event in grade_events
        if isinstance(event, dict)
    ]
    return float(np.mean(scores)) if scores else np.nan


def gather_fundamental_data(symbols):
    """
    For each symbol in symbols, fetch:
      - Sector, Industry from profile
      - TTM ratios (pe, roe, d/e)
      - Annual growth data (rev_growth, net_inc_growth)
      - ratingScore from rating
      - analyst estimates (estimatedEpsAvg, stored as forward_eps)
      - stock grade (average score of the 'newGrade' labels)

    Values that are missing or cannot be converted to float are stored as NaN;
    a missing or empty sector/industry is stored as "Unknown".
    'forward_eps_growth' is left as NaN here because no TTM EPS is fetched to
    compute a growth rate from.

    Progress is printed every 25 symbols. If processing a symbol raises, the
    error is printed and that symbol is left out of the result.

    Returns a list of dicts, one per successfully processed symbol, with raw
    metrics.
    """
    all_data = []
    for i, sym in enumerate(symbols):
        # Basic container; every key is present up front so each row has the
        # same columns even when an endpoint returns nothing.
        item = {
            "Symbol": sym,
            "Sector": "Unknown",
            "Industry": "Unknown",
            "pe_ttm": np.nan,
            "roe_ttm": np.nan,
            "de_ttm": np.nan,
            "rev_growth": np.nan,
            "net_inc_growth": np.nan,
            "rating_score": np.nan,
            "forward_eps_growth": np.nan,  # not computed here (no TTM EPS)
            "stock_grade_factor": np.nan,  # from /grade endpoint
            "forward_eps": np.nan,         # from analyst estimates
        }
        try:
            # 1) Profile. `or "Unknown"` also covers keys that are present
            #    but null or empty, which .get(key, default) would pass through.
            profile = get_company_profile(sym)
            item["Sector"] = profile.get("sector") or "Unknown"
            item["Industry"] = profile.get("industry") or "Unknown"

            # 2) Ratios TTM
            ratios = get_ratios_ttm(sym)
            item["pe_ttm"] = safe_number(ratios.get("priceEarningsRatioTTM"), np.nan)
            item["roe_ttm"] = safe_number(ratios.get("returnOnEquityTTM"), np.nan)
            item["de_ttm"] = safe_number(ratios.get("debtEquityRatioTTM"), np.nan)

            # 3) Growth
            growth = get_financial_growth(sym)
            # Typically in fraction form (0.12 => 12%)
            item["rev_growth"] = safe_number(growth.get("revenueGrowth"), np.nan)
            item["net_inc_growth"] = safe_number(growth.get("netIncomeGrowth"), np.nan)

            # 4) Rating
            rating = get_company_rating(sym)
            item["rating_score"] = safe_number(rating.get("ratingScore"), np.nan)

            # 5) Analyst estimates: keep the raw forward EPS level.
            # With a TTM EPS available one could derive
            #   forward_eps_growth = (forward_eps - ttm_eps) / abs(ttm_eps)
            est = get_analyst_estimates(sym)
            item["forward_eps"] = safe_number(est.get("estimatedEpsAvg"), np.nan)

            # 6) Stock grade: average score over the most recent grade events.
            grade_data = get_stock_grade(sym, limit=10)
            item["stock_grade_factor"] = _stock_grade_factor(grade_data)

            all_data.append(item)
        except Exception as e:
            # Best-effort loop: one bad symbol must not abort the whole run.
            print(f"Error on symbol {sym}: {e}")

        if (i + 1) % 25 == 0:
            print(f"Fetched data for {i+1} symbols...")

    return all_data

# Relative Normalization & Composite Score

def logistic_zscore(z):
    """
    Map a z-score (scalar or array-like) into the 0..1 range.

    Despite the name this is a linear rescale, not a sigmoid: z is clamped
    to [-3, +3] and that interval is stretched onto [0, 1], so z=0 maps to 0.5.
    """
    lower, upper = -3, 3
    bounded = np.clip(z, lower, upper)
    shifted = bounded + 3
    return shifted / 6.0

def compute_relative_scores(df):
    """
    Add peer-relative scores for 'rev_growth', 'net_inc_growth' and
    'forward_eps_growth'.

    Rows are grouped by Industry, falling back to Sector when Industry is
    missing or "Unknown". Within each group every metric is turned into a
    z-score and then mapped to [0..1] with logistic_zscore.

    New/overwritten columns (df is modified in place and also returned):
      - 'IndustryOrSector': the grouping key
      - 'forward_eps_growth': copied from 'forward_eps' when that column
        exists (the raw forward EPS level stands in for growth)
      - 'rev_growth_rel', 'net_inc_growth_rel', 'forward_eps_growth_rel'

    Missing values, single-member groups and groups with zero spread all get
    z=0, i.e. a neutral relative score of 0.5.
    """
    # Grouping key: industry where known, otherwise sector. Vectorised so it
    # also works on an empty frame, and null industries count as unknown.
    industry = df["Industry"]
    use_sector = industry.isna() | (industry == "Unknown")
    df["IndustryOrSector"] = industry.mask(use_sector, df["Sector"]).fillna("Unknown")

    rel_cols = ["rev_growth", "net_inc_growth", "forward_eps_growth"]

    # Missing estimates stay NaN instead of being zero-filled: a filled 0 would
    # be scored as a real, below-average EPS and would also distort the group
    # mean/std. NaN becomes z=0 below, like every other missing metric.
    if "forward_eps" in df.columns:
        df["forward_eps_growth"] = df["forward_eps"]
    elif "forward_eps_growth" not in df.columns:
        df["forward_eps_growth"] = np.nan

    grouped = df.groupby("IndustryOrSector")
    for metric in rel_cols:
        mean_vals = grouped[metric].transform("mean")
        std_vals = grouped[metric].transform("std")

        # z = (val - mean) / std; a zero or undefined std is turned into NaN
        # so the division cannot produce inf.
        z = (df[metric] - mean_vals) / std_vals.where(std_vals > 0)
        z = z.fillna(0)  # no spread or missing data => neutral
        df[metric + "_rel"] = logistic_zscore(z)

    return df

def invert_lower_is_better(x, cap=100.0):
    """
    Score a metric where lower is better (e.g. P/E or D/E).

    norm = 1 / (1 + min(x, cap)), which lies in (0, 1] for x >= 0.

    Args:
        x: Metric value; may be NaN/None.
        cap: Upper clamp for x so that extreme values do not matter.

    Returns:
        0.5 when x is missing, otherwise the normalised score. Negative
        inputs receive the worst score, 1 / (1 + cap).
    """
    if pd.isna(x):
        return 0.5  # fallback
    if x < 0:
        # The raw formula breaks below zero: x = -1 divides by zero and
        # -1 < x < 0 yields scores above 1. Score a negative ratio as the
        # worst case rather than the best.
        xx = cap
    else:
        # clamp x to avoid extremes
        xx = min(x, cap)
    return 1.0 / (1.0 + xx)

def clamp_and_scale(val, low, high):
    """
    Clamp val to [low, high] and rescale linearly so that low maps to 0
    and high maps to 1.
    """
    span = high - low
    bounded = np.clip(val, low, high)
    offset = bounded - low
    return offset / span

def compute_composite_score(row):
    """
    Blend eight per-company factors into one composite score on a 0..100 scale.

    Absolute factors:
       - pe_ttm             => invert_lower_is_better, cap 100
       - de_ttm             => invert_lower_is_better, cap 5
       - roe_ttm            => clamped to [0..50] and scaled to [0..1]
       - rating_score       => clamped to [0..10] and scaled to [0..1]
       - stock_grade_factor => used as-is
    Relative factors (read with a 0.5 default when the column is absent):
       - rev_growth_rel
       - net_inc_growth_rel
       - forward_eps_growth_rel

    Missing roe_ttm / rating_score count as 0.5 and a missing
    stock_grade_factor counts as 0.4 ("Neutral"). All factors are weighted
    equally; the weights are a candidate for tuning later.

    NOTE(review): the [0..50] ROE range presumably expects percent values.
    If roe_ttm arrives as a fraction, nearly every company would score close
    to 0 on this factor — confirm the unit.
    """
    def _fallback(value, default):
        # Replace a missing value with its neutral stand-in.
        return default if pd.isna(value) else value

    def _scaled_or_neutral(value, low, high):
        # Missing values score 0.5; everything else is clamped and rescaled.
        return 0.5 if pd.isna(value) else clamp_and_scale(value, low, high)

    factors = [
        # absolute metrics
        invert_lower_is_better(row["pe_ttm"], cap=100),
        invert_lower_is_better(row["de_ttm"], cap=5),
        _scaled_or_neutral(row["roe_ttm"], 0, 50),
        _scaled_or_neutral(row["rating_score"], 0, 10),
        _fallback(row["stock_grade_factor"], 0.4),
        # relative metrics
        row.get("rev_growth_rel", 0.5),
        row.get("net_inc_growth_rel", 0.5),
        row.get("forward_eps_growth_rel", 0.5),
    ]

    # Equal weighting: each of the eight factors contributes 1/8.
    w = 1.0 / len(factors)
    return sum(f * w for f in factors) * 100.0

def map_score_to_rating(score):
    """
    Translate a composite score into a textual rating.

    Bands: below 20 "Strong Sell", below 40 "Sell", below 60 "Hold",
    below 80 "Buy"; anything that falls under none of the bounds is
    "Strong Buy".
    """
    bands = (
        (20, "Strong Sell"),
        (40, "Sell"),
        (60, "Hold"),
        (80, "Buy"),
    )
    for upper_bound, label in bands:
        if score < upper_bound:
            return label
    return "Strong Buy"

def main(output_dir="/workspaces/Stock-Market-Prediction/FunSig"):
    """
    Run the full pipeline: fetch the S&P 500 symbols, gather fundamentals,
    score every company and write the results as CSV files.

    Args:
        output_dir: Directory that receives the three CSV files. It is
            created if it does not exist yet.

    Writes:
        sp500_fundamental_scores.csv  - one row per symbol, best score first
        sp500_sector_stats.csv        - CompositeScore statistics per Sector
        sp500_industry_stats.csv      - CompositeScore statistics per Industry

    Returns early, after printing a message, when no symbols or no
    fundamental data could be retrieved.
    """
    # Get all S&P 500 symbols
    sp500_symbols = get_sp500_symbols()
    if not sp500_symbols:
        print("No S&P 500 symbols found. Exiting.")
        return
    print(f"Total S&P 500 symbols retrieved: {len(sp500_symbols)}")

    # Create the output directory before the long fetch so that a bad path
    # fails immediately rather than after every API call has been made.
    os.makedirs(output_dir, exist_ok=True)

    # fundamental data
    raw_list = gather_fundamental_data(sp500_symbols)
    if not raw_list:
        # An empty frame has no columns, so the scoring steps could not run.
        print("No fundamental data gathered. Exiting.")
        return
    df = pd.DataFrame(raw_list)
    print(f"Gathered raw data for {len(df)} symbols.")

    # Compute relative metrics (growth rates, forward EPS, etc.)
    df = compute_relative_scores(df)

    # Final composite score
    df["CompositeScore"] = df.apply(compute_composite_score, axis=1)
    df["Rating"] = df["CompositeScore"].apply(map_score_to_rating)

    # Sort by best to worst
    df = df.sort_values("CompositeScore", ascending=False)

    # Save Master CSV
    df.to_csv(os.path.join(output_dir, "sp500_fundamental_scores.csv"), index=False)
    print("Saved 'sp500_fundamental_scores.csv' with composite scores & ratings.")

    # Aggregations by Sector, Industry
    summary_stats = ["mean", "median", "min", "max", "count"]

    sector_stats = df.groupby("Sector")["CompositeScore"].agg(summary_stats).reset_index()
    sector_stats.to_csv(os.path.join(output_dir, "sp500_sector_stats.csv"), index=False)

    industry_stats = df.groupby("Industry")["CompositeScore"].agg(summary_stats).reset_index()
    industry_stats.to_csv(os.path.join(output_dir, "sp500_industry_stats.csv"), index=False)

    print("Saved 'sp500_sector_stats.csv' and 'sp500_industry_stats.csv' for aggregated statistics.")


if __name__ == "__main__":
    main()


Total S&P 500 symbols retrieved: 503
Fetched data for 25 symbols...
Fetched data for 50 symbols...
Fetched data for 75 symbols...
Fetched data for 100 symbols...
Fetched data for 125 symbols...
Fetched data for 150 symbols...
Fetched data for 175 symbols...
Fetched data for 200 symbols...
Fetched data for 225 symbols...
Fetched data for 250 symbols...
Fetched data for 275 symbols...
Fetched data for 300 symbols...
Fetched data for 325 symbols...
Fetched data for 350 symbols...
Fetched data for 375 symbols...
Fetched data for 400 symbols...
Fetched data for 425 symbols...
Fetched data for 450 symbols...
Fetched data for 475 symbols...
Fetched data for 500 symbols...
Gathered raw data for 503 symbols.
Saved 'sp500_fundamental_scores.csv' with composite scores & ratings.
Saved 'sp500_sector_stats.csv' and 'sp500_industry_stats.csv' for aggregated statistics.
