In [None]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): with no category/module filter this suppresses *every* warning
# raised after this point in the kernel, including pandas/numpy deprecation and
# runtime (e.g. divide-by-zero) warnings. Consider narrowing it to the specific
# warning that was noisy.
warnings.filterwarnings('ignore')

In [None]:
#Load or simulate the market dataset
def load_market_data(dummy=True, path=None):
    """Return the market-size dataset, either simulated or read from a CSV file.

    Parameters
    ----------
    dummy : bool, default True
        When true, build a reproducible synthetic dataset of 300 startups
        with the columns 'startup_name', 'TAM', 'SAM' and 'SOM'.
        When false, read the dataset from ``path`` instead.
    path : str or path-like, optional
        Location of the CSV file; required when ``dummy`` is false. The file
        is expected to provide 'TAM', 'SAM' and 'SOM' columns (it is not
        validated here).

    Returns
    -------
    pandas.DataFrame
        One row per startup.

    Raises
    ------
    ValueError
        If ``dummy`` is false and no ``path`` was given.
    """
    if not dummy:
        if path is None:
            # Fail with an actionable message instead of letting read_csv
            # complain about an invalid buffer type.
            raise ValueError("path must be provided when dummy=False")
        return pd.read_csv(path)

    # Seeding the global NumPy generator keeps the simulated values identical
    # between runs. Note that it also resets the global random state for any
    # code that runs afterwards.
    np.random.seed(42)
    n = 300

    # The upper bound for TAM does not fit in a 32-bit integer, so the dtype is
    # requested explicitly; relying on the platform default integer makes
    # randint raise where that default is 32-bit.
    # The three columns are drawn independently, so a simulated SAM can exceed
    # its TAM (and SOM its SAM).
    return pd.DataFrame({
        'startup_name': [f"Startup_{i+1}" for i in range(n)],
        # Total Addressable Market
        'TAM': np.random.randint(100_000_000, 10_000_000_000, size=n, dtype=np.int64),
        # Serviceable Available Market
        'SAM': np.random.randint(10_000_000, 1_000_000_000, size=n, dtype=np.int64),
        # Serviceable Obtainable Market
        'SOM': np.random.randint(500_000, 50_000_000, size=n, dtype=np.int64),
    })

In [None]:
#Preprocess the market data
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator clipped to [0, 1]; undefined ratios become 0."""
    # Blank out non-positive denominators first so the division yields NaN
    # rather than +/-inf (which clip() would silently turn into a perfect 1.0).
    valid_denominator = denominator.where(denominator > 0)
    return (numerator / valid_denominator).fillna(0.0).clip(0, 1)


def preprocess_market_data(df):
    """Derive log-scaled market sizes and market-share ratios.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the numeric columns 'TAM', 'SAM' and 'SOM'.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (the input is left untouched) with these columns
        added:

        * 'TAM_log', 'SAM_log', 'SOM_log' -- ``log1p`` of each market size,
          which compresses the large differences in magnitude.
        * 'SAM_TAM_ratio' -- SAM / TAM, clipped to [0, 1].
        * 'SOM_SAM_ratio' -- SOM / SAM, clipped to [0, 1].

        A ratio whose denominator is missing or not positive (or whose
        numerator is missing) is reported as 0.
    """
    # Work on a copy so re-running the cell, or reusing the raw frame
    # elsewhere, never sees half-processed data.
    out = df.copy()

    # Log-transform to handle large market size differences
    for col in ('TAM', 'SAM', 'SOM'):
        out[f'{col}_log'] = np.log1p(out[col])

    # Compute market ratios
    out['SAM_TAM_ratio'] = _safe_ratio(out['SAM'], out['TAM'])
    out['SOM_SAM_ratio'] = _safe_ratio(out['SOM'], out['SAM'])

    return out

In [None]:
#Assign rule-based market score
def rule_based_market_score(df):
    """Attach a rule-based 'market_score' to every row.

    The score is the sum of three independent components:

    * TAM size: 30 points at >= 5e9, 20 at >= 1e9, 10 at >= 1e8, otherwise 0.
    * SAM/TAM ratio: 20 points at >= 0.5, 10 at >= 0.2, otherwise 0.
    * SOM/SAM ratio: 30 points at >= 0.2, 20 at >= 0.1, otherwise 10.

    Missing (NaN) values fail every threshold and therefore land in the
    "otherwise" bucket of their component.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'TAM', 'SAM_TAM_ratio' and 'SOM_SAM_ratio'.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (the input is left untouched) with an integer
        'market_score' column added.
    """
    tam = df['TAM']
    sam_tam = df['SAM_TAM_ratio']
    som_sam = df['SOM_SAM_ratio']

    # np.select picks the first matching condition per row, which mirrors an
    # if/elif chain without looping over rows in Python.
    tam_points = np.select(
        [
            (tam >= 5_000_000_000).to_numpy(),
            (tam >= 1_000_000_000).to_numpy(),
            (tam >= 100_000_000).to_numpy(),
        ],
        [30, 20, 10],
        default=0,
    )
    sam_tam_points = np.select(
        [(sam_tam >= 0.5).to_numpy(), (sam_tam >= 0.2).to_numpy()],
        [20, 10],
        default=0,
    )
    som_sam_points = np.select(
        [(som_sam >= 0.2).to_numpy(), (som_sam >= 0.1).to_numpy()],
        [30, 20],
        default=10,
    )

    # Cap score at 100. With the current weights the maximum is 80, so the cap
    # only acts as a guard should the point values be raised later.
    total = np.minimum(tam_points + sam_tam_points + som_sam_points, 100)

    scored = df.copy()
    scored['market_score'] = total.astype(np.int64)
    return scored

In [None]:
#Run full market scoring pipeline
# Chain the three stages: simulate the data, derive features, then score.
df = (
    load_market_data(dummy=True)
    .pipe(preprocess_market_data)
    .pipe(rule_based_market_score)
)

# Preview the first 10 rows (in load order, not ranked by score)
df.loc[:, ['startup_name', 'TAM', 'SAM', 'SOM', 'market_score']].head(10)