In [1]:
import pandas as pd
import numpy as np

market_features = pd.read_csv("data/processed/market_features.csv")
market_features.head()


Unnamed: 0,coingecko_id,pulled_at_utc,current_price_usd,market_cap_usd,volume_24h_usd,circulating_supply,max_supply,fdv_usd,token_name_x,symbol_x,...,category,chain,avg_vol_7d,avg_vol_30d,max_drawdown_365d,avg_vol_mcap_ratio,momentum_7v30,flag_zero_market_cap,flag_zero_supply,flag_missing_fdv
0,uniswap,2026-01-19T12:34:30.906620+00:00,4.98,3160631000.0,382896158,634610400.0,1000000000.0,4480843112,Uniswap,UNI,...,DEX,ethereum,0.050157,0.055325,-0.646855,0.086598,-0.018016,False,False,False
1,chainlink,2026-01-19T12:34:45.019802+00:00,12.75,9024027000.0,648920230,708100000.0,1000000000.0,12744001030,Chainlink,LINK,...,Infrastructure & Middleware,ethereum,0.043385,0.04623,-0.590724,0.065049,-0.013702,False,False,False
2,aave,2026-01-19T12:34:47.605564+00:00,163.46,2481823000.0,375268933,15183570.0,16000000.0,2615272322,Aave,AAVE,...,Lending & Borrowing,ethereum,0.046889,0.049209,-0.662779,0.119424,-0.015125,False,False,False
3,lido-dao,2026-01-19T12:35:14.316675+00:00,0.546995,462970900.0,56720533,846566800.0,1000000000.0,546880520,Lido Dao,LDO,...,Liquid staking,ethereum,0.054956,0.059079,-0.780226,0.131942,-0.033136,False,False,False
4,curve-dao-token,2026-01-19T12:35:17.069355+00:00,0.390634,573741000.0,74930533,1468781000.0,3030303000.0,915416077,Curve DAO,CRV,...,DEX,ethereum,0.053969,0.056678,-0.692343,0.202697,-0.010111,False,False,False


In [2]:
market_features["peer_group"] = (
    market_features["tier"].astype(str) + "_" +
    market_features["category"].astype(str)
)


In [3]:
def percentile_score(series, higher_is_better=True):
    pct = series.rank(pct=True)
    if not higher_is_better:
        pct = 1 - pct
    return (pct * 100).clip(0, 100)


In [4]:
market_features["vol_score"] = (
    market_features
    .groupby("peer_group")["avg_vol_30d"]
    .transform(lambda x: percentile_score(x, higher_is_better=False))
)


In [5]:
market_features["drawdown_score"] = (
    market_features
    .groupby("peer_group")["max_drawdown_365d"]
    .transform(lambda x: percentile_score(x, higher_is_better=True))
)


In [6]:
market_features["liquidity_score"] = (
    market_features
    .groupby("peer_group")["avg_vol_mcap_ratio"]
    .transform(lambda x: percentile_score(x, higher_is_better=True))
)

# absolute liquidity floor
market_features.loc[
    market_features["avg_vol_mcap_ratio"] < 0.002,
    "liquidity_score"
] *= 0.5


In [7]:
market_features["trend_stability_score"] = (
    100 - (market_features["momentum_7v30"].abs() * 100)
).clip(0, 100)


In [8]:
market_features["market_health_raw"] = (
    0.30 * market_features["vol_score"] +
    0.30 * market_features["drawdown_score"] +
    0.25 * market_features["liquidity_score"] +
    0.15 * market_features["trend_stability_score"]
)


In [9]:
market_features["penalty"] = 0

market_features.loc[market_features["flag_zero_market_cap"], "penalty"] -= 30
market_features.loc[market_features["flag_zero_supply"], "penalty"] -= 30
market_features.loc[market_features["flag_missing_fdv"], "penalty"] -= 10

market_features["market_health_score"] = (
    market_features["market_health_raw"] +
    market_features["penalty"]
).clip(0, 100)


In [10]:
market_features.to_csv(
    "data/processed/market_health_scores.csv",
    index=False
)

market_features[
    ["token_name","tier","category","market_health_score"]
].sort_values("market_health_score", ascending=False)


Unnamed: 0,token_name,tier,category,market_health_score
13,Shiba Inu,C,Meme,72.152704
11,Basic Attention Token,C,Web3 Utility,69.944139
4,Curve DAO,B,DEX,69.848332
1,Chainlink,A,Infrastructure & Middleware,69.794469
0,Uniswap,A,DEX,69.729767
16,1inch,B,DEX aggregator,69.725815
10,Yearn Finance,C,Yield Strategies,69.711041
6,Compound,B,Lending & Borrowing,69.599738
5,Synthetix,B,Derivatives,69.592054
8,Ethereum Name Service,B,Infrastructure & Middleware,69.518125
