In [1]:
import pandas as pd
import numpy as np

# Show up to 50 columns when a wide frame is rendered in the notebook.
pd.options.display.max_columns = 50


In [2]:
# Inputs: one row per token per day (history) and one row per token (snapshot).
history_csv = "data/processed/market_history_daily.csv"
snapshot_csv = "data/processed/market_snapshot.csv"

market_history = pd.read_csv(history_csv)
market_snapshot = pd.read_csv(snapshot_csv)

# Preview the first rows of the daily history.
market_history.head()


Unnamed: 0,coingecko_id,date,price_usd,market_cap_usd,volume_usd,token_name,symbol,tier,category,chain
0,uniswap,2025-01-20,13.275081,7997656000.0,667029700.0,Uniswap,UNI,A,DEX,ethereum
1,uniswap,2025-01-21,13.469417,8100521000.0,712594200.0,Uniswap,UNI,A,DEX,ethereum
2,uniswap,2025-01-22,13.487985,8099307000.0,472816700.0,Uniswap,UNI,A,DEX,ethereum
3,uniswap,2025-01-23,12.86683,7729917000.0,314210000.0,Uniswap,UNI,A,DEX,ethereum
4,uniswap,2025-01-24,12.797972,7688804000.0,364061100.0,Uniswap,UNI,A,DEX,ethereum


In [3]:
# Order rows by token, then by date, and renumber the index 0..n-1 so that the
# per-token shift/rolling results computed later align back on the index.
market_history = market_history.sort_values(
    by=["coingecko_id", "date"], ignore_index=True
)

# Sanity check: distribution of the number of dated rows per token.
rows_per_token = market_history.groupby("coingecko_id")["date"].count()
rows_per_token.describe()


count     20.0
mean     365.0
std        0.0
min      365.0
25%      365.0
50%      365.0
75%      365.0
max      365.0
Name: date, dtype: float64

In [4]:
# Daily log return per token: log(price_t) - log(price_{t-1}).
# The lag is taken within each token, so a token's first row is NaN rather
# than being compared against the previous token's last price.
log_price = np.log(market_history["price_usd"])
previous_log_price = np.log(
    market_history.groupby("coingecko_id")["price_usd"].shift(1)
)
market_history["log_return"] = log_price - previous_log_price

market_history.head()


Unnamed: 0,coingecko_id,date,price_usd,market_cap_usd,volume_usd,token_name,symbol,tier,category,chain,log_return
0,1inch,2025-01-20,0.335552,471465600.0,70494680.0,1inch,1INCH,B,DEX aggregator,ethereum,
1,1inch,2025-01-21,0.338748,474226200.0,84411490.0,1inch,1INCH,B,DEX aggregator,ethereum,0.009478
2,1inch,2025-01-22,0.348113,485943400.0,44953530.0,1inch,1INCH,B,DEX aggregator,ethereum,0.027271
3,1inch,2025-01-23,0.327064,457542100.0,35788830.0,1inch,1INCH,B,DEX aggregator,ethereum,-0.062372
4,1inch,2025-01-24,0.317425,443903400.0,43789340.0,1inch,1INCH,B,DEX aggregator,ethereum,-0.029913


In [5]:
# Rolling volatility: standard deviation of daily log returns over a trailing
# window, computed separately for each token.
volatility_windows = {"vol_7d": 7, "vol_30d": 30}

for column_name, window_days in volatility_windows.items():
    rolling_std = (
        market_history
        .groupby("coingecko_id")["log_return"]
        .rolling(window=window_days)
        .std()
    )
    # The grouped rolling result is indexed by (token, original row index);
    # drop the token level so the values align with market_history's index.
    market_history[column_name] = rolling_std.droplevel(0)

market_history[["coingecko_id","date","vol_7d","vol_30d"]].tail()


Unnamed: 0,coingecko_id,date,vol_7d,vol_30d
7295,yearn-finance,2026-01-15,0.034176,0.031295
7296,yearn-finance,2026-01-16,0.033807,0.031868
7297,yearn-finance,2026-01-17,0.033489,0.029005
7298,yearn-finance,2026-01-18,0.037725,0.030271
7299,yearn-finance,2026-01-19,0.045332,0.029771


In [6]:
# Running peak price per token (highest price seen so far in the sorted history).
running_peak = market_history.groupby("coingecko_id")["price_usd"].cummax()
market_history["cum_max_price"] = running_peak

# Drawdown: fractional distance of the current price below the running peak
# (0 at a new high, negative otherwise).
market_history["drawdown"] = market_history["price_usd"] / running_peak - 1


In [7]:
# Worst (most negative) drawdown observed for each token over the loaded history.
drawdown_features = (
    market_history
    .groupby("coingecko_id")
    .agg(max_drawdown_365d=("drawdown", "min"))
    .reset_index()
)

drawdown_features.head()


Unnamed: 0,coingecko_id,max_drawdown_365d
0,1inch,-0.599608
1,aave,-0.662779
2,balancer,-0.801773
3,basic-attention-token,-0.530917
4,celsius-degree-token,-0.90279


In [8]:
# Daily turnover proxy: traded volume relative to market capitalisation.
# A zero market cap would turn the ratio into +/-inf and make that token's mean
# infinite, so non-positive caps are masked to NaN and those days are skipped
# by the mean below.
valid_market_cap = market_history["market_cap_usd"].where(
    market_history["market_cap_usd"] > 0
)
market_history["vol_mcap_ratio"] = market_history["volume_usd"] / valid_market_cap

# Average daily volume / market-cap ratio per token (NaN days are ignored).
liquidity_features = (
    market_history
    .groupby("coingecko_id")["vol_mcap_ratio"]
    .mean()
    .reset_index()
    .rename(columns={"vol_mcap_ratio":"avg_vol_mcap_ratio"})
)

liquidity_features.head()


Unnamed: 0,coingecko_id,avg_vol_mcap_ratio
0,1inch,0.088042
1,aave,0.119424
2,balancer,0.107828
3,basic-attention-token,0.098813
4,celsius-degree-token,0.110283


In [9]:
# Trailing moving averages of price, computed separately for each token.
moving_average_windows = {"price_ma_7d": 7, "price_ma_30d": 30}

for column_name, window_days in moving_average_windows.items():
    rolling_mean = (
        market_history
        .groupby("coingecko_id")["price_usd"]
        .rolling(window_days)
        .mean()
    )
    # Drop the token level of the (token, row index) result so it aligns
    # with market_history's own index.
    market_history[column_name] = rolling_mean.droplevel(0)

# Momentum: how far the short moving average sits above (+) or below (-)
# the long one, as a fraction of the long one.
market_history["momentum_7v30"] = (
    market_history["price_ma_7d"].div(market_history["price_ma_30d"]).sub(1)
)


In [10]:
# Per-token mean of the daily 7d-vs-30d momentum. The column keeps the name
# "momentum_7v30" even though it is an average over the history.
momentum_features = (
    market_history
    .groupby("coingecko_id", as_index=False)["momentum_7v30"]
    .mean()
)


In [11]:
# Per-token averages of the 7-day and 30-day rolling volatilities.
vol_features = (
    market_history
    .groupby("coingecko_id", as_index=False)[["vol_7d", "vol_30d"]]
    .mean()
    .rename(columns={"vol_7d": "avg_vol_7d", "vol_30d": "avg_vol_30d"})
)

vol_features.head()


Unnamed: 0,coingecko_id,avg_vol_7d,avg_vol_30d
0,1inch,0.043285,0.047403
1,aave,0.046889,0.049209
2,balancer,0.040612,0.044372
3,basic-attention-token,0.044841,0.04777
4,celsius-degree-token,0.066352,0.072337


In [12]:
# Attach every per-token feature table to the snapshot. Left joins keep all
# snapshot rows even when a token has no computed features.
per_token_feature_tables = [
    vol_features,
    drawdown_features,
    liquidity_features,
    momentum_features,
]

market_features = market_snapshot
for feature_table in per_token_feature_tables:
    market_features = market_features.merge(
        feature_table, on="coingecko_id", how="left"
    )

# NOTE(review): the feature tables only contribute coingecko_id plus their
# feature columns, so any *_x / *_y metadata columns in the result are already
# present in the snapshot file -- worth cleaning up where that file is built.
market_features.head()


Unnamed: 0,coingecko_id,pulled_at_utc,current_price_usd,market_cap_usd,volume_24h_usd,circulating_supply,max_supply,fdv_usd,token_name_x,symbol_x,tier_x,category_x,chain_x,token_name_y,symbol_y,tier_y,category_y,chain_y,token_name,symbol,tier,category,chain,avg_vol_7d,avg_vol_30d,max_drawdown_365d,avg_vol_mcap_ratio,momentum_7v30
0,uniswap,2026-01-19T12:34:30.906620+00:00,4.98,3160631000.0,382896158,634610400.0,1000000000.0,4480843112,Uniswap,UNI,A,DEX,ethereum,Uniswap,UNI,A,DEX,ethereum,Uniswap,UNI,A,DEX,ethereum,0.050157,0.055325,-0.646855,0.086598,-0.018016
1,chainlink,2026-01-19T12:34:45.019802+00:00,12.75,9024027000.0,648920230,708100000.0,1000000000.0,12744001030,Chainlink,LINK,A,Infrastructure & Middleware,ethereum,Chainlink,LINK,A,Infrastructure & Middleware,ethereum,Chainlink,LINK,A,Infrastructure & Middleware,ethereum,0.043385,0.04623,-0.590724,0.065049,-0.013702
2,aave,2026-01-19T12:34:47.605564+00:00,163.46,2481823000.0,375268933,15183570.0,16000000.0,2615272322,Aave,AAVE,A,Lending & Borrowing,ethereum,Aave,AAVE,A,Lending & Borrowing,ethereum,Aave,AAVE,A,Lending & Borrowing,ethereum,0.046889,0.049209,-0.662779,0.119424,-0.015125
3,lido-dao,2026-01-19T12:35:14.316675+00:00,0.546995,462970900.0,56720533,846566800.0,1000000000.0,546880520,Lido Dao,LDO,A,Liquid staking,ethereum,Lido Dao,LDO,A,Liquid staking,ethereum,Lido Dao,LDO,A,Liquid staking,ethereum,0.054956,0.059079,-0.780226,0.131942,-0.033136
4,curve-dao-token,2026-01-19T12:35:17.069355+00:00,0.390634,573741000.0,74930533,1468781000.0,3030303000.0,915416077,Curve DAO,CRV,B,DEX,ethereum,Curve DAO,CRV,B,DEX,ethereum,Curve DAO,CRV,B,DEX,ethereum,0.053969,0.056678,-0.692343,0.202697,-0.010111


In [13]:
# Data-quality flags on the snapshot fields. The "zero" flags also catch
# negative values; missing values compare False and are therefore not flagged.
market_features = market_features.assign(
    flag_zero_market_cap=market_features["market_cap_usd"].le(0),
    flag_zero_supply=market_features["circulating_supply"].le(0),
    flag_missing_fdv=market_features["fdv_usd"].isnull(),
)


In [14]:
# Persist the per-token feature table (without the index) and show its size.
features_csv = "data/processed/market_features.csv"
market_features.to_csv(features_csv, index=False)

market_features.shape


(20, 31)