# Exploring Data Available for Cryptocurrencies
- Main purpose: explore which data could be used in an ML model.

In [1]:
from pycoingecko import CoinGeckoAPI
import pandas as pd
import time

In [None]:
# Fetch everything the inspection cells below rely on, in one pass:
# coin snapshot, trending list, category table and a short market history.
coin_id = 'solana'  # passed as the `id` argument to the CoinGecko calls below
cg = CoinGeckoAPI()

# --- Coin snapshot: supply, market, developer and community figures ---------
coin_data = cg.get_coin_by_id(id=coin_id, localization=False)
# `or {}` also covers a key that is present but explicitly null in the payload.
market_data = coin_data.get('market_data') or {}
supply_data = {
    "circulating_supply": market_data.get("circulating_supply"),
    "total_supply": market_data.get("total_supply"),
    "max_supply": market_data.get("max_supply"),
    "market_cap_usd": (market_data.get("market_cap") or {}).get("usd"),
    "volume_24h_usd": (market_data.get("total_volume") or {}).get("usd"),
    "liquidity_score": coin_data.get("liquidity_score"),
}
developer_data = coin_data.get("developer_data", {})
community_data = coin_data.get("community_data", {})

# --- Trending: is this coin in the current trending search list? ------------
trending = cg.get_search_trending()
trending_coins = [item['item']['id'] for item in trending['coins']]
is_trending = coin_id in trending_coins

# --- Categories: market-wide category table ---------------------------------
categories = cg.get_coins_categories()
category_df = pd.DataFrame(categories)[['id', 'name', 'market_cap', 'market_cap_change_24h']]

# --- History: market cap and volume over the last 2 days --------------------
historical = cg.get_coin_market_chart_by_id(id=coin_id, vs_currency='usd', days=2)
market_caps = historical.get('market_caps', [])
volumes = historical.get('total_volumes', [])
# Join the two [timestamp_ms, value] series on their timestamp rather than by
# list position: positional pairing raises ValueError when the lists differ in
# length and silently misaligns values if the series are ordered differently.
historical_df = (
    pd.DataFrame(market_caps, columns=['timestamp', 'market_cap'])
    .merge(
        pd.DataFrame(volumes, columns=['timestamp', 'volume']),
        on='timestamp',
        how='inner',
    )
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp'], unit='ms'))
)



In [None]:
# TODO: derive some summary stats from the historical data?
# TODO: calculate market_cap / circulating_supply
# TODO: slopes over the last 24h, 12h, 6h and 3h

In [None]:
# Show the supply / market-cap figures as a labelled Series under a heading.
print("Supply & Market Cap:", pd.Series(supply_data), sep="\n")

Supply & Market Cap:
circulating_supply    5.430340e+08
total_supply          6.100351e+08
max_supply                     NaN
market_cap_usd        1.315627e+11
volume_24h_usd        8.677208e+09
liquidity_score                NaN
dtype: float64


In [4]:
# Show the developer metrics from the coin payload as a labelled Series.
print("\nDeveloper Data:", pd.Series(developer_data), sep="\n")


Developer Data:
forks                                                                      3516
stars                                                                     11071
subscribers                                                                 276
total_issues                                                               5177
closed_issues                                                              4611
pull_requests_merged                                                      23614
pull_request_contributors                                                   411
code_additions_deletions_4_weeks       {'additions': 10193, 'deletions': -5277}
commit_count_4_weeks                                                        171
last_4_weeks_commit_activity_series                                          []
dtype: object


In [5]:
# Show the community metrics from the coin payload as a labelled Series.
print("\nCommunity Data:", pd.Series(community_data), sep="\n")


Community Data:
facebook_likes                     NaN
reddit_average_posts_48h           0.0
reddit_average_comments_48h        0.0
reddit_subscribers                 0.0
reddit_accounts_active_48h         0.0
telegram_channel_user_count    68053.0
dtype: float64


In [6]:
# List the ids currently in the trending list, then report whether `coin_id` is among them.
print("\nTrending Coins:", trending_coins, sep="\n")
print(f"Is '{coin_id}' trending? {'yes' if is_trending else 'no'}")


Trending Coins:
['aster-2', 'undeads-games', 'linea', 'trust-wallet-token', 'world-liberty-financial', 'pump-fun', 'stbl', 'avantis', 'syndicate-3', 'hyperliquid', 'lombard-protocol', 'bitcoin', 'avalanche-2', 'ethena', 'solana']
Is 'solana' trending? yes


In [7]:
# Preview the first rows of the category table
# (columns: id, name, market_cap, market_cap_change_24h).
print("\nTop Categories:")
category_df.head()


Top Categories:


Unnamed: 0,id,name,market_cap,market_cap_change_24h
0,smart-contract-platform,Smart Contract Platform,3461136000000.0,-0.351704
1,layer-1,Layer 1 (L1),3365626000000.0,-0.325658
2,proof-of-work-pow,Proof of Work (PoW),2398632000000.0,-0.11498
3,proof-of-stake-pos,Proof of Stake (PoS),969918300000.0,-0.653873
4,world-liberty-financial-portfolio,World Liberty Financial Portfolio,895949800000.0,-0.503301


In [10]:
# get category data for coin
coin_category = coin_data.get('categories', [])
coin_category

['Smart Contract Platform',
 'Solana Ecosystem',
 'Layer 1 (L1)',
 'Alleged SEC Securities',
 'FTX Holdings',
 'Multicoin Capital Portfolio',
 'Proof of Stake (PoS)',
 'Alameda Research Portfolio',
 'Andreessen Horowitz (a16z) Portfolio',
 'GMCI Layer 1 Index',
 'GMCI 30 Index',
 'Delphi Ventures Portfolio',
 'GMCI Index',
 'Polychain Capital Portfolio',
 'Made in USA',
 'Coinbase 50 Index']

In [12]:
# List the top-level keys of the coin payload fetched earlier.
coin_data.keys()

dict_keys(['id', 'symbol', 'name', 'web_slug', 'asset_platform_id', 'platforms', 'detail_platforms', 'block_time_in_minutes', 'hashing_algorithm', 'categories', 'preview_listing', 'public_notice', 'additional_notices', 'description', 'links', 'image', 'country_origin', 'genesis_date', 'sentiment_votes_up_percentage', 'sentiment_votes_down_percentage', 'ico_data', 'watchlist_portfolio_users', 'market_cap_rank', 'market_data', 'community_data', 'developer_data', 'status_updates', 'last_updated', 'tickers'])

In [11]:
# Display the full coin payload.
# NOTE(review): this dumps the entire nested dict into the output; consider
# inspecting individual keys instead before sharing the notebook.
coin_data

{'id': 'solana',
 'symbol': 'sol',
 'name': 'Solana',
 'web_slug': 'solana',
 'asset_platform_id': None,
 'platforms': {'': ''},
 'detail_platforms': {'': {'decimal_place': None, 'contract_address': ''}},
 'block_time_in_minutes': 0,
 'hashing_algorithm': None,
 'categories': ['Smart Contract Platform',
  'Solana Ecosystem',
  'Layer 1 (L1)',
  'Alleged SEC Securities',
  'FTX Holdings',
  'Multicoin Capital Portfolio',
  'Proof of Stake (PoS)',
  'Alameda Research Portfolio',
  'Andreessen Horowitz (a16z) Portfolio',
  'GMCI Layer 1 Index',
  'GMCI 30 Index',
  'Delphi Ventures Portfolio',
  'GMCI Index',
  'Polychain Capital Portfolio',
  'Made in USA',
  'Coinbase 50 Index'],
 'preview_listing': False,
 'public_notice': None,
 'additional_notices': [],
 'description': {'en': 'Solana is a highly functional open source project that banks on blockchain technology’s permissionless nature to provide decentralized finance (DeFi) solutions. It is a layer 1 network that offers fast speeds a

In [8]:
# The history is requested with days=2 in the fetch cell, so the heading says
# 2 days (it previously claimed 7). Show the 10 most recent points.
print("\nHistorical Market Cap & Volume (Last 2 days):")
historical_df.tail(10)


Historical Market Cap & Volume (Last 7 days):


Unnamed: 0,timestamp,market_cap,volume
39,2025-09-19 05:02:52.049,133087000000.0,9198181000.0
40,2025-09-19 06:02:35.456,133300700000.0,9129152000.0
41,2025-09-19 07:05:27.358,132482900000.0,9033182000.0
42,2025-09-19 08:02:09.693,132649000000.0,9091177000.0
43,2025-09-19 09:01:45.625,132356500000.0,8822270000.0
44,2025-09-19 10:02:50.308,131781000000.0,8952154000.0
45,2025-09-19 11:02:40.009,131683100000.0,8821991000.0
46,2025-09-19 12:01:55.593,130811700000.0,9083403000.0
47,2025-09-19 13:01:36.488,130841200000.0,8772106000.0
48,2025-09-19 14:15:19.000,131562700000.0,8678167000.0


In [None]:
def get_daily_vwap_last_7_days(coin_id: str, vs_currency='usd', days=7, client=None):
    """Compute a per-day volume-weighted average price from CoinGecko market-chart data.

    Parameters
    ----------
    coin_id : str
        Coin id passed to ``get_coin_market_chart_by_id``.
    vs_currency : str, default 'usd'
        Quote currency passed to the API.
    days : int, default 7
        Length of the requested history window. The default keeps the
        original 7-day behaviour.
    client : optional
        Object exposing ``get_coin_market_chart_by_id(id=..., vs_currency=..., days=...)``.
        When omitted, a new ``CoinGeckoAPI`` client is created, as before.

    Returns
    -------
    pandas.DataFrame
        Indexed by calendar ``date`` with columns ``price_volume`` (sum of
        price * volume), ``volume`` (sum of volume) and ``vwap``
        (``price_volume / volume``).

    Raises
    ------
    KeyError
        If the response lacks the ``'prices'`` or ``'total_volumes'`` series.

    Notes
    -----
    NOTE(review): CoinGecko's ``total_volumes`` may be a trailing-24h figure
    rather than per-interval traded volume - confirm before interpreting the
    summed ``volume`` column as a true daily total.
    """
    if client is None:
        client = CoinGeckoAPI()

    # Step 1: fetch price and volume series for the requested window.
    data = client.get_coin_market_chart_by_id(id=coin_id, vs_currency=vs_currency, days=days)

    # Step 2: each series is a list of [timestamp_ms, value] pairs.
    prices_df = pd.DataFrame(data['prices'], columns=['timestamp', 'price'])
    volumes_df = pd.DataFrame(data['total_volumes'], columns=['timestamp', 'volume'])

    # Step 3: align price and volume on their timestamp. Copying the volume
    # column over by row position would pair the wrong values whenever the two
    # series differ in length or ordering.
    df = prices_df.merge(volumes_df, on='timestamp', how='inner')
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df['date'] = df['timestamp'].dt.date

    # Step 4: VWAP per day = sum(price * volume) / sum(volume).
    df['price_volume'] = df['price'] * df['volume']
    daily_vwap = df.groupby('date')[['price_volume', 'volume']].sum()
    daily_vwap['vwap'] = daily_vwap['price_volume'] / daily_vwap['volume']

    return daily_vwap


# Idea: maybe use the values of the last row as features, and also calculate
# slopes over the last 3 and 7 days.
vwap_df = get_daily_vwap_last_7_days(coin_id)
print("\nVWAP calculation details (last 5 rows):", vwap_df.tail(), sep="\n")


VWAP calculation details (last 5 rows):
            price_volume        volume        vwap
date                                              
2025-09-10  4.738711e+13  2.144138e+11  221.007784
2025-09-11  5.103335e+13  2.264077e+11  225.404645
2025-09-12  7.286384e+13  3.060103e+11  238.109114
2025-09-13  5.953886e+13  2.464469e+11  241.588972
2025-09-14  5.078906e+13  2.070294e+11  245.322893


## Get Coin ID from Symbol

In [11]:
cg = CoinGeckoAPI()

# Fetch the complete coin listing a single time and keep it as a DataFrame,
# with the symbol column upper-cased so lookups can compare against
# upper-case tickers.
coin_list = cg.get_coins_list()
coin_df = pd.DataFrame(coin_list).assign(
    symbol=lambda listing: listing['symbol'].str.upper()
)

def get_best_coin_id(symbol: str, coins=None):
    """Resolve a ticker symbol to a single coin id.

    Several coins can share one symbol, so candidates are narrowed in two steps:

    1. If the symbol has a hand-picked preferred name (e.g. 'ETH' -> 'Ethereum')
       and a candidate carries exactly that name, its id is returned.
    2. Otherwise the candidate with the shortest id wins; ties are broken
       alphabetically by id so the result does not depend on row order.

    Parameters
    ----------
    symbol : str
        Ticker symbol; matched case-insensitively, surrounding whitespace ignored.
    coins : pandas.DataFrame, optional
        Table with 'id', 'symbol' and 'name' columns. Defaults to the
        notebook-level ``coin_df``.

    Returns
    -------
    str or None
        The chosen coin id, or ``None`` when no coin has that symbol.
    """
    if coins is None:
        coins = coin_df  # notebook-level listing loaded in this section

    symbol = symbol.strip().upper()
    # Upper-case the column here as well, so the lookup does not depend on the
    # table having been normalised beforehand.
    matches = coins[coins['symbol'].str.upper() == symbol]

    if matches.empty:
        return None

    # 1. Prefer exact match on name like 'Ethereum' for ETH
    preferred_names = {
        'ETH': 'Ethereum',
        'BTC': 'Bitcoin',
        'BNB': 'BNB',
        'SOL': 'Solana',
        'ADA': 'Cardano',
        # Add more if needed
    }

    preferred = preferred_names.get(symbol)
    if preferred is not None:
        exact_match = matches[matches['name'] == preferred]
        if not exact_match.empty:
            return exact_match.iloc[0]['id']

    # 2. Fallback heuristic: shortest id first, then alphabetical. Sorting on
    #    length alone leaves the order of equal-length ids unspecified.
    ranked = (
        matches.assign(_id_len=matches['id'].str.len())
        .sort_values(by=['_id_len', 'id'])
    )
    return ranked.iloc[0]['id']

# Example
# Example: resolve a ticker symbol to a CoinGecko id.
# NOTE(review): this reassigns the notebook-level `coin_id` that the first
# data cell sets, so re-running earlier cells afterwards will query this coin
# instead — consider a separate variable name.
symbol = 'BNB'
coin_id = get_best_coin_id(symbol)
print(f"Best CoinGecko ID for symbol {symbol}: {coin_id}")

Best CoinGecko ID for symbol BNB: binancecoin
