# Exploring Data Available for Cryptocurrencies
- Main purpose: identify data that can be used in an ML model.

In [8]:
from pycoingecko import CoinGeckoAPI
import pandas as pd
import time

cg = CoinGeckoAPI()

# --- 1. Select a coin (use id, e.g., 'bitcoin', 'ethereum') ---
coin_id = 'batcat'

# --- 2. Get coin data ---
coin_data = cg.get_coin_by_id(id=coin_id, localization=False)

# Market & Supply data
# `or {}` (rather than a `.get` default) also covers a key that is present
# but null, so the chained `.get("usd")` calls below cannot hit None.
market_data = coin_data.get('market_data') or {}
supply_data = {
    "circulating_supply": market_data.get("circulating_supply"),
    "total_supply": market_data.get("total_supply"),
    "max_supply": market_data.get("max_supply"),
    "market_cap_usd": (market_data.get("market_cap") or {}).get("usd"),
    "volume_24h_usd": (market_data.get("total_volume") or {}).get("usd"),
    "liquidity_score": coin_data.get("liquidity_score"),
}

# Developer data
developer_data = coin_data.get("developer_data") or {}

# Community data
community_data = coin_data.get("community_data") or {}

# --- 3. Trending coins ---
trending = cg.get_search_trending()
trending_coins = [item['item']['id'] for item in trending['coins']]

# --- 4. Categories (e.g., AI, DeFi, Gaming) ---
categories = cg.get_coins_categories()
category_df = pd.DataFrame(categories)[['id', 'name', 'market_cap', 'market_cap_change_24h']]

# --- 5. Historical data (market cap & volume) for last 7 days ---
historical = cg.get_coin_market_chart_by_id(id=coin_id, vs_currency='usd', days=7)
market_caps = historical.get('market_caps', [])
volumes = historical.get('total_volumes', [])

# Each series is a list of [timestamp_ms, value] pairs. Join them on the
# timestamp instead of pairing by list position: positional pairing raises
# ValueError when the two lists differ in length and would silently misalign
# rows if they were offset. A timestamp missing from one series yields NaN.
market_cap_df = pd.DataFrame(market_caps, columns=['timestamp', 'market_cap'])
volume_df = pd.DataFrame(volumes, columns=['timestamp', 'volume'])
historical_df = (
    market_cap_df
    .merge(volume_df, on='timestamp', how='outer')
    .sort_values('timestamp')
    .reset_index(drop=True)
)
historical_df['timestamp'] = pd.to_datetime(historical_df['timestamp'], unit='ms')

# --- 6. Display / Save Results ---
print("Supply & Market Cap:")
print(pd.Series(supply_data))

print("\nDeveloper Data:")
print(pd.Series(developer_data))

print("\nCommunity Data:")
print(pd.Series(community_data))

print("\nTrending Coins:")
print(trending_coins)

print("\nTop Categories:")
print(category_df.head())

print("\nHistorical Market Cap & Volume (Last 7 days):")
print(historical_df.tail())


Supply & Market Cap:
circulating_supply    9.999867e+08
total_supply          9.999867e+08
max_supply            9.999867e+08
market_cap_usd        6.028200e+04
volume_24h_usd        1.089400e+02
liquidity_score                NaN
dtype: float64

Developer Data:
forks                                                                       0
stars                                                                       0
subscribers                                                                 0
total_issues                                                                0
closed_issues                                                               0
pull_requests_merged                                                        0
pull_request_contributors                                                   0
code_additions_deletions_4_weeks       {'additions': None, 'deletions': None}
commit_count_4_weeks                                                        0
last_4_weeks_commit_activity_series

In [15]:
from pycoingecko import CoinGeckoAPI
import pandas as pd

cg = CoinGeckoAPI()

# Download every listed coin (id, symbol, name)
coin_list = cg.get_coins_list()

# Tabular form for lookups, with the ticker column upper-cased so that
# symbol comparisons do not depend on the case returned by the API
coin_df = pd.DataFrame(coin_list).assign(
    symbol=lambda frame: frame['symbol'].str.upper()
)

# Example: Find coin_id for a symbol like 'ETH'
def get_coin_id_from_symbol(symbol: str):
    """Look up the coin id(s) listed under a ticker symbol in `coin_df`.

    The symbol is upper-cased before comparison with the 'symbol' column.

    Returns
    -------
    None
        When no row carries the symbol.
    str
        The 'id' of the row when exactly one row matches.
    pandas.DataFrame
        The 'id' and 'name' columns of all matching rows when several
        coins share the symbol.
    """
    wanted = symbol.upper()
    hits = coin_df.loc[coin_df['symbol'] == wanted]
    hit_count = len(hits)

    if hit_count == 0:
        return None
    if hit_count == 1:
        return hits['id'].iloc[0]
    # Ambiguous ticker: hand back the candidates for the caller to choose from
    return hits[['id', 'name']]

# Test example: look up a ticker and show whatever the helper returns
# (a single id string, a table of candidates, or None)
symbol = 'XRP'
coin_id = get_coin_id_from_symbol(symbol=symbol)
print(f"Coin ID for {symbol}:")
coin_id


Coin ID for XRP:


Unnamed: 0,id,name
2101,binance-peg-xrp,Binance-Peg XRP
7435,harrypotterobamapacman8inu,HarryPotterObamaPacMan8Inu
13596,ripple,XRP
17305,warioxrpdumbledoreyugioh69inu,WarioXRPDumbledoreYugioh69Inu


In [11]:
cg = CoinGeckoAPI()

# Fetch the complete coin list up front; the lookup function below reads it
# from `coin_df` instead of calling the API again
coin_list = cg.get_coins_list()
coin_df = pd.DataFrame(coin_list)
coin_df = coin_df.assign(symbol=coin_df['symbol'].str.upper())

# Display name of the intended listing for tickers that several coins share.
# Add more if needed.
_PREFERRED_NAMES = {
    'ETH': 'Ethereum',
    'BTC': 'Bitcoin',
    'BNB': 'BNB',
    'SOL': 'Solana',
    'ADA': 'Cardano',
}


def get_best_coin_id(symbol: str, coins: "pd.DataFrame | None" = None):
    """Pick a single coin id for a ticker symbol.

    Parameters
    ----------
    symbol : str
        Ticker to resolve. Case and surrounding whitespace are ignored.
    coins : pandas.DataFrame, optional
        Table with 'id', 'symbol' and 'name' columns. When omitted, the
        notebook-level `coin_df` is used.

    Returns
    -------
    str or None
        None when no row carries the symbol. Otherwise the id of the row
        whose name equals the entry in `_PREFERRED_NAMES` for that symbol,
        if there is one; failing that, the shortest matching id, with ties
        between equally short ids resolved alphabetically.
    """
    table = coin_df if coins is None else coins
    symbol = symbol.strip().upper()
    # Upper-case the column here as well so the lookup does not depend on the
    # table having been normalized beforehand.
    matches = table[table['symbol'].str.upper() == symbol]

    if matches.empty:
        return None

    # 1. Prefer exact match on name like 'Ethereum' for ETH
    preferred_name = _PREFERRED_NAMES.get(symbol)
    if preferred_name is not None:
        exact_match = matches[matches['name'] == preferred_name]
        if not exact_match.empty:
            return exact_match.iloc[0]['id']

    # 2. Fallback heuristic: shortest id. The (length, id) key makes the
    #    result deterministic when several ids have the same length, which a
    #    length-only sort with the default (unstable) algorithm does not.
    return min(matches['id'], key=lambda cid: (len(cid), cid))

# Example: resolve a ticker that has an entry in the preferred-name table
symbol = 'BNB'
coin_id = get_best_coin_id(symbol=symbol)
print(f"Best CoinGecko ID for symbol {symbol}: {coin_id}")

Best CoinGecko ID for symbol BNB: binancecoin
