In [1]:
import pandas as pd
import requests
import os
import shutil
import time

#### **Notice**  
1. For data of volume, **Bitcon** data is incomplete, so we chose to eliminate the Bitcon in the volume section, and chose to plot the volume graph in the next place in the tvl order sort, i.e., the **Polygon**.  
2. For the chainname obtained through the tvl sort is not completely correct, in the api of the fee and volume there are some chains can not get the data, we are here to eliminate these data.  
3. We need data of 365*2+30 days (of tvl and price) to calculate the z-score to recognize abnormal values. We only plot data of 120 days.

In [2]:
# you can get the chain list straihgt from the API
url = 'https://api.llama.fi/v2/chains'
response = requests.get(url)
if response.status_code == 200: # data was fetched successfully
    data = response.json()
    df = pd.DataFrame(data)
else:
    raise Exception('Error fetching data from DeFiLlama API')

dex_all_chains_url = "https://api.llama.fi/overview/dexs?excludeTotalDataChart=true&excludeTotalDataChartBreakdown=true&dataType=dailyVolume"
dex_all_chains_response = requests.get(dex_all_chains_url)

dex_all_chains_data = dex_all_chains_response.json()
dex_all_chains = set(dex_all_chains_data.get('allChains', []))


fees_all_chains_url = "https://api.llama.fi/overview/fees?excludeTotalDataChart=true&excludeTotalDataChartBreakdown=true&dataType=dailyFees"
fees_all_chains_response = requests.get(fees_all_chains_url)

fees_all_chains_data = fees_all_chains_response.json()
fees_all_chains = set(fees_all_chains_data.get('allChains', []))

common_chains = dex_all_chains.intersection(fees_all_chains) # get the common chains between the two sets

df = df[['name', 'tvl']]
df = df[df['name'].isin(common_chains)]

# Sort the data to get the top 10 chains by TVL
df = df.sort_values(by='tvl', ascending=False).head(11)
print(df)

# build the folder structure, update data
base_folder = './data'
fee_folder = os.path.join(base_folder, 'fee')
tvl_folder = os.path.join(base_folder, 'tvl')
volume_folder = os.path.join(base_folder, 'volume')
price_folder = os.path.join(base_folder, 'price')

for folder in [fee_folder, tvl_folder, volume_folder, price_folder]:
    if os.path.exists(folder):
        shutil.rmtree(folder)  # delete the folder and its contents
    os.makedirs(folder)

         name           tvl
16   Ethereum  5.698114e+10
49     Solana  1.008245e+10
56    Bitcoin  6.867289e+09
94       Tron  5.649744e+09
10        BSC  4.981577e+09
17       Base  2.975321e+09
19   Arbitrum  2.633880e+09
97        Sui  1.474867e+09
14  Avalanche  1.247957e+09
22       CORE  8.917046e+08
61      Aptos  8.571662e+08


In [3]:
chain_order = df['name'].tolist()
chain_index = {chain: str(index + 1).zfill(2) for index, chain in enumerate(chain_order)}
# give chainname an index so we can plot in order


def fetch_historical_data(chain_name, metric):
    if metric == 'tvl':
        url = f'https://api.llama.fi/v2/historicalChainTvl/{chain_name}'
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            df = pd.DataFrame(data)  # Directly create DataFrame from the list of dictionaries
            df['date'] = pd.to_datetime(df['date'], unit='s')
            csv_filename = os.path.join(tvl_folder, f"{chain_index[chain_name]}_{chain_name}_{metric}.csv")
            df = df[['date', metric]].tail(365*2+30)
            df.to_csv(csv_filename, index=False)
            print(f"Saved {metric} data for {chain_name} to {csv_filename}")
        else:
            raise Exception(f'Error fetching historical {metric} data for {chain_name}')
        
    elif metric == 'volume':
        url = (f'https://api.llama.fi/overview/dexs/{chain_name}'
                  '?excludeTotalDataChart=false'
                  '&excludeTotalDataChartBreakdown=true'
                  '&dataType=dailyVolume')
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            df = pd.DataFrame(data['totalDataChart'], columns=['timestamp', metric])   
            df['date'] = pd.to_datetime(df['timestamp'], unit='s')
            df = df.sort_values(by='date')
            df = df[['date', metric]].tail(365*2+30)
            csv_filename = os.path.join(volume_folder, f"{chain_index[chain_name]}_{chain_name}_{metric}.csv")
            df.to_csv(csv_filename, index=False)
            print(f"Saved {metric} data for {chain_name} to {csv_filename}")
        else:
            raise Exception(f'Error fetching historical {metric} data for {chain_name}')
        
    elif metric == 'fee':
        url = (f'https://api.llama.fi/overview/fees/{chain_name}'
                  '?excludeTotalDataChart=false'
                  '&excludeTotalDataChartBreakdown=true'
                  '&dataType=dailyFees')
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            df = pd.DataFrame(data['totalDataChart'], columns=['timestamp', metric])   
            df['date'] = pd.to_datetime(df['timestamp'], unit='s')
            df = df.sort_values(by='date')
            df = df[['date', metric]].tail(365*2+30)
            csv_filename = os.path.join(fee_folder, f"{chain_index[chain_name]}_{chain_name}_{metric}.csv")
            df.to_csv(csv_filename, index=False)
            print(f"Saved {metric} data for {chain_name} to {csv_filename}")
        else:
            raise Exception(f'Error fetching historical {metric} data for {chain_name}')
    else:
        raise ValueError('Invalid metric specified')

In [4]:
import sys
import os
import getpass
# find a way to use relative path, so that the function in dynamoUtil can be run in any machine
src_path = r'C:\Users\YuweiCao\Documents\GitHub\char-python-common\src'

if src_path not in sys.path:
    sys.path.append(src_path)

USER = getpass.getuser()
REPO_PATH = r'C:\Users\YuweiCao\Documents\GitHub\char-python-common'

sys.path.append(REPO_PATH)
sys.path.append(REPO_PATH + '/src')

from dynamoUtil import price_bar_query
from datetime import datetime, timedelta

mapping = {
    'Ethereum': 'ETHUSDT-OKX-00000000-FUT',
    'Tron': 'TRXUSDT-OKX-00000000-FUT',
    'Solana': 'SOLUSDT-OKX-00000000-FUT',
    'BSC': 'BNBUSDT-OKX-00000000-FUT',
    'Base': 'BASEUSDT-OKX-00000000-FUT',
    'Bitcoin': 'BTCUSDT-OKX-00000000-FUT',
    'Arbitrum': 'ARBUSDT-OKX-00000000-FUT',
    'Avalanche': 'AVAXUSDT-OKX-00000000-FUT',
    'Sui': 'SUIUSDT-OKX-00000000-FUT',
    'Aptos': 'APTUSDT-OKX-00000000-FUT',
    'Polygon': 'MATICUSDT-OKX-00000000-FUT'
}

# give df the mapping of the code
'''
    直接用map list对应不知道会不会有问题
'''

def fetch_price_data(chain_name):
    today = datetime.now().replace(hour=23, minute=0, second=0, microsecond=0)
    start_date = today - timedelta(days=365*2+30)

    today_str = today.strftime('%Y-%m-%d-%H-%M')
    start_date_str = start_date.strftime('%Y-%m-%d-%H-%M')
    
    instrument_code = mapping.get(chain_name)

    price_data = price_bar_query(instrument_code + '|1440', start_date_str, today_str, 'live')
    price_df = pd.DataFrame(price_data)
    print(f"Columns in DataFrame for {chain_name}: {price_df.columns}") 
    print(price_df.head())
    
    if price_df.empty:
        print(f"No valid data found for {chain_name}. Skipping.")
        return

    if 'timestamp' not in price_df.columns or 'close' not in price_df.columns:
        print(f"Required columns 'timestamp' or 'close' not found in the data for {chain_name}. Skipping.")
        return

    selected_columns = price_df[['timestamp', 'close']].copy()

    selected_columns['timestamp'] = pd.to_datetime(selected_columns['timestamp']).dt.date

    selected_columns.rename(columns={'timestamp': 'date', 'close': 'price'}, inplace=True)

    csv_filename = os.path.join(price_folder, f"{chain_index[chain_name]}_{chain_name}_price.csv")
    # csv_filename = os.path.join(price_folder_USD, f"{chain_index[chain_name]}_{chain_name}_price_USD.csv")
    selected_columns.to_csv(csv_filename, index=False)
    print(f"Saved price data for {chain_name} to {csv_filename}")


1.For tvl we store all 11 of them.  
2.For volume we skip Bitcoin, it's incomplete.  
3.For fee we store all 11 of them.
4.For price we skip Base(doesn't have coin), store POL and TRX using api.

In [5]:
for chain_name in df['name']:
    for metric in ['tvl', 'fee']:
        fetch_historical_data(chain_name, metric)
        
for chain_name in df['name']:
    if chain_name.lower() == 'bitcoin':
        print(f"Skipping {chain_name} for {metric}")
        continue # Skip Bitcoin as its volume data is incomplete
    metric = 'volume'
    fetch_historical_data(chain_name, metric)
    
for chain_name in df['name']:
    fetch_price_data(chain_name)

Saved tvl data for Ethereum to ./data\tvl\01_Ethereum_tvl.csv
Saved fee data for Ethereum to ./data\fee\01_Ethereum_fee.csv
Saved tvl data for Solana to ./data\tvl\02_Solana_tvl.csv
Saved fee data for Solana to ./data\fee\02_Solana_fee.csv
Saved tvl data for Bitcoin to ./data\tvl\03_Bitcoin_tvl.csv
Saved fee data for Bitcoin to ./data\fee\03_Bitcoin_fee.csv
Saved tvl data for Tron to ./data\tvl\04_Tron_tvl.csv
Saved fee data for Tron to ./data\fee\04_Tron_fee.csv
Saved tvl data for BSC to ./data\tvl\05_BSC_tvl.csv
Saved fee data for BSC to ./data\fee\05_BSC_fee.csv
Saved tvl data for Base to ./data\tvl\06_Base_tvl.csv
Saved fee data for Base to ./data\fee\06_Base_fee.csv
Saved tvl data for Arbitrum to ./data\tvl\07_Arbitrum_tvl.csv
Saved fee data for Arbitrum to ./data\fee\07_Arbitrum_fee.csv
Saved tvl data for Sui to ./data\tvl\08_Sui_tvl.csv
Saved fee data for Sui to ./data\fee\08_Sui_fee.csv
Saved tvl data for Avalanche to ./data\tvl\09_Avalanche_tvl.csv
Saved fee data for Avalanche

TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'

In [6]:
base_url = "https://data-api.cryptocompare.com/futures/v1/historical/days"
params_trx = {
    'market': 'okex', 
    'instrument': 'TRX-USDT-VANILLA-PERPETUAL', 
    'groups': 'OHLC',  
    'limit': 365*2+30,  
    'aggregate': 1,  
    'fill': 'true',  
    'apply_mapping': 'true',  
    'api_key': 'YOUR_API_KEY'  
}

response_trx = requests.get(base_url, params=params_trx)

if response_trx.status_code == 200:
    data = response_trx.json()
    if 'Data' in data and isinstance(data['Data'], list):
        data_list = data['Data']

        extracted_data = [{'timestamp': entry['TIMESTAMP'], 'price': entry['CLOSE']}
                          for entry in data_list if 'TIMESTAMP' in entry and 'CLOSE' in entry]

        df = pd.DataFrame(extracted_data)

        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s').dt.date

        df.rename(columns={'timestamp': 'date'}, inplace=True)

        if not df.empty:
            csv_filename = os.path.join(price_folder, "03_Tron_price.csv")
            df.tail(150).to_csv(csv_filename, index=False)
            print("Data saved to 03_Tron_price.csv")
        else:
            print("No data available for TRX-USDT on OKEx (OKX).")
    else:
        print(f"Unexpected data format: {data}")
else:
    print(f"Failed to fetch data. HTTP status code: {response_trx.status_code}")

Data saved to 03_Tron_price.csv


In [7]:
base_url = "https://data-api.cryptocompare.com/futures/v1/historical/days"
api_key = "YOUR_API_KEY"

today_timestamp = int(time.time()) 

params_1 = {
    "market": "okex",
    "instrument": "POL-USDT-VANILLA-PERPETUAL",
    "limit": 365*2+30,
    "aggregate": 1,
    "fill": "true",
    "apply_mapping": "true",
    "to_ts": today_timestamp, 
    "api_key": api_key
}

params_2 = {
    "market": "bitmex",
    "instrument": "MATIC-USDT-VANILLA-PERPETUAL",
    "limit": 365*2+30,
    "aggregate": 1,
    "fill": "true",
    "apply_mapping": "true",
    "to_ts": 1727062614,  
    "api_key": api_key
}

def fetch_data(params):
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        data = response.json()
        if 'Data' in data and isinstance(data['Data'], list):
            data_list = data['Data']

            if not data_list:
                print("No data returned.")
                return pd.DataFrame()

            print("Sample data entry:", data_list[0])  

            extracted_data = []
            for entry in data_list:
                if 'TIMESTAMP' in entry and 'CLOSE' in entry: 
                    extracted_data.append({
                        'timestamp': entry['TIMESTAMP'], 
                        'price': entry['CLOSE']
                    })
                else:
                    print("Missing expected fields in entry:", entry)

            if not extracted_data:
                print("No valid data extracted.")
                return pd.DataFrame()

            df = pd.DataFrame(extracted_data)

            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s').dt.date

            df.rename(columns={'timestamp': 'date'}, inplace=True)

            return df
        else:
            print("Unexpected data format:", data)
            return pd.DataFrame()
    else:
        print(f"Failed to fetch data. HTTP status code: {response.status_code}")
        return pd.DataFrame()
    
data_1 = fetch_data(params_1)
data_2 = fetch_data(params_2)

if not data_1.empty and not data_2.empty:
    combined_data = pd.concat([data_1, data_2]).drop_duplicates(subset=["date"], keep="last") # combine and remove duplicates
    combined_data.sort_values(by="date", inplace=True)

    combined_data = combined_data.tail(365*2+30)
    csv_filename = os.path.join(price_folder, f"11_Polygon_price.csv")
    combined_data.to_csv(csv_filename, index=False)
    print("Data saved to 11_Polygon_price.csv")
else:
    print("Fail to fetch enough data.")

Sample data entry: {'UNIT': 'DAY', 'TIMESTAMP': 1727136000, 'TYPE': '914', 'MARKET': 'okex', 'INSTRUMENT': 'POL-USDT-SWAP', 'MAPPED_INSTRUMENT': 'POL-USDT-VANILLA-PERPETUAL', 'INDEX_UNDERLYING': 'POL', 'QUOTE_CURRENCY': 'USDT', 'SETTLEMENT_CURRENCY': 'USDT', 'CONTRACT_CURRENCY': 'POL', 'DENOMINATION_TYPE': 'VANILLA', 'INDEX_UNDERLYING_ID': 10343, 'QUOTE_CURRENCY_ID': 7, 'SETTLEMENT_CURRENCY_ID': 7, 'CONTRACT_CURRENCY_ID': 10343, 'TRANSFORM_FUNCTION': '', 'OPEN': 0.417, 'HIGH': 0.417, 'LOW': 0.3905, 'CLOSE': 0.4096, 'FIRST_TRADE_TIMESTAMP': 1727166616, 'LAST_TRADE_TIMESTAMP': 1727222160, 'FIRST_TRADE_PRICE': 0.417, 'HIGH_TRADE_PRICE': 0.417, 'HIGH_TRADE_TIMESTAMP': 1727166616, 'LOW_TRADE_PRICE': 0.3905, 'LOW_TRADE_TIMESTAMP': 1727166616, 'LAST_TRADE_PRICE': 0.4096, 'TOTAL_TRADES': 5847, 'TOTAL_TRADES_BUY': 3503, 'TOTAL_TRADES_SELL': 2344, 'TOTAL_TRADES_UNKNOWN': 0, 'NUMBER_OF_CONTRACTS': 363952, 'VOLUME': 3639520, 'QUOTE_VOLUME': 1479825.706, 'VOLUME_BUY': 1766230, 'QUOTE_VOLUME_BUY': 7