In [27]:
# https://www.relataly.com/crypto-market-cluster-analysis-using-affinity-propagation-python/8114/

In [28]:
# ! pip install cryptocmd
# ! pip install seaborn

In [2]:
import json
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from cryptocmd import CmcScraper
from sklearn import cluster, covariance, manifold
warnings.filterwarnings('ignore')

In [11]:
# Read the notebook settings from ./config.json; every constant below is
# taken from that file.
with open('./config.json', 'r') as json_file:
    config = json.loads(json_file.read())

# Data-selection and request settings.
NUMBER_OF_CRYPTOCURRENCIES, SYMBOLS_MINIMUM_AGE, TIMEOUT = (
    config['number_of_cryptocurrencies'],
    config['symbols_minimum_age'],  # days
    config['timeout'],  # seconds
)

# Refresh switches, one per cached dataset.
UPDATE_BASIC_DATA, UPDATE_HISTORICAL_DATA = (
    config['update_basic_data'],
    config['update_historical_data'],
)
UPDATE_CHANGES_DATA, UPDATE_CLUSTERS = (
    config['update_changes_data'],
    config['update_clusters'],
)

config

{'number_of_cryptocurrencies': 500,
 'symbols_minimum_age': 730,
 'timeout': 10,
 'update_basic_data': False,
 'update_historical_data': False,
 'update_changes_data': False,
 'update_clusters': False}

# Loading Data 

In [31]:
def create_basic_data(added_before_year = 2021, updated_after_year = 2022):
    """Download the CoinMarketCap listing, filter it and cache it as CSV.

    Requests the first ``NUMBER_OF_CRYPTOCURRENCIES`` entries of the listing
    endpoint (sorted by market cap, descending), keeps the rows that pass the
    filters below, writes the result to ``./data/basic_data.csv`` and returns
    it.

    A row is kept when all of the following hold:
      * ``isActive`` equals 1,
      * the year in ``dateAdded`` is lower than ``added_before_year``,
      * the year in ``lastUpdated`` is greater than ``updated_after_year``,
      * ``'stablecoin'`` is not among its ``tags``,
      * ``marketPairCount`` is greater than 5.

    Args:
        added_before_year: exclusive upper bound for the ``dateAdded`` year.
        updated_after_year: exclusive lower bound for the ``lastUpdated`` year.

    Returns:
        pandas.DataFrame: the filtered listing with a fresh 0..n-1 index.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """

    # get basic data
    print('Fetching basic data...')
    url = f'https://api.coinmarketcap.com/data-api/v3/cryptocurrency/listing?start=1&limit={NUMBER_OF_CRYPTOCURRENCIES}&sortBy=market_cap&sortType=desc&convert=USD&cryptoType=all&tagType=all&audited=false'
    response = requests.get(url, timeout = TIMEOUT)
    # stop here with the HTTP status rather than with a JSON/Key error below
    response.raise_for_status()
    data = response.json()
    basic_data = pd.DataFrame(data['data']['cryptoCurrencyList'])

    # clean basic data (both date columns start with the four-digit year)
    added_year = basic_data['dateAdded'].str[:4].astype(int)
    updated_year = basic_data['lastUpdated'].str[:4].astype(int)
    keep = (
        (basic_data['isActive'] == 1) &
        (added_year < added_before_year) &
        (updated_year > updated_after_year) &
        (basic_data['tags'].apply(lambda x: 'stablecoin' not in x)) &  # TODO: Add Tether
        (basic_data['marketPairCount'] > 5)
    )
    basic_data = basic_data[keep].reset_index(drop = True)

    # export basic data (create the cache directory on a first run)
    os.makedirs('./data', exist_ok = True)
    basic_data.to_csv('./data/basic_data.csv')

    # return basic data
    print('Basic data fetched.')
    return basic_data


def import_basic_data():
    """Load the listing cached at ``./data/basic_data.csv``.

    The file's ``Unnamed: 0`` column is used as the index.

    Returns:
        pandas.DataFrame: the cached listing.
    """
    return pd.read_csv('./data/basic_data.csv', index_col = 'Unnamed: 0')


# Use the cached listing unless a refresh was requested; fall back to a fresh
# download when the cache file does not exist.
basic_data = None
if not UPDATE_BASIC_DATA:
    try:
        basic_data = import_basic_data()
    except FileNotFoundError:
        basic_data = None

if basic_data is None:
    basic_data = create_basic_data()

basic_data

Unnamed: 0,id,name,symbol,slug,tags,cmcRank,marketPairCount,circulatingSupply,selfReportedCirculatingSupply,totalSupply,maxSupply,isActive,lastUpdated,dateAdded,quotes,isAudited,auditInfoList,platform
0,1,Bitcoin,BTC,bitcoin,"['mineable', 'pow', 'sha-256', 'store-of-value...",1,10208,1.937584e+07,0.0,1.937584e+07,2.100000e+07,1,2023-05-17T09:17:00.000Z,2010-07-13T00:00:00.000Z,"[{'name': 'USD', 'price': 26835.51406259315, '...",False,,
1,1027,Ethereum,ETH,ethereum,"['pos', 'smart-contracts', 'ethereum-ecosystem...",2,6899,1.202822e+08,0.0,1.202822e+08,,1,2023-05-17T09:17:00.000Z,2015-08-07T00:00:00.000Z,"[{'name': 'USD', 'price': 1804.9869200961405, ...",False,[],
2,1839,BNB,BNB,bnb,"['marketplace', 'centralized-exchange', 'payme...",4,1365,1.558593e+08,0.0,1.558593e+08,,1,2023-05-17T09:17:00.000Z,2017-07-25T00:00:00.000Z,"[{'name': 'USD', 'price': 309.7291352691476, '...",True,"[{'coinId': '1839', 'auditor': 'CertiK', 'audi...",
3,52,XRP,XRP,xrp,"['medium-of-exchange', 'enterprise-solutions',...",6,946,5.183782e+10,0.0,9.998897e+10,1.000000e+11,1,2023-05-17T09:17:00.000Z,2013-08-04T00:00:00.000Z,"[{'name': 'USD', 'price': 0.4428081863307909, ...",False,,
4,2010,Cardano,ADA,cardano,"['dpos', 'pos', 'platform', 'research', 'smart...",7,787,3.485252e+10,0.0,3.579395e+10,4.500000e+10,1,2023-05-17T09:17:00.000Z,2017-10-01T00:00:00.000Z,"[{'name': 'USD', 'price': 0.36627678844742534,...",False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310,693,Verge,XVG,verge,"['mineable', 'multiple-algorithms', 'medium-of...",490,58,1.651938e+10,0.0,1.651938e+10,1.655500e+10,1,2023-05-17T09:17:00.000Z,2014-10-25T00:00:00.000Z,"[{'name': 'USD', 'price': 0.001917374455564105...",False,,
311,7431,Akash Network,AKT,akash-network,"['cosmos-ecosystem', 'ai-big-data', 'distribut...",491,31,1.138613e+08,0.0,1.628636e+08,3.885390e+08,1,2023-05-17T09:17:00.000Z,2020-10-16T00:00:00.000Z,"[{'name': 'USD', 'price': 0.277834548952715, '...",False,,
312,5225,FC Barcelona Fan Token,BAR,fc-barcelona-fan-token,"['sports', 'fan-token', 'binance-launchpad', '...",494,28,9.405722e+06,9405722.0,4.000000e+07,4.000000e+07,1,2023-05-17T09:17:00.000Z,2020-06-24T00:00:00.000Z,"[{'name': 'USD', 'price': 3.293269209054734, '...",False,,"{'id': 8, 'name': 'Chiliz', 'symbol': 'CHZ', '..."
313,258,Groestlcoin,GRS,groestlcoin,"['mineable', 'pow', 'groestl', 'medium-of-exch...",497,29,8.256522e+07,0.0,8.256522e+07,1.050000e+08,1,2023-05-17T09:17:00.000Z,2014-04-11T00:00:00.000Z,"[{'name': 'USD', 'price': 0.3697839402865394, ...",False,,


In [32]:
def create_historical_data(basic_data):
    """Download per-symbol price history and cache it as one CSV.

    For every entry of ``basic_data['symbol']`` a ``CmcScraper`` is asked for
    its dataframe. The seven returned columns are renamed to ``date``,
    ``open``, ``high``, ``low``, ``close``, ``volume`` and ``marketcap``, and
    three columns are added:

      * ``symbol`` - the symbol that was queried,
      * ``avg``    - mean of ``open`` and ``close``,
      * ``change`` - ``(close - open) / open``.

    Symbols whose download fails are reported and skipped. The combined frame
    is written to ``./data/historical_data.csv`` and returned.

    Args:
        basic_data: frame with a ``symbol`` column of strings.

    Returns:
        pandas.DataFrame: all downloaded rows with a fresh 0..n-1 index; an
        empty frame when no symbol could be downloaded.
    """

    # create symbols list and the per-symbol frames collected so far
    symbols = basic_data['symbol'].to_list()
    number_of_symbols = len(symbols)
    frames = []

    # loop in symbols and get historical data
    for n, symbol in enumerate(symbols, start = 1):

        # a symbol containing commas is reduced to the part before the first one
        if ',' in symbol:
            symbol = symbol.split(',')[0]

        print(f'Fetching historical data for {symbol} ({n}/{number_of_symbols})')
        scraper = CmcScraper(symbol)

        try:
            symbol_historical_data = scraper.get_dataframe()
        except Exception:
            # best effort: skip this symbol, but let KeyboardInterrupt and
            # SystemExit through so the loop can still be stopped
            print(f'Error in fetching historical data for {symbol}')
            continue

        symbol_historical_data.columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'marketcap']
        symbol_historical_data.insert(0, 'symbol', symbol)
        symbol_historical_data.insert(6, 'avg', (symbol_historical_data['open'] + symbol_historical_data['close']) / 2)
        symbol_historical_data.insert(7, 'change', (symbol_historical_data['close'] - symbol_historical_data['open']) / symbol_historical_data['open'])

        frames.append(symbol_historical_data)

    # concatenate once instead of re-copying the growing frame on every symbol
    if frames:
        historical_data = pd.concat(frames, ignore_index = True)
    else:
        historical_data = pd.DataFrame()

    # export historical data (create the cache directory on a first run so the
    # download is not lost at the very last step)
    os.makedirs('./data', exist_ok = True)
    historical_data.to_csv('./data/historical_data.csv')

    # return historical data
    print('Historical data fetched.')
    return historical_data


def import_historical_data():
    """Load the price history cached at ``./data/historical_data.csv``.

    The file's ``Unnamed: 0`` column is used as the index.

    Returns:
        pandas.DataFrame: the cached historical data.
    """
    return pd.read_csv('./data/historical_data.csv', index_col = 'Unnamed: 0')


# Use the cached price history unless a refresh was requested; fall back to a
# fresh download when the cache file does not exist.
historical_data = None
if not UPDATE_HISTORICAL_DATA:
    try:
        historical_data = import_historical_data()
    except FileNotFoundError:
        historical_data = None

if historical_data is None:
    historical_data = create_historical_data(basic_data)

historical_data

Unnamed: 0,symbol,date,open,high,low,close,avg,change,volume,marketcap
0,BTC,2023-05-16,27171.514579,27299.304812,26878.947972,27036.650710,27104.082645,-0.004963,1.273224e+10,5.238464e+11
1,BTC,2023-05-15,26931.384632,27646.348119,26766.096916,27192.693221,27062.038927,0.009703,1.441323e+10,5.268443e+11
2,BTC,2023-05-14,26788.974292,27150.976792,26661.354904,26930.637846,26859.806069,0.005288,1.001486e+10,5.217422e+11
3,BTC,2023-05-13,26807.769044,27030.482960,26710.873803,26784.078561,26795.923803,-0.000884,9.999172e+09,5.188737e+11
4,BTC,2023-05-12,26987.662514,27055.647228,25878.428830,26804.990671,26896.326592,-0.006769,1.931360e+10,5.192540e+11
...,...,...,...,...,...,...,...,...,...,...
476776,ULT,2019-01-11,0.006864,0.006913,0.006369,0.006740,0.006802,-0.018056,2.291536e+04,1.610006e+06
476777,ULT,2019-01-10,0.007067,0.007104,0.006379,0.006881,0.006974,-0.026301,2.727608e+04,1.643767e+06
476778,ULT,2019-01-09,0.006843,0.007393,0.005951,0.007065,0.006954,0.032333,3.898662e+04,0.000000e+00
476779,ULT,2019-01-08,0.006206,0.006875,0.006206,0.006841,0.006524,0.102381,2.511397e+04,0.000000e+00


# Clustering

In [33]:
def create_changes_data(historical_data, reference_symbol = 'BTC'):
    """Build the date x symbol table of ``change`` values and cache it as CSV.

    Steps:
      1. Drop symbols with fewer than ``SYMBOLS_MINIMUM_AGE`` non-null
         ``change`` values.
      2. Keep the ``SYMBOLS_MINIMUM_AGE`` most recent rows of each symbol.
      3. Drop symbols whose most recent kept date differs from that of
         ``reference_symbol``.
      4. Drop symbols whose oldest kept date differs from that of
         ``reference_symbol``.
      5. Pivot to one row per date and one column per symbol.

    The symbols removed in steps 1, 3 and 4 are printed. The table is written
    to ``./data/changes_data.csv`` and returned.

    Args:
        historical_data: frame with ``date``, ``symbol`` and ``change``
            columns; rows may be in any order.
        reference_symbol: symbol whose date window every kept symbol must
            share.

    Returns:
        pandas.DataFrame: ``change`` values indexed by date, one column per
        symbol; index and column axes are unnamed.

    Raises:
        KeyError: if ``reference_symbol`` does not survive step 1.
    """

    changes_data = historical_data[['date', 'symbol', 'change']]

    # Order rows newest-first so that head()/first()/last() below really pick
    # the most recent window and its end points, whatever the input order was.
    # The stable sort keeps the original order of rows that share a date.
    changes_data = changes_data.sort_values('date', ascending = False, kind = 'mergesort')

    symbols_age = changes_data.groupby('symbol')['change'].count()

    # keep symbols which have at least SYMBOLS_MINIMUM_AGE days of changes data
    old_enough = symbols_age >= SYMBOLS_MINIMUM_AGE
    changes_data = changes_data[changes_data['symbol'].isin(symbols_age[old_enough].index)]
    print(f'delete symbols which do not have at least {SYMBOLS_MINIMUM_AGE} days of changes data: {list(symbols_age[~old_enough].index)}')

    # keep the most recent SYMBOLS_MINIMUM_AGE rows of each symbol
    changes_data = changes_data.groupby('symbol').head(SYMBOLS_MINIMUM_AGE)

    # keep symbols which have the last date of changes data
    symbols_last_date = changes_data.groupby('symbol')['date'].first()
    if reference_symbol not in symbols_last_date.index:
        raise KeyError(f'reference symbol {reference_symbol!r} has no usable changes data')
    last_date = symbols_last_date[reference_symbol]
    has_last_date = symbols_last_date == last_date
    changes_data = changes_data[changes_data['symbol'].isin(symbols_last_date[has_last_date].index)]
    print(f'delete symbols which do not have the last day of changes data: {list(symbols_last_date[~has_last_date].index)}')

    # keep symbols which have the first date of changes data
    symbols_first_date = changes_data.groupby('symbol')['date'].last()
    first_date = symbols_first_date[reference_symbol]
    has_first_date = symbols_first_date == first_date
    changes_data = changes_data[changes_data['symbol'].isin(symbols_first_date[has_first_date].index)]
    print(f'delete symbols which do not have the first day of changes data: {list(symbols_first_date[~has_first_date].index)}')

    # create the pivot table of changes data
    changes_data = changes_data.pivot(index = 'date', columns = 'symbol', values = 'change')

    changes_data.index.name = None
    changes_data.columns.name = None

    # export changes data (create the cache directory on a first run)
    os.makedirs('./data', exist_ok = True)
    changes_data.to_csv('./data/changes_data.csv')

    # return changes data
    return changes_data


def import_changes_data():
    """Load the changes table cached at ``./data/changes_data.csv``.

    The file's ``Unnamed: 0`` column is used as the index.

    Returns:
        pandas.DataFrame: the cached changes data.
    """
    return pd.read_csv('./data/changes_data.csv', index_col = 'Unnamed: 0')


# Use the cached changes table unless a refresh was requested; rebuild it from
# the historical data when the cache file does not exist.
changes_data = None
if not UPDATE_CHANGES_DATA:
    try:
        changes_data = import_changes_data()
    except FileNotFoundError:
        changes_data = None

if changes_data is None:
    changes_data = create_changes_data(historical_data)

changes_data

Unnamed: 0,1INCH,AAVE,ABBC,ACH,ADA,ADS,AERGO,AGIX,AKT,ALGO,...,XYO,YFI,YFII,ZEC,ZEN,ZEON,ZIL,ZRX,ankrETH,stETH
2021-05-17,-0.083917,0.089130,0.095968,-0.119050,-0.116497,-0.039780,-0.106534,-0.052754,-0.002437,-0.079952,...,-0.086203,-0.024438,-0.050844,-0.101355,-0.090599,0.073557,-0.073809,-0.064328,-0.083254,-0.130722
2021-05-18,0.124842,0.104940,-0.017899,0.070049,-0.012051,0.036617,0.105873,0.076460,-0.005125,0.064164,...,0.162054,0.134930,0.061655,0.084522,0.006404,-0.220628,0.037117,0.092093,0.037580,0.030994
2021-05-19,-0.383851,-0.335111,-0.182394,-0.273651,-0.261171,-0.335110,0.098949,-0.311233,-0.168581,-0.309627,...,-0.211343,-0.370148,-0.423980,-0.417518,-0.363757,-0.885342,-0.361351,-0.371863,-0.267830,-0.259835
2021-05-20,0.128582,0.135478,-0.160410,-0.044901,0.229971,0.339043,0.071677,0.137969,0.045271,0.190812,...,0.153769,0.130971,0.245743,0.249180,0.164232,1.560125,0.297744,0.169424,0.106078,0.108076
2021-05-21,-0.135486,-0.201289,-0.071998,-0.111275,-0.138865,-0.051309,-0.142962,-0.197620,-0.093040,-0.116729,...,-0.160759,-0.155533,-0.136754,-0.140645,0.197200,-0.236331,-0.207528,-0.102186,-0.121599,-0.127130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-12,0.013715,0.013205,-0.005927,0.012711,0.029062,-0.131866,0.000679,0.073661,-0.000269,0.011498,...,0.039014,0.040854,0.010066,0.011033,0.035947,0.007814,0.013213,0.015130,0.004569,0.005416
2023-05-13,-0.010961,-0.015656,0.034152,-0.039669,-0.014694,-0.010283,-0.002063,-0.041681,0.067702,-0.013672,...,-0.030154,-0.024871,-0.013024,-0.009180,-0.020233,-0.011899,-0.004441,-0.009146,-0.004878,-0.005836
2023-05-14,0.008985,0.000955,0.036142,0.059245,0.015708,-0.014436,-0.000450,0.002766,0.002981,0.003958,...,0.007802,0.009412,0.010219,0.005397,0.006239,0.003264,0.009837,0.021026,0.000005,0.001638
2023-05-15,0.001387,0.009084,-0.001199,0.046901,-0.008914,0.029138,0.012942,0.036558,-0.009188,0.024440,...,0.006167,0.009976,-0.009982,0.003811,-0.004721,0.003982,0.013443,0.005343,0.010902,0.007793


In [34]:
def create_clusters(changes_data):
    """Cluster the symbols of ``changes_data`` and cache the result as JSON.

    Every column is divided by its standard deviation, a
    ``covariance.GraphicalLassoCV`` model is fitted on the scaled values, and
    ``cluster.affinity_propagation`` (``random_state = 1``) is run on the
    model's estimated covariance matrix. Column names that received the same
    label form one cluster.

    The clusters are written to ``./data/clusters.json``, printed, and
    returned.

    Args:
        changes_data: frame with one column per symbol.

    Returns:
        list[list]: one list of column names per cluster, ordered by label.
    """

    # create clustering data
    symbols = changes_data.columns
    clustering_data = np.array(changes_data / changes_data.std())

    # create clustering model
    edge_model = covariance.GraphicalLassoCV()
    edge_model.fit(clustering_data)
    # only the labels are needed; the exemplar indices are discarded
    _, labels = cluster.affinity_propagation(edge_model.covariance_, random_state = 1)
    n = labels.max() + 1

    # create clusters
    clusters = [list(symbols[labels == i]) for i in range(n)]

    # export clusters (create the cache directory on a first run so the fit is
    # not lost at the very last step)
    os.makedirs('./data', exist_ok = True)
    with open('./data/clusters.json', 'w') as json_file:
        json.dump(clusters, json_file, indent = 4)

    # return clusters
    print(f'{n} Clusters:')
    for i, sub_cluster in enumerate(clusters):
        print(f'Cluster {i}: {sub_cluster}')
    return clusters


def import_clusters():
    """Load the clusters cached at ``./data/clusters.json``.

    Returns:
        The parsed JSON content of the file.
    """
    with open('./data/clusters.json', 'r') as json_file:
        return json.load(json_file)


# Use the cached clusters unless a refresh was requested; recompute them from
# the changes table when the cache file does not exist.
if UPDATE_CLUSTERS:
    clusters = create_clusters(changes_data)
else:
    try:
        clusters = import_clusters()
    except FileNotFoundError:
        clusters = create_clusters(changes_data)

# Print a summary. A plain loop is used so that no throwaway list of None
# values has to be built just to call print().
print(f'{len(clusters)} Clusters:')
for i, sub_cluster in enumerate(clusters):
    print(f'Cluster {i}: {sub_cluster}')

94 Clusters:
Cluster 0: ['ABBC']
Cluster 1: ['ACH']
Cluster 2: ['AGIX']
Cluster 3: ['AMP']
Cluster 4: ['AMPL']
Cluster 5: ['AOG']
Cluster 6: ['ARRR']
Cluster 7: ['ASD']
Cluster 8: ['BCH', 'BSV', 'BTG', 'DASH', 'EOS', 'ETC', 'LPT', 'LTC', 'XLM', 'ZEC', 'ZEN']
Cluster 9: ['BDX']
Cluster 10: ['BFC']
Cluster 11: ['BIFI', 'BNB', 'CAKE', 'CHSB', 'FUN', 'GT', 'HT', 'KCS', 'OKB', 'ORC', 'POLS', 'TWT', 'WBNB', 'WRX', 'XNO', 'XVS']
Cluster 12: ['BTTOLD']
Cluster 13: ['CEEK']
Cluster 14: ['CEL']
Cluster 15: ['CELO']
Cluster 16: ['CFX']
Cluster 17: ['CNNC']
Cluster 18: ['COCOS']
Cluster 19: ['CREAM']
Cluster 20: ['CSPR']
Cluster 21: ['CTC']
Cluster 22: ['CTXC']
Cluster 23: ['DAG']
Cluster 24: ['DERO']
Cluster 25: ['DEXE']
Cluster 26: ['1INCH', 'AAVE', 'ALPHA', 'COMP', 'DPI', 'FTM', 'INJ', 'KSM', 'LRC', 'MKR', 'NEAR', 'PERP', 'REN', 'ROSE', 'SNX', 'SUSHI', 'UNI', 'WOO', 'YFI', 'YFII']
Cluster 27: ['DXD']
Cluster 28: ['AXS', 'BAR', 'BAT', 'CHR', 'CHZ', 'ENJ', 'MANA', 'MX', 'OGN', 'SAND', 'TVK', 'WAX