In [1]:
# https://www.relataly.com/crypto-market-cluster-analysis-using-affinity-propagation-python/8114/

In [2]:
# ! pip install cryptocmd
# ! pip install seaborn

In [3]:
from cryptocmd import CmcScraper
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np 
import seaborn as sns
from sklearn import cluster, covariance, manifold
import requests
import json
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Number of listings requested from the CoinMarketCap listing endpoint
# (sent as the `limit` query parameter, sorted by market cap descending).
NUMBER_OF_CRYPTOCURRENCIES = 500
# Timeout handed to requests.get for the listing request.
TIMEOUT = 10  # seconds

# When True the corresponding dataset is downloaded again; when False the cached
# CSV under ./data is loaded, and a download only happens if that file is missing.
UPDATE_BASIC_DATA = False
UPDATE_HISTORICAL_DATA = False

# Create and Import Data 

In [5]:
def createBasicData():
    """Download the CoinMarketCap listing, filter it, cache it as CSV and return it.

    Requests the first NUMBER_OF_CRYPTOCURRENCIES listings (sorted by market cap,
    descending), keeps only the rows that pass the filters below, writes the
    result to ./data/basic_data.csv and returns it with a fresh 0..n-1 index.

    Returns:
        pandas.DataFrame: the filtered listing, one row per cryptocurrency.

    Raises:
        requests.HTTPError: if the API answers with an error status code.
        requests.RequestException: on connection problems or when TIMEOUT elapses.
    """
    import os  # local import: only needed to make sure the export directory exists

    # get basic data
    print('Fetching basic data...')
    url = f'https://api.coinmarketcap.com/data-api/v3/cryptocurrency/listing?start=1&limit={NUMBER_OF_CRYPTOCURRENCIES}&sortBy=market_cap&sortType=desc&convert=USD&cryptoType=all&tagType=all&audited=false'
    response = requests.get(url, timeout = TIMEOUT)
    # fail loudly on 4xx/5xx instead of tripping over a missing key further down
    response.raise_for_status()
    data = response.json()
    basicData = pd.DataFrame(data['data']['cryptoCurrencyList'])

    # clean basic data: the year is read from the first four characters of the timestamp strings
    yearAdded = basicData['dateAdded'].apply(lambda x: int(x[:4]))
    yearUpdated = basicData['lastUpdated'].apply(lambda x: int(x[:4]))
    basicData = basicData[
        (basicData['isActive'] == 1) &
        (yearAdded < 2021) &
        (yearUpdated > 2022) &
        (basicData['tags'].apply(lambda x: 'stablecoin' not in x)) &  # TODO: Add Tether
        (basicData['marketPairCount'] > 5)
    ]

    # export basic data (the index is written as the first column; importBasicData drops it)
    os.makedirs('./data', exist_ok = True)
    basicData.to_csv('./data/basic_data.csv')

    # return basic data
    print('Basic data fetched.')
    return basicData.reset_index(drop = True)


def importBasicData():
    """Load the cached listing from ./data/basic_data.csv.

    The first CSV column is discarded (it holds the index saved at export
    time) and the rows are renumbered 0..n-1.

    Returns:
        pandas.DataFrame: the cached listing.
    """
    cachedListing = pd.read_csv('./data/basic_data.csv')

    # column 0 is the saved index, not listing data
    withoutSavedIndex = cachedListing.iloc[:, 1:]
    return withoutSavedIndex.reset_index(drop = True)


# Use the cached listing unless a refresh is requested or no cache file exists yet.
refreshBasicData = UPDATE_BASIC_DATA
if not refreshBasicData:
    try:
        basicData = importBasicData()
    except FileNotFoundError:
        refreshBasicData = True

if refreshBasicData:
    basicData = createBasicData()

basicData

Unnamed: 0,id,name,symbol,slug,tags,cmcRank,marketPairCount,circulatingSupply,selfReportedCirculatingSupply,totalSupply,maxSupply,isActive,lastUpdated,dateAdded,quotes,isAudited,auditInfoList,platform
0,1,Bitcoin,BTC,bitcoin,"['mineable', 'pow', 'sha-256', 'store-of-value...",1,10208,1.937382e+07,0.000000e+00,1.937382e+07,2.100000e+07,1,2023-05-15T07:30:00.000Z,2010-07-13T00:00:00.000Z,"[{'name': 'USD', 'price': 27459.02420767592, '...",False,,
1,1027,Ethereum,ETH,ethereum,"['pos', 'smart-contracts', 'ethereum-ecosystem...",2,6878,1.229623e+08,0.000000e+00,1.229623e+08,,1,2023-05-15T07:30:00.000Z,2015-08-07T00:00:00.000Z,"[{'name': 'USD', 'price': 1831.7221409061003, ...",False,[],
2,1839,BNB,BNB,bnb,"['marketplace', 'centralized-exchange', 'payme...",4,1362,1.558597e+08,0.000000e+00,1.558597e+08,,1,2023-05-15T07:30:00.000Z,2017-07-25T00:00:00.000Z,"[{'name': 'USD', 'price': 315.600424749181, 'v...",True,"[{'coinId': '1839', 'auditor': 'CertiK', 'audi...",
3,52,XRP,XRP,xrp,"['medium-of-exchange', 'enterprise-solutions',...",6,944,5.183782e+10,0.000000e+00,9.998897e+10,1.000000e+11,1,2023-05-15T07:30:00.000Z,2013-08-04T00:00:00.000Z,"[{'name': 'USD', 'price': 0.42639842408915835,...",False,,
4,2010,Cardano,ADA,cardano,"['dpos', 'pos', 'platform', 'research', 'smart...",7,786,3.485272e+10,0.000000e+00,3.577770e+10,4.500000e+10,1,2023-05-15T07:30:00.000Z,2017-10-01T00:00:00.000Z,"[{'name': 'USD', 'price': 0.374423306574823, '...",False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312,693,Verge,XVG,verge,"['mineable', 'multiple-algorithms', 'medium-of...",491,58,1.651934e+10,0.000000e+00,1.651934e+10,1.655500e+10,1,2023-05-15T07:30:00.000Z,2014-10-25T00:00:00.000Z,"[{'name': 'USD', 'price': 0.001920657817291577...",False,,
313,258,Groestlcoin,GRS,groestlcoin,"['mineable', 'pow', 'groestl', 'medium-of-exch...",496,29,8.255091e+07,0.000000e+00,8.255091e+07,1.050000e+08,1,2023-05-15T07:30:00.000Z,2014-04-11T00:00:00.000Z,"[{'name': 'USD', 'price': 0.3700025098308978, ...",False,,
314,4090,Wirex Token,WXT,wirex-token,"['asset-management', 'centralized-exchange', '...",497,24,1.000000e+10,0.000000e+00,1.000000e+10,1.000000e+10,1,2023-05-15T07:30:00.000Z,2019-07-09T00:00:00.000Z,"[{'name': 'USD', 'price': 0.003019193888558518...",True,"[{'coinId': '4090', 'auditor': 'CertiK', 'audi...","{'id': 27, 'name': 'Stellar', 'symbol': 'XLM',..."
315,7440,BarnBridge,BOND,barnbridge,"['defi', 'dao', 'yield-farming', 'governance',...",498,74,7.910262e+06,9.332076e+06,1.000000e+07,1.000000e+07,1,2023-05-15T07:30:00.000Z,2020-10-19T00:00:00.000Z,"[{'name': 'USD', 'price': 3.7726743688765776, ...",True,"[{'coinId': '7440', 'auditor': 'CertiK', 'audi...",


In [6]:
def createHistoricalData(basicData):
    """Download the price history of every symbol in basicData and cache it as CSV.

    For each entry of basicData['symbol'] a CmcScraper is queried. Symbols whose
    download fails are reported and skipped. Every downloaded frame gets a
    'symbol' column plus two derived columns:
        avg    = (open + close) / 2
        change = (close - open) / open
    All frames are stacked, written to ./data/historical_data.csv and returned
    with a fresh 0..n-1 index.

    Args:
        basicData (pandas.DataFrame): must contain a 'symbol' column of strings.

    Returns:
        pandas.DataFrame: columns symbol, date, open, high, low, close, avg,
        change, volume, marketcap (empty if no symbol could be downloaded).
    """
    import os  # local import: only needed to make sure the export directory exists

    # create symbols list and the list collecting one frame per symbol
    symbols = basicData['symbol'].to_list()
    numberOfSymbols = len(symbols)
    frames = []

    # loop in symbols and get historical data
    for n, symbol in enumerate(symbols, start = 1):

        # a comma-separated symbol entry is reduced to its first part
        if ',' in symbol:
            symbol = symbol.split(',')[0]

        print(f'Fetching historical data for {symbol} ({n}/{numberOfSymbols})')
        scraper = CmcScraper(symbol)

        try:
            symbolHistoricalData = scraper.get_dataframe()
        except Exception as error:
            # best effort: skip this symbol, but let KeyboardInterrupt/SystemExit through
            print(f'Error in fetching historical data for {symbol}: {error}')
            continue

        # assumes the scraper returned exactly these seven columns, in this order
        symbolHistoricalData.columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'marketcap']
        symbolHistoricalData.insert(0, 'symbol', symbol)
        symbolHistoricalData.insert(6, 'avg', (symbolHistoricalData['open'] + symbolHistoricalData['close']) / 2)
        symbolHistoricalData.insert(7, 'change', (symbolHistoricalData['close'] - symbolHistoricalData['open']) / symbolHistoricalData['open'])

        frames.append(symbolHistoricalData)

    # concatenate once instead of growing a DataFrame inside the loop
    historicalData = pd.concat(frames) if frames else pd.DataFrame()

    # export historical data (the index is written as the first column; importHistoricalData drops it)
    os.makedirs('./data', exist_ok = True)
    historicalData.to_csv('./data/historical_data.csv')

    # return historical data
    print('Historical data fetched.')
    return historicalData.reset_index(drop = True)


def importHistoricalData():
    """Load the cached price history from ./data/historical_data.csv.

    The first CSV column is discarded (it holds the index saved at export
    time) and the rows are renumbered 0..n-1.

    Returns:
        pandas.DataFrame: the cached historical data.
    """
    cachedHistory = pd.read_csv('./data/historical_data.csv')

    # column 0 is the saved index, not price data
    withoutSavedIndex = cachedHistory.iloc[:, 1:]
    return withoutSavedIndex.reset_index(drop = True)


# Use the cached price history unless a refresh is requested or no cache file exists yet.
refreshHistoricalData = UPDATE_HISTORICAL_DATA
if not refreshHistoricalData:
    try:
        historicalData = importHistoricalData()
    except FileNotFoundError:
        refreshHistoricalData = True

if refreshHistoricalData:
    historicalData = createHistoricalData(basicData)

historicalData

Unnamed: 0,symbol,date,open,high,low,close,avg,change,volume,marketcap
0,BTC,2023-05-14,26788.974292,27150.976792,26661.354904,26930.637846,26859.806069,0.005288,1.001486e+10,5.217422e+11
1,BTC,2023-05-13,26807.769044,27030.482960,26710.873803,26784.078561,26795.923803,-0.000884,9.999172e+09,5.188737e+11
2,BTC,2023-05-12,26987.662514,27055.647228,25878.428830,26804.990671,26896.326592,-0.006769,1.931360e+10,5.192540e+11
3,BTC,2023-05-11,27621.086872,27621.941986,26781.827002,27000.788271,27310.937571,-0.022457,1.672434e+10,5.230230e+11
4,BTC,2023-05-10,27654.636777,28322.688328,26883.669883,27621.756227,27638.196502,-0.001189,2.065603e+10,5.350262e+11
...,...,...,...,...,...,...,...,...,...,...
474097,BOND,2020-10-30,138.097817,147.187193,137.979919,145.501869,141.799843,0.053615,1.138718e+06,6.570573e+06
474098,BOND,2020-10-29,127.999888,141.505010,124.102887,138.097817,133.048853,0.078890,7.382822e+05,6.236221e+06
474099,BOND,2020-10-28,137.537416,147.702934,127.186443,127.999888,132.768652,-0.069345,2.663917e+06,5.780219e+06
474100,BOND,2020-10-27,177.791174,185.925320,109.829969,137.537482,157.664328,-0.226410,4.131186e+06,0.000000e+00


# Clustering

In [7]:
def createChangesData(historicalData, minDays = 730, referenceSymbol = 'BTC'):
    """Build a date x symbol table of daily changes over one common window.

    Steps:
      1. drop symbols with fewer than minDays non-null 'change' values,
      2. keep the minDays most recent rows of every remaining symbol,
      3. drop symbols whose newest date differs from referenceSymbol's newest date,
      4. drop symbols whose oldest date differs from referenceSymbol's oldest date,
      5. pivot to one row per date and one column per symbol.
    The dropped symbols of steps 1, 3 and 4 are printed.

    Args:
        historicalData (pandas.DataFrame): needs 'date', 'symbol' and 'change'
            columns; 'date' values must sort chronologically.
        minDays (int): window length in rows per symbol (default 730).
        referenceSymbol (str): symbol whose window defines the common date range.

    Returns:
        pandas.DataFrame: index = dates, columns = symbols, values = 'change';
        index and column axes are unnamed.

    Raises:
        KeyError: if referenceSymbol is not among the symbols left after step 2.
        ValueError: raised by pivot if a symbol has duplicate dates.
    """
    changesData = historicalData[['date', 'symbol', 'change']]

    # newest rows first, so head()/first()/last() below do not rely on the input order
    changesData = changesData.sort_values('date', ascending = False, kind = 'mergesort')

    # keep symbols which have at least minDays of changes data
    symbolsAge = changesData.groupby('symbol')['change'].count()
    tooYoung = symbolsAge[symbolsAge < minDays].index.to_list()
    changesData = changesData[changesData['symbol'].isin(symbolsAge[symbolsAge >= minDays].index)]
    print(f'delete symbols which do not have at least {minDays} days of changes data: {tooYoung}')

    # keep the last minDays rows of changes data
    changesData = changesData.groupby('symbol').head(minDays)

    if not (changesData['symbol'] == referenceSymbol).any():
        raise KeyError(f'reference symbol {referenceSymbol!r} has no usable changes data')

    # keep symbols which have the last date of changes data
    symbolsLastDate = changesData.groupby('symbol')['date'].first()
    lastDate = symbolsLastDate[referenceSymbol]
    missingLastDate = symbolsLastDate[symbolsLastDate != lastDate].index.to_list()
    changesData = changesData[changesData['symbol'].isin(symbolsLastDate[symbolsLastDate == lastDate].index)]
    print(f'delete symbols which do not have the last day of changes data: {missingLastDate}')

    # keep symbols which have the first date of changes data
    symbolsFirstDate = changesData.groupby('symbol')['date'].last()
    firstDate = symbolsFirstDate[referenceSymbol]
    missingFirstDate = symbolsFirstDate[symbolsFirstDate != firstDate].index.to_list()
    changesData = changesData[changesData['symbol'].isin(symbolsFirstDate[symbolsFirstDate == firstDate].index)]
    print(f'delete symbols which do not have the first day of changes data: {missingFirstDate}')

    # create the pivot table of changes data
    changesData = changesData.pivot(index = 'date', columns = 'symbol', values = 'change')

    changesData.index.name = None
    changesData.columns.name = None

    # return changes data
    return changesData


# Build the date x symbol table of daily changes and display it.
changesData = createChangesData(historicalData)
changesData

delete symbols which do not have at least two years of changes data: ['ACA', 'BTT', 'CFG', 'CORE', 'CQT', 'FCT', 'FLR', 'HIFI', 'TON', 'WEVER', 'WHBAR', 'WTRX']
delete symbols which do not have the last day of changes data: []
delete symbols which do not have the first day of changes data: ['AVINOC', 'CNX', 'LOCUS']


Unnamed: 0,1INCH,AAVE,ABBC,ACH,ADA,ADS,AERGO,AGIX,ALGO,ALPHA,...,XYO,YFI,YFII,ZEC,ZEN,ZEON,ZIL,ZRX,ankrETH,stETH
2021-05-15,-0.083220,-0.131665,-0.081620,-0.124162,0.083425,-0.150404,-0.064519,0.150867,-0.062463,-0.125322,...,-0.030908,-0.138942,-0.072608,-0.115543,-0.140801,-0.187051,-0.074110,-0.092368,-0.736920,-0.103279
2021-05-16,-0.023810,0.020371,-0.110980,0.008674,0.063404,0.058682,-0.002286,-0.061318,0.011243,-0.023672,...,-0.075491,-0.013425,0.017249,-0.027575,-0.014273,-0.139745,-0.014486,0.018487,-0.709512,0.034050
2021-05-17,-0.083917,0.089130,0.095968,-0.119050,-0.116497,-0.039780,-0.106534,-0.052754,-0.079952,-0.059007,...,-0.086203,-0.024438,-0.050844,-0.101355,-0.090599,0.073557,-0.073809,-0.064328,-0.083254,-0.130722
2021-05-18,0.124842,0.104940,-0.017899,0.070049,-0.012051,0.036617,0.105873,0.076460,0.064164,0.062784,...,0.162054,0.134930,0.061655,0.084522,0.006404,-0.220628,0.037117,0.092093,0.037580,0.030994
2021-05-19,-0.383851,-0.335111,-0.182394,-0.273651,-0.261171,-0.335110,0.098949,-0.311233,-0.309627,-0.388818,...,-0.211343,-0.370148,-0.423980,-0.417518,-0.363757,-0.885342,-0.361351,-0.371863,-0.267830,-0.259835
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-10,0.015339,0.004240,-0.004884,0.029610,0.016903,-0.010325,0.005931,0.056702,0.021871,0.039265,...,-0.011585,-0.037617,-0.048785,0.005139,0.022770,0.013812,0.021348,0.034374,-0.004780,0.000044
2023-05-11,-0.030726,-0.036844,-0.076328,-0.055683,-0.026808,0.005516,-0.048045,-0.071885,-0.035831,-0.073919,...,-0.034130,-0.054561,-0.033477,-0.023309,-0.031919,-0.017598,-0.045591,-0.050205,-0.019461,-0.023326
2023-05-12,0.013715,0.013205,-0.005927,0.012711,0.029062,-0.131866,0.000679,0.073661,0.011498,0.036063,...,0.039014,0.040854,0.010066,0.011033,0.035947,0.007814,0.013213,0.015130,0.004569,0.005416
2023-05-13,-0.010961,-0.015656,0.034152,-0.039669,-0.014694,-0.010283,-0.002063,-0.041681,-0.013672,-0.020697,...,-0.030154,-0.024871,-0.013024,-0.009180,-0.020233,-0.011899,-0.004441,-0.009146,-0.004878,-0.005836


In [14]:
def createClusters(changesData):
    """Cluster symbols from their daily changes and print the result.

    Every column of changesData is divided by its own standard deviation, a
    GraphicalLassoCV model is fitted to the scaled data, and the model's
    covariance_ matrix is passed to affinity propagation (random_state = 1).
    Symbols sharing a label form one cluster.

    Args:
        changesData (pandas.DataFrame): one column per symbol, one row per date.

    Returns:
        list[list]: clusters[i] holds the column labels assigned to cluster i.
    """
    symbols = changesData.columns

    # scale each symbol's changes by that symbol's standard deviation
    clusteringData = np.array(changesData / changesData.std())

    edgeModel = covariance.GraphicalLassoCV()
    edgeModel.fit(clusteringData)

    # only the labels are used; the exemplar indices are discarded
    _, labels = cluster.affinity_propagation(edgeModel.covariance_, random_state = 1)

    # labels are numbered from 0, so the highest label + 1 is the cluster count
    n = labels.max() + 1
    print(f'{n} Clusters:')
    clusters = [list(symbols[labels == i]) for i in range(n)]

    for i, thisCluster in enumerate(clusters):
        print(f'Cluster {i}: {thisCluster}')
    return clusters


# Run the clustering; the function prints every cluster and returns them as a list of lists.
clusters = createClusters(changesData)

35 Clusters:
Cluster 0: ['AMP', 'MOB', 'XYO']
Cluster 1: ['AOG']
Cluster 2: ['BCH', 'BSV', 'BTG', 'DASH', 'EOS', 'ETC', 'KEEP', 'LPT', 'LTC', 'XLM', 'ZEC', 'ZEN']
Cluster 3: ['BNB', 'CAKE', 'CHSB', 'GT', 'HT', 'KCS', 'MNW', 'MX', 'OKB', 'POLS', 'TWT', 'UOS', 'WBNB', 'WRX', 'XNO', 'XVS']
Cluster 4: ['CFX', 'CNNC', 'DEXE', 'ETN', 'XDC']
Cluster 5: ['BDX', 'CEL', 'CSPR', 'FTT', 'IDEX']
Cluster 6: ['BTTOLD', 'CTC', 'MCB', 'POLY', 'QNT']
Cluster 7: ['1INCH', 'AAVE', 'ALPHA', 'BFC', 'COMP', 'DPI', 'FLEX', 'FTM', 'FXS', 'INJ', 'KSM', 'LCX', 'LRC', 'MKR', 'NEAR', 'PERP', 'REN', 'ROSE', 'SNX', 'SUSHI', 'UNI', 'WOO', 'YFI']
Cluster 8: ['DXD', 'HNT', 'PLA']
Cluster 9: ['ELF', 'GLM', 'SURE']
Cluster 10: ['AXS', 'BAT', 'CHR', 'CHZ', 'ENJ', 'MANA', 'OGN', 'SAND', 'TVK', 'WAXP', 'WEMIX']
Cluster 11: ['ARAW', 'ESCE', 'RKN']
Cluster 12: ['ADS', 'AR', 'AVAX', 'BAL', 'BNT', 'CRO', 'CRV', 'DEXT', 'DOGE', 'ETH', 'FUN', 'GNO', 'LDO', 'LYXe', 'MATIC', 'MXC', 'NEXO', 'RPL', 'RUNE', 'SOL', 'TEL', 'UMA', 'WNXM'