In [1]:
# https://www.relataly.com/crypto-market-cluster-analysis-using-affinity-propagation-python/8114/

In [2]:
# ! pip install cryptocmd
# ! pip install seaborn

In [3]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from cryptocmd import CmcScraper
from sklearn import cluster, covariance, manifold

In [4]:
# Number of entries requested from the listing endpoint (the `limit` query parameter in createBasicData).
NUMBER_OF_CRYPTOCURRENCIES = 500
# Timeout passed to requests.get when fetching the listing.
TIMEOUT = 10  # seconds

# When True, the corresponding dataset is re-fetched instead of being read from its CSV under ./data.
# When False, the CSV is used if present and the data is fetched only if the file is missing.
UPDATE_BASIC_DATA = False
UPDATE_HISTORICAL_DATA = False

In [5]:
def createBasicData():
    """Fetch the CoinMarketCap listing, filter it, cache it as CSV and return it.

    Requests the first ``NUMBER_OF_CRYPTOCURRENCIES`` listing entries sorted by
    market cap (descending) and keeps only the rows that satisfy all of:

    * ``isActive`` equals 1
    * the year prefix of ``dateAdded`` is before 2021
    * the year prefix of ``lastUpdated`` is after 2022
    * ``'stablecoin'`` is not contained in ``tags``
    * ``marketPairCount`` is greater than 5

    The filtered frame is written to ``./data/basic_data.csv`` (the directory is
    created if it does not exist yet).

    Returns:
        pandas.DataFrame: the filtered listing rows.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status code.
    """
    outputPath = './data/basic_data.csv'

    # get basic data
    print('Fetching basic data...')
    url = f'https://api.coinmarketcap.com/data-api/v3/cryptocurrency/listing?start=1&limit={NUMBER_OF_CRYPTOCURRENCIES}&sortBy=market_cap&sortType=desc&convert=USD&cryptoType=all&tagType=all&audited=false'
    response = requests.get(url, timeout = TIMEOUT)
    # Fail right here on an HTTP error rather than later with an opaque
    # JSON-decoding error or KeyError on the payload.
    response.raise_for_status()
    data = response.json()
    basicData = pd.DataFrame(data['data']['cryptoCurrencyList'])

    # clean basic data
    # The first four characters of the timestamp strings are parsed as the year.
    yearAdded = basicData['dateAdded'].str[:4].astype(int)
    yearUpdated = basicData['lastUpdated'].str[:4].astype(int)
    basicData = basicData[
        (basicData['isActive'] == 1) &
        (yearAdded < 2021) &
        (yearUpdated > 2022) &
        (basicData['tags'].apply(lambda x: 'stablecoin' not in x)) &
        (basicData['marketPairCount'] > 5)
    ]

    # export basic data
    # Create the target directory first so a fresh checkout does not fail on the write.
    os.makedirs(os.path.dirname(outputPath), exist_ok=True)
    basicData.to_csv(outputPath)

    # return basic data
    print('Basic data fetched.')
    return basicData

def importBasicData():
    """Load the cached basic listing data from ``./data/basic_data.csv``.

    Returns:
        pandas.DataFrame: the CSV contents as parsed by ``pd.read_csv`` with
        default options.
    """
    cachedCsvPath = './data/basic_data.csv'
    return pd.read_csv(cachedCsvPath)

In [6]:
def createHistoricalData(basicData):
    """Download the price history of every symbol, cache it as CSV and return it.

    For each entry of ``basicData['symbol']`` (only the part before the first
    comma is used) a ``CmcScraper`` is queried. The returned frame's columns are
    renamed to ``date, open, high, low, close, volume, marketcap`` and three
    columns are added:

    * ``symbol`` - the symbol that was queried
    * ``avg``    - ``(open + close) / 2``
    * ``change`` - ``(open - close) / open``

    Symbols whose download raises are reported and skipped. The combined frame
    is written to ``./data/historical_data.csv`` (the directory is created if it
    does not exist yet).

    Args:
        basicData (pandas.DataFrame): frame with a ``symbol`` column.

    Returns:
        pandas.DataFrame: all per-symbol frames stacked on top of each other;
        an empty frame if no symbol could be fetched.
    """
    outputPath = './data/historical_data.csv'

    # create symbols list and per-symbol frame collection
    symbols = basicData['symbol'].to_list()
    numberOfSymbols = len(symbols)
    symbolFrames = []

    # loop in symbols and get historical data
    for n, symbol in enumerate(symbols, start=1):

        # keep only the part before the first comma
        symbol = symbol.split(',')[0]

        print(f'Fetching historical data for {symbol} ({n}/{numberOfSymbols})')
        scraper = CmcScraper(symbol)

        try:
            symbolHistoricalData = scraper.get_dataframe()
        except Exception as error:
            # Best effort: skip this symbol, but let KeyboardInterrupt/SystemExit
            # propagate and show what actually went wrong.
            print(f'Error in fetching historical data for {symbol}: {error}')
            continue

        symbolHistoricalData.columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'marketcap']
        symbolHistoricalData.insert(0, 'symbol', symbol)
        symbolHistoricalData.insert(6, 'avg', (symbolHistoricalData['open'] + symbolHistoricalData['close']) / 2)
        # NOTE(review): this is positive when close is below open, i.e. the
        # opposite sign of a conventional return - confirm this is intended.
        symbolHistoricalData.insert(7, 'change', (symbolHistoricalData['open'] - symbolHistoricalData['close']) / symbolHistoricalData['open'])

        symbolFrames.append(symbolHistoricalData)

    # Concatenate once instead of re-copying the growing frame on every iteration.
    historicalData = pd.concat(symbolFrames) if symbolFrames else pd.DataFrame()

    # export historical data
    # Create the target directory first so the downloads are not lost on the write.
    os.makedirs(os.path.dirname(outputPath), exist_ok=True)
    historicalData.to_csv(outputPath)

    # return historical data
    print('Historical data fetched.')
    return historicalData

def importHistoricalData():
    """Load the cached historical data from ``./data/historical_data.csv``.

    Returns:
        pandas.DataFrame: the CSV contents as parsed by ``pd.read_csv`` with
        default options.
    """
    cachedCsvPath = './data/historical_data.csv'
    return pd.read_csv(cachedCsvPath)

In [7]:
# Basic data: reuse the cached CSV unless a refresh is forced or no cache exists.
basicData = None
if not UPDATE_BASIC_DATA:
    try:
        basicData = importBasicData()
    except FileNotFoundError:
        pass  # no cached file yet; fall through to a fresh fetch
if basicData is None:
    basicData = createBasicData()

# Historical data: same policy, fetched for the symbols in basicData when needed.
historicalData = None
if not UPDATE_HISTORICAL_DATA:
    try:
        historicalData = importHistoricalData()
    except FileNotFoundError:
        pass  # no cached file yet; fall through to a fresh fetch
if historicalData is None:
    historicalData = createHistoricalData(basicData)

Fetching basic data...
Basic data fetched.
Fetching historical data for BTC (1/317)
Fetching historical data for ETH (2/317)
Fetching historical data for BNB (3/317)
Fetching historical data for XRP (4/317)
Fetching historical data for ADA (5/317)
Fetching historical data for DOGE (6/317)
Fetching historical data for SOL (7/317)
Fetching historical data for MATIC (8/317)
Fetching historical data for DOT (9/317)
Fetching historical data for TRX (10/317)
Fetching historical data for LTC (11/317)
Fetching historical data for SHIB (12/317)
Fetching historical data for AVAX (13/317)
Fetching historical data for WBTC (14/317)
Fetching historical data for LINK (15/317)
Fetching historical data for LEO (16/317)
Fetching historical data for ATOM (17/317)
Fetching historical data for UNI (18/317)
Fetching historical data for XMR (19/317)
Fetching historical data for OKB (20/317)
Fetching historical data for ETC (21/317)
Fetching historical data for XLM (22/317)
Fetching historical data for BCH (