In [51]:
# https://www.relataly.com/crypto-market-cluster-analysis-using-affinity-propagation-python/8114/

In [52]:
# ! pip install cryptocmd
# ! pip install seaborn

In [53]:
from cryptocmd import CmcScraper
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np 
import seaborn as sns
from sklearn import cluster, covariance, manifold
import requests
import json

In [54]:
# Value of the `limit` query parameter sent to the CoinMarketCap listing
# endpoint in createBasicData(), i.e. how many coins are requested.
NUMBER_OF_CRYPTOCURRENCIES = 500
# Passed as `timeout` to requests.get() in createBasicData().
TIMEOUT = 10  # seconds

# When True, the corresponding dataset is re-created instead of being read
# from its CSV under ./data; when False, the CSV is used if it exists.
UPDATE_BASIC_DATA = False
UPDATE_HISTORICAL_DATA = False

# Create and Import Data 

In [55]:
def createBasicData():
    """Download the CoinMarketCap listing, filter it, cache it and return it.

    Requests the top ``NUMBER_OF_CRYPTOCURRENCIES`` coins sorted by market cap,
    keeps only the rows that pass the filters below, writes the result to
    ``./data/basic_data.csv`` (creating ``./data`` if needed) and returns it.

    Returns:
        pandas.DataFrame: the filtered listing with a fresh 0..n-1 index.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    # Local import so this cell does not depend on an edit to the imports cell.
    import os

    # get basic data
    print('Fetching basic data...')
    url = f'https://api.coinmarketcap.com/data-api/v3/cryptocurrency/listing?start=1&limit={NUMBER_OF_CRYPTOCURRENCIES}&sortBy=market_cap&sortType=desc&convert=USD&cryptoType=all&tagType=all&audited=false'
    response = requests.get(url, timeout = TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error payload
    # (which would otherwise surface as an opaque KeyError / JSON error).
    response.raise_for_status()
    data = json.loads(response.text)
    basicData = pd.DataFrame(data['data']['cryptoCurrencyList'])

    # clean basic data
    basicData = basicData[
        (basicData['isActive'] == 1) &
        # first four characters of the timestamp string are read as the year
        (basicData['dateAdded'].apply(lambda x: int(x[:4])) < 2021) &   # added before 2021
        (basicData['lastUpdated'].apply(lambda x: int(x[:4])) > 2022) &  # updated in 2023 or later
        (basicData['tags'].apply(lambda x: 'stablecoin' not in x)) &  # TODO: Add Tether
        (basicData['marketPairCount'] > 5)
    ]

    # export basic data (the target directory may not exist on a fresh checkout)
    os.makedirs('./data', exist_ok = True)
    basicData.to_csv('./data/basic_data.csv')

    # return basic data
    print('Basic data fetched.')
    return basicData.reset_index(drop = True)


def importBasicData():
    """Load the cached basic data from ``./data/basic_data.csv``.

    The first CSV column (the index written by ``to_csv``) is discarded and
    the returned frame gets a fresh 0..n-1 index.

    Returns:
        pandas.DataFrame: the cached basic data.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
    """
    cached = pd.read_csv('./data/basic_data.csv')

    # skip column 0 (the exported index), keep everything else
    withoutIndexColumn = cached.iloc[:, 1:]

    return withoutIndexColumn.reset_index(drop = True)

In [56]:
def createHistoricalData(basicData):
    """Download price history for every symbol, cache it and return it.

    For each entry of ``basicData['symbol']`` the history is fetched through
    ``CmcScraper``. Symbols whose download fails are reported and skipped.
    The combined table is written to ``./data/historical_data.csv``
    (creating ``./data`` if needed).

    Args:
        basicData: DataFrame with a ``symbol`` column.

    Returns:
        pandas.DataFrame: one row per symbol and date with the columns
        symbol, date, open, high, low, close, avg, change, volume, marketcap
        and a fresh 0..n-1 index. Empty if no symbol could be fetched.
    """
    # Local import so this cell does not depend on an edit to the imports cell.
    import os

    # create symbols list and per-symbol frame collection
    symbols = basicData['symbol'].to_list()
    numberOfSymbols = len(symbols)
    # Frames are collected and concatenated once at the end; concatenating
    # inside the loop re-copies the accumulated data on every iteration.
    symbolFrames = []

    # loop in symbols and get historical data
    for n, symbol in enumerate(symbols, start = 1):

        # a symbol entry may hold several comma-separated tickers; use the first
        symbol = symbol.split(',')[0]

        print(f'Fetching historical data for {symbol} ({n}/{numberOfSymbols})')
        scraper = CmcScraper(symbol)

        try:
            symbolHistoricalData = scraper.get_dataframe()
        except Exception as error:
            # Best effort: skip this symbol but say why. `Exception` (not a
            # bare except) lets KeyboardInterrupt stop a long download.
            print(f'Error in fetching historical data for {symbol}: {error!r}')
            continue

        symbolHistoricalData.columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'marketcap']
        symbolHistoricalData.insert(0, 'symbol', symbol)
        # avg: midpoint of open and close; change: relative move from open to close
        symbolHistoricalData.insert(6, 'avg', (symbolHistoricalData['open'] + symbolHistoricalData['close']) / 2)
        symbolHistoricalData.insert(7, 'change', (symbolHistoricalData['close'] - symbolHistoricalData['open']) / symbolHistoricalData['open'])

        symbolFrames.append(symbolHistoricalData)

    # pd.concat raises on an empty list, so fall back to an empty frame
    historicalData = pd.concat(symbolFrames) if symbolFrames else pd.DataFrame()

    # export historical data (the target directory may not exist on a fresh checkout)
    os.makedirs('./data', exist_ok = True)
    historicalData.to_csv('./data/historical_data.csv')

    # return historical data
    print('Historical data fetched.')
    return historicalData.reset_index(drop = True)


def importHistoricalData():
    """Load the cached historical data from ``./data/historical_data.csv``.

    The first CSV column (the index written by ``to_csv``) is discarded and
    the returned frame gets a fresh 0..n-1 index.

    Returns:
        pandas.DataFrame: the cached historical data.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
    """
    cached = pd.read_csv('./data/historical_data.csv')

    # skip column 0 (the exported index), keep everything else
    withoutIndexColumn = cached.iloc[:, 1:]

    return withoutIndexColumn.reset_index(drop = True)

In [57]:
# Basic data: read the cached CSV unless a refresh was requested; fall back
# to a fresh download when the cache file is missing.
basicData = None
if not UPDATE_BASIC_DATA:
    try:
        basicData = importBasicData()
    except FileNotFoundError:
        pass
if basicData is None:
    basicData = createBasicData()

# Historical data: same load-or-create policy, driven by its own flag.
historicalData = None
if not UPDATE_HISTORICAL_DATA:
    try:
        historicalData = importHistoricalData()
    except FileNotFoundError:
        pass
if historicalData is None:
    historicalData = createHistoricalData(basicData)

# Clustering

In [122]:
def createChangesData(historicalData, days = 730):
    """Reduce the historical data to equally long change series per symbol.

    Symbols with fewer than ``days`` non-null ``change`` values are dropped.
    For every remaining symbol only its first ``days`` rows are kept, in the
    order in which they appear in ``historicalData``.

    NOTE(review): these rows are the *most recent* ``days`` days only if each
    symbol's history is ordered newest-first -- confirm against the scraper
    output.

    Args:
        historicalData: DataFrame with at least ``symbol`` and ``change`` columns.
        days: required history length per symbol; defaults to 730 (two years).

    Returns:
        pandas.DataFrame: the ``symbol`` and ``change`` columns of the kept
        rows, with their original index labels.
    """
    changesData = historicalData[['symbol', 'change']]

    # number of non-null change values per symbol
    symbolsAge = changesData.groupby('symbol')['change'].count()

    # keep symbols which have at least `days` days of changes data
    eligibleSymbols = symbolsAge[symbolsAge >= days].index
    changesData = changesData[changesData['symbol'].isin(eligibleSymbols)]

    # keep the first `days` rows of every remaining symbol
    changesData = changesData.groupby('symbol').head(days)

    # return changes data
    return changesData

In [123]:
# Build the filtered symbol/change table and show it as the cell output.
changesData = createChangesData(historicalData)
changesData

Unnamed: 0,symbol,change
0,BTC,0.000884
1,BTC,0.006769
2,BTC,0.022457
3,BTC,0.001189
4,BTC,0.001310
...,...,...
479606,AKT,0.005125
479607,AKT,0.002437
479608,AKT,0.046933
479609,AKT,-0.050424


In [128]:
# changesData.pivot(columns = 'symbol', values = 'change')