# Clustering Crypto

In [76]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from pathlib import Path

### Fetching Cryptocurrency Data

In [100]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [415]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
r = requests.get(url)
data = r.json()

In [416]:
list(data.keys())



In [417]:
# Alternatively, use the provided csv file:
file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame
data = data['Data']
crypto_df = pd.DataFrame.from_dict(data)

In [418]:
crypto_df

Unnamed: 0,42,300,365,404,433,611,808,888,1337,2015,...,QDX,EMPIRE,BCNA,SOUND,DREAMS,CATZ,SOSNOVKINO,TST,SNK,MBX
Id,4321,749869,33639,21227,926547,20909,28223,29462,20824,3744,...,944705,944707,927428,944710,944713,944696,196612,944717,944716,944724
Url,/coins/42/overview,/coins/300/overview,/coins/365/overview,/coins/404/overview,/coins/433/overview,/coins/611/overview,/coins/808/overview,/coins/888/overview,/coins/1337/overview,/coins/2015/overview,...,/coins/qdx/overview,/coins/empire/overview,/coins/bcna/overview,/coins/sound/overview,/coins/dreams/overview,/coins/catz/overview,/coins/sosnovkino/overview,/coins/tst/overview,/coins/snk/overview,/coins/mbx/overview
ImageUrl,/media/35650717/42.jpg,/media/27010595/300.png,/media/352070/365.png,/media/35650851/404-300x300.jpg,/media/34836095/433.png,/media/35650940/611-sixeleven.png,/media/351513/808.png,/media/351639/888.png,/media/35520987/elite.png,/media/20180/2015.png,...,/media/39108992/qdx.png,/media/39108993/empire.png,/media/39108994/bcna.png,/media/39108995/sound.png,/media/39108997/dreams.png,/media/39108988/catz.png,/media/1383865/snk.png,/media/39108998/tst.png,/media/39108999/snk.png,/media/39109004/mbx.png
ContentCreatedOn,1427211129,1517935016,1480032918,1466100361,1541597321,1465914773,1473980395,1475534352,1465838687,1425316878,...,1637245576,1637246468,1543411914,1637248574,1637249613,1637242143,1501035708,1637251491,1637251368,1637254217
Name,42,300,365,404,433,611,808,888,1337,2015,...,QDX,EMPIRE,BCNA,SOUND,DREAMS,CATZ,SOSNOVKINO,TST,SNK,MBX
Symbol,42,300,365,404,433,611,808,888,1337,2015,...,QDX,EMPIRE,BCNA,SOUND,DREAMS,CATZ,SOSNOVKINO,TST,SNK,MBX
CoinName,42 Coin,300 token,365Coin,404Coin,433 Token,SixEleven,808,Octocoin,EliteCoin,2015 coin,...,Quidax,Empire Token,BitCanna,Sound Coin,Dreams Quest,CatzCoin,Sosnovkino,Standard Token,Snake Token,MobieCoin
FullName,42 Coin (42),300 token (300),365Coin (365),404Coin (404),433 Token (433),SixEleven (611),808 (808),Octocoin (888),EliteCoin (1337),2015 coin (2015),...,Quidax (QDX),Empire Token (EMPIRE),BitCanna (BCNA),Sound Coin (SOUND),Dreams Quest (DREAMS),CatzCoin (CATZ),Sosnovkino (SOSNOVKINO),Standard Token (TST),Snake Token (SNK),MobieCoin (MBX)
Description,Everything about 42 coin is 42 - apart from th...,300 token is an ERC20 token. This Token was cr...,365Coin is a Proof of Work and Proof of Stake ...,404 is a PoW/PoS hybrid cryptocurrency that al...,433 Token is a decentralised soccer platform t...,"611 is a Namecoin based cryptocurrency, and it...",808 is a coin develop for the music community ...,OCTO is a Counterparty asset with focus on dec...,1337 coin was created as an experimental Proof...,Twenty15 Coin was conceived to be an asset bac...,...,"As the native token of Quidax, QDX powers key ...",Empire Token is a decentralized token on the B...,BitCanna will provide a decentralized payment ...,Sound Coin is a full-on-chain liquidity protoc...,Dreams Quest is building a metaverse-based eco...,Launched on 1 May 2021 by a team based in Aust...,Sosnovkino is a decentralized land development...,The Standard Protocol is a decentralized finan...,Cryptosnake is an incremental money-making gam...,MobiePay is a way for both consumers and merch...
AssetTokenStatus,,,,,Finished,,,,,,...,,,Finished,,,,Finished,Upcoming,,


In [419]:
coin_supply_df = pd.read_csv(file_path)
coin_supply_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [420]:
coin_supply_df = coin_supply_df.set_index('CoinName')
coin_supply_df.head()

Unnamed: 0_level_0,Unnamed: 0,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42 Coin,42,Scrypt,True,PoW/PoS,41.99995,42
365Coin,365,X11,True,PoW/PoS,,2300000000
404Coin,404,Scrypt,True,PoW/PoS,1055185000.0,532000000
SixEleven,611,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [421]:
coin_supply_df.drop(columns=[
    'Unnamed: 0',
    'Algorithm',
    'IsTrading',
    'ProofType',
], inplace=True)
coin_supply_df.head()

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1
42 Coin,41.99995,42
365Coin,,2300000000
404Coin,1055185000.0,532000000
SixEleven,,611000
808,0.0,0


TotalCoinSupply was missing from the website.

In [422]:
crypto_df = crypto_df.T
crypto_df.head()

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,BuiltOn,SmartContractAddress,DecimalPoints,Difficulty,AlgorithmType
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,0.0,0.0,0.0,0.0,,,,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,300.0,0.0,0.0,0.0,token,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,0.0,0.0,0.0,0.0,,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,0.0,0.0,0.0,0.0,,,,,,
433,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,...,,,,,,,,,,


In [423]:
crypto_df = crypto_df.set_index('CoinName')
crypto_df.head()

Unnamed: 0_level_0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,FullName,Description,AssetTokenStatus,Algorithm,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,BuiltOn,SmartContractAddress,DecimalPoints,Difficulty,AlgorithmType
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42 Coin,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,Scrypt,...,0.0,0.0,0.0,0.0,,,,,,
300 token,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token (300),300 token is an ERC20 token. This Token was cr...,,,...,300.0,0.0,0.0,0.0,token,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0,,
365Coin,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,X11,...,0.0,0.0,0.0,0.0,,,,,,
404Coin,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,Scrypt,...,0.0,0.0,0.0,0.0,,,,,,
433 Token,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,,...,,,,,,,,,,


### Data Preprocessing

In [424]:
crypto_df.columns

Index(['Id', 'Url', 'ImageUrl', 'ContentCreatedOn', 'Name', 'Symbol',
       'FullName', 'Description', 'AssetTokenStatus', 'Algorithm', 'ProofType',
       'SortOrder', 'Sponsored', 'Taxonomy', 'Rating', 'IsTrading',
       'TotalCoinsMined', 'CirculatingSupply', 'BlockNumber',
       'NetHashesPerSecond', 'BlockReward', 'BlockTime', 'AssetLaunchDate',
       'AssetWhitepaperUrl', 'AssetWebsiteUrl', 'MaxSupply', 'MktCapPenalty',
       'IsUsedInDefi', 'IsUsedInNft', 'PlatformType', 'BuiltOn',
       'SmartContractAddress', 'DecimalPoints', 'Difficulty', 'AlgorithmType'],
      dtype='object')

In [425]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
crypto_df.drop(columns = [
    'AlgorithmType',
    'AssetLaunchDate',
    'AssetTokenStatus',
    'AssetWebsiteUrl',
    'AssetWhitepaperUrl',
    'BlockNumber',
    'BlockReward',
    'BlockTime',
    'BuiltOn',
    'CirculatingSupply',
    'ContentCreatedOn',
    'DecimalPoints',
    'Description',
    'Difficulty',
    'FullName',
    'Id',
    'ImageUrl',
    'IsUsedInDefi',
    'IsUsedInNft',
    'MaxSupply',
    'MktCapPenalty',
    'Name',
    'NetHashesPerSecond',
    'PlatformType',
    'Rating',
    'SmartContractAddress',
    'SortOrder',
    'Sponsored',
    'Taxonomy',
    'TotalCoinsMined',
    'Url'
], inplace = True)


In [426]:
crypto_df.columns

Index(['Symbol', 'Algorithm', 'ProofType', 'IsTrading'], dtype='object')

Need to join TotalCoinSupply to crypto_df

In [427]:
crypto_df

Unnamed: 0_level_0,Symbol,Algorithm,ProofType,IsTrading
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,42,Scrypt,PoW/PoS,True
300 token,300,,,True
365Coin,365,X11,PoW/PoS,True
404Coin,404,Scrypt,PoW/PoS,True
433 Token,433,,,False
...,...,...,...,...
CatzCoin,CATZ,,,False
Sosnovkino,SOSNOVKINO,,,False
Standard Token,TST,,,False
Snake Token,SNK,,,False


In [428]:
coin_supply_df

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1
42 Coin,4.199995e+01,42
365Coin,,2300000000
404Coin,1.055185e+09,532000000
SixEleven,,611000
808,0.000000e+00,0
...,...,...
BitcoinPlus,1.283270e+05,1000000
DivotyCoin,2.149121e+07,100000000
Giotto Coin,,233100000
OpenSourceCoin,,21000000


In [429]:
joined_df = pd.merge(crypto_df, coin_supply_df, on='CoinName', how='outer')
joined_df

Unnamed: 0_level_0,Symbol,Algorithm,ProofType,IsTrading,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42 Coin,42,Scrypt,PoW/PoS,True,4.199995e+01,42
300 token,300,,,True,,
365Coin,365,X11,PoW/PoS,True,,2300000000
404Coin,404,Scrypt,PoW/PoS,True,1.055185e+09,532000000
433 Token,433,,,False,,
...,...,...,...,...,...,...
ClubCoin,,,,,1.036219e+08,160000000
Radium,,,,,3.821246e+06,9000000
Creditbit,,,,,1.690102e+07,16504333
Adamant,,,,,,200000000


In [430]:
crypto_df = joined_df.copy()
crypto_df

Unnamed: 0_level_0,Symbol,Algorithm,ProofType,IsTrading,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42 Coin,42,Scrypt,PoW/PoS,True,4.199995e+01,42
300 token,300,,,True,,
365Coin,365,X11,PoW/PoS,True,,2300000000
404Coin,404,Scrypt,PoW/PoS,True,1.055185e+09,532000000
433 Token,433,,,False,,
...,...,...,...,...,...,...
ClubCoin,,,,,1.036219e+08,160000000
Radium,,,,,3.821246e+06,9000000
Creditbit,,,,,1.690102e+07,16504333
Adamant,,,,,,200000000


In [431]:
# Keep only cryptocurrencies that are trading
crypto_df = crypto_df.loc[crypto_df['IsTrading'] == True]

In [432]:
# Keep only cryptocurrencies with a working algorithm
crypto_df = crypto_df.loc[crypto_df['Algorithm'] != 'N/A']

In [433]:
# Remove the "IsTrading" column
crypto_df.drop(columns = ['IsTrading'], inplace = True)

In [434]:
# Remove rows with at least 1 null value
crypto_df.isnull().sum()

Symbol               0
Algorithm            0
ProofType            0
TotalCoinsMined    991
TotalCoinSupply    536
dtype: int64

In [435]:
crypto_df = crypto_df.dropna()
crypto_df.isnull().sum()

Symbol             0
Algorithm          0
ProofType          0
TotalCoinsMined    0
TotalCoinSupply    0
dtype: int64

In [436]:
# Remove rows with cryptocurrencies having no coins mined
crypto_df = crypto_df.loc[crypto_df['TotalCoinsMined'] != 0]

In [437]:
crypto_df = crypto_df.loc[crypto_df['TotalCoinSupply'] != 0]

In [438]:
# Drop rows where there are 'N/A' text values
crypto_df = crypto_df[crypto_df!='N/A']

In [439]:
crypto_df.reset_index(inplace=True)
crypto_df

Unnamed: 0,CoinName,Symbol,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,42,Scrypt,PoW/PoS,4.199995e+01,42
1,404Coin,404,Scrypt,PoW/PoS,1.055185e+09,532000000
2,EliteCoin,1337,X13,PoW/PoS,2.927942e+10,314159265359
3,BitcoinDark,BTCD,SHA-256,PoW/PoS,1.288862e+06,22000000
4,PayCoin,XPY,SHA-256,PoS,1.199533e+07,12500000
...,...,...,...,...,...,...
505,MegaCoin,MEC,Scrypt,PoW,3.815200e+07,42000000
506,ZCash,ZEC,Equihash,PoW,7.383056e+06,21000000
507,Oxycoin,OXYC,DPoS,DPoS,1.122382e+09,0
508,PAXEX,PAXEX,X11,PoS,4.504639e+06,100000000


In [440]:
crypto_df = crypto_df.loc[crypto_df['TotalCoinSupply'] != '0']
crypto_df

Unnamed: 0,CoinName,Symbol,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,42,Scrypt,PoW/PoS,4.199995e+01,42
1,404Coin,404,Scrypt,PoW/PoS,1.055185e+09,532000000
2,EliteCoin,1337,X13,PoW/PoS,2.927942e+10,314159265359
3,BitcoinDark,BTCD,SHA-256,PoW/PoS,1.288862e+06,22000000
4,PayCoin,XPY,SHA-256,PoS,1.199533e+07,12500000
...,...,...,...,...,...,...
504,MonaCoin,MONA,Scrypt,PoW,6.819467e+07,105120000
505,MegaCoin,MEC,Scrypt,PoW,3.815200e+07,42000000
506,ZCash,ZEC,Equihash,PoW,7.383056e+06,21000000
508,PAXEX,PAXEX,X11,PoS,4.504639e+06,100000000


In [441]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
coinname_df = pd.DataFrame(
    data = crypto_df, columns = ['Symbol','CoinName']
)
coinname_df.head()

Unnamed: 0,Symbol,CoinName
0,42,42 Coin
1,404,404Coin
2,1337,EliteCoin
3,BTCD,BitcoinDark
4,XPY,PayCoin


In [442]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
#crypto_df.reset_index(inplace=True)
crypto_df = crypto_df.drop(columns = 'CoinName')

In [443]:
crypto_df.sample(30)

Unnamed: 0,Symbol,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
139,BSEND,XEVAN,PoW/PoS,24387720.0,139000000.0
110,BTA,Scrypt,PoW,5052556.0,5000000.0
404,TPAY,POS 3.0,PoS,20445860.0,25000000.0
132,NEBU,PoS,PoS,20000000.0,20000000.0
220,GRW,Scrypt,PoS,295135500.0,2000000000.0
438,CDN,Scrypt,AuxPoW,99843410.0,100000000.0
357,PLUS1,HMQ1725,PoW,8645406.0,21000000.0
162,CFC,PoS,PoS,148716800.0,39999898.0
292,AC3,X11,PoW,80316210.0,550000000.0
262,TZC,NeoScrypt,PoW/PoS,182638400.0,400000000.0


In [444]:
crypto_df = crypto_df.set_index('Symbol')
crypto_df

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,4.199995e+01,42
404,Scrypt,PoW/PoS,1.055185e+09,532000000
1337,X13,PoW/PoS,2.927942e+10,314159265359
BTCD,SHA-256,PoW/PoS,1.288862e+06,22000000
XPY,SHA-256,PoS,1.199533e+07,12500000
...,...,...,...,...
MONA,Scrypt,PoW,6.819467e+07,105120000
MEC,Scrypt,PoW,3.815200e+07,42000000
ZEC,Equihash,PoW,7.383056e+06,21000000
PAXEX,X11,PoS,4.504639e+06,100000000


In [445]:
# Create dummy variables for text features
crypto_dummies= pd.get_dummies(crypto_df[['Algorithm', 'ProofType']], drop_first=True)
X = pd.concat([crypto_df.drop(['Algorithm', 'ProofType'],axis=1), crypto_dummies],axis=1)
X.head()

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply,Algorithm_536,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,Algorithm_Counterparty,...,ProofType_PoW,ProofType_PoW/Hive,ProofType_PoW/PoS,ProofType_PoW/PoS,ProofType_PoW/PoSe,ProofType_PoW/nPoS,ProofType_Pos,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42,41.99995,42,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
404,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1337,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
BTCD,1288862.0,22000000,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
XPY,11995330.0,12500000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [447]:
# Standardize data
#crypto_scaled = StandardScaler().fit_transform(crypto_dummies)
#print(crypto_scaled[0:1])
data_scaler = StandardScaler()
data_scaler.fit_transform(X)

array([[-0.10703153, -0.15174782, -0.04588315, ..., -0.04588315,
        -0.04588315, -0.04588315],
       [-0.08488081, -0.14427197, -0.04588315, ..., -0.04588315,
        -0.04588315, -0.04588315],
       [ 0.50760998,  4.26292812, -0.04588315, ..., -0.04588315,
        -0.04588315, -0.04588315],
       ...,
       [-0.10687655, -0.15145272, -0.04588315, ..., -0.04588315,
        -0.04588315, -0.04588315],
       [-0.10693697, -0.15034259, -0.04588315, ..., -0.04588315,
        -0.04588315, -0.04588315],
       [-0.10273906, -0.14820015, -0.04588315, ..., -0.04588315,
        -0.04588315, -0.04588315]])

In [448]:
crypto_df = crypto_df[['Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined']]
crypto_df

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinSupply,TotalCoinsMined
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,42,4.199995e+01
404,Scrypt,PoW/PoS,532000000,1.055185e+09
1337,X13,PoW/PoS,314159265359,2.927942e+10
BTCD,SHA-256,PoW/PoS,22000000,1.288862e+06
XPY,SHA-256,PoS,12500000,1.199533e+07
...,...,...,...,...
MONA,Scrypt,PoW,105120000,6.819467e+07
MEC,Scrypt,PoW,42000000,3.815200e+07
ZEC,Equihash,PoW,21000000,7.383056e+06
PAXEX,X11,PoS,100000000,4.504639e+06


### Reducing Dimensions Using PCA

In [449]:
# Use PCA to reduce dimensions to 3 principal components
pca = PCA(n_components=3)
crypto_pca = pca.fit_transform(X)

In [450]:
# Create a DataFrame with the principal components data
pca_df = pd.DataFrame(
    data=crypto_pca, columns=["PC 1", "PC 2", "PC 3"], index=X.index
)
pca_df.head()

Unnamed: 0_level_0,PC 1,PC 2,PC 3
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42,-11902260000.0,-972115000.0,0.783028
404,-10914590000.0,-1620906000.0,0.782694
1337,274937000000.0,130468200000.0,1.024813
BTCD,-11882560000.0,-962250600.0,0.706616
XPY,-11885450000.0,-976269800.0,0.099065


### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [451]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(crypto_pca)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")


KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.



Running K-Means with `k=<your best value for k here>`

In [452]:
# Initialize the K-Means model
model = KMeans(n_clusters=5, random_state=0)
# Fit the model
model.fit(pca_df)
# Predict clusters
predictions = model.predict(pca_df)
# Create a new DataFrame including predicted clusters and cryptocurrencies features
pca_df['class'] = model.labels_
pca_df.head()
#df_merged = crypto_df.join(coinname_df, how='inner')
#df_merged = df_merged.append(pca_df)

#df_merged#May need to reorder columns

Unnamed: 0_level_0,PC 1,PC 2,PC 3,class
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,-11902260000.0,-972115000.0,0.783028,0
404,-10914590000.0,-1620906000.0,0.782694,0
1337,274937000000.0,130468200000.0,1.024813,3
BTCD,-11882560000.0,-962250600.0,0.706616,0
XPY,-11885450000.0,-976269800.0,0.099065,0


In [455]:
df_merged = pd.concat([crypto_df, pca_df], axis=1)
df_merged

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinSupply,TotalCoinsMined,PC 1,PC 2,PC 3,class
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
42,Scrypt,PoW/PoS,42,4.199995e+01,-1.190226e+10,-9.721150e+08,0.783028,0
404,Scrypt,PoW/PoS,532000000,1.055185e+09,-1.091459e+10,-1.620906e+09,0.782694,0
1337,X13,PoW/PoS,314159265359,2.927942e+10,2.749370e+11,1.304682e+11,1.024813,3
BTCD,SHA-256,PoW/PoS,22000000,1.288862e+06,-1.188256e+10,-9.622506e+08,0.706616,0
XPY,SHA-256,PoS,12500000,1.199533e+07,-1.188545e+10,-9.762698e+08,0.099065,0
...,...,...,...,...,...,...,...,...
MONA,Scrypt,PoW,105120000,6.819467e+07,-1.177714e+10,-9.787366e+08,-0.613216,0
MEC,Scrypt,PoW,42000000,3.815200e+07,-1.184683e+10,-9.842102e+08,-0.613249,0
ZEC,Equihash,PoW,21000000,7.383056e+06,-1.188038e+10,-9.680305e+08,-0.706314,0
PAXEX,X11,PoS,100000000,4.504639e+06,-1.181336e+10,-9.261037e+08,0.191923,0


In [458]:
coinname_df = coinname_df.set_index('Symbol')
coinname_df

Unnamed: 0_level_0,CoinName
Symbol,Unnamed: 1_level_1
42,42 Coin
404,404Coin
1337,EliteCoin
BTCD,BitcoinDark
XPY,PayCoin
...,...
MONA,MonaCoin
MEC,MegaCoin
ZEC,ZCash
PAXEX,PAXEX


In [460]:
df_merged_with_coinname = pd.concat([df_merged, coinname_df], axis=1)
df_merged_with_coinname

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinSupply,TotalCoinsMined,PC 1,PC 2,PC 3,class,CoinName
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,Scrypt,PoW/PoS,42,4.199995e+01,-1.190226e+10,-9.721150e+08,0.783028,0,42 Coin
404,Scrypt,PoW/PoS,532000000,1.055185e+09,-1.091459e+10,-1.620906e+09,0.782694,0,404Coin
1337,X13,PoW/PoS,314159265359,2.927942e+10,2.749370e+11,1.304682e+11,1.024813,3,EliteCoin
BTCD,SHA-256,PoW/PoS,22000000,1.288862e+06,-1.188256e+10,-9.622506e+08,0.706616,0,BitcoinDark
XPY,SHA-256,PoS,12500000,1.199533e+07,-1.188545e+10,-9.762698e+08,0.099065,0,PayCoin
...,...,...,...,...,...,...,...,...,...
MONA,Scrypt,PoW,105120000,6.819467e+07,-1.177714e+10,-9.787366e+08,-0.613216,0,MonaCoin
MEC,Scrypt,PoW,42000000,3.815200e+07,-1.184683e+10,-9.842102e+08,-0.613249,0,MegaCoin
ZEC,Equihash,PoW,21000000,7.383056e+06,-1.188038e+10,-9.680305e+08,-0.706314,0,ZCash
PAXEX,X11,PoS,100000000,4.504639e+06,-1.181336e+10,-9.261037e+08,0.191923,0,PAXEX


In [462]:
df_merged = df_merged_with_coinname[['CoinName', 'Algorithm', 'ProofType', 'TotalCoinsMined', 'TotalCoinSupply', 'PC 1', 'PC 2', 'PC 3', 'class']]
df_merged

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,class
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42,-1.190226e+10,-9.721150e+08,0.783028,0
404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000,-1.091459e+10,-1.620906e+09,0.782694,0
1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359,2.749370e+11,1.304682e+11,1.024813,3
BTCD,BitcoinDark,SHA-256,PoW/PoS,1.288862e+06,22000000,-1.188256e+10,-9.622506e+08,0.706616,0
XPY,PayCoin,SHA-256,PoS,1.199533e+07,12500000,-1.188545e+10,-9.762698e+08,0.099065,0
...,...,...,...,...,...,...,...,...,...
MONA,MonaCoin,Scrypt,PoW,6.819467e+07,105120000,-1.177714e+10,-9.787366e+08,-0.613216,0
MEC,MegaCoin,Scrypt,PoW,3.815200e+07,42000000,-1.184683e+10,-9.842102e+08,-0.613249,0
ZEC,ZCash,Equihash,PoW,7.383056e+06,21000000,-1.188038e+10,-9.680305e+08,-0.706314,0
PAXEX,PAXEX,X11,PoS,4.504639e+06,100000000,-1.181336e+10,-9.261037e+08,0.191923,0


### Visualizing Results

#### 3D-Scatter with Clusters

In [463]:
# Create a 3D-Scatter with the PCA data and the clusters
fig = px.scatter_3d(
    df_merged,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    hover_name='CoinName',
    hover_data= ['Algorithm'],
    color="class",
    symbol="class",
)
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

#### Table of Tradable Cryptocurrencies

In [470]:
# Table with tradable cryptos
columns = ['CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined', 'class']

In [474]:
# Print the total number of tradable cryptocurrencies
df_merged.hvplot.table(columns, width=400, fontscale=80)

#### Scatter Plot with Tradable Cryptocurrencies

In [475]:
# Scale data to create the scatter plot
df_merged['TotalCoinsMined'] = df_merged['TotalCoinsMined'].astype(float) / 1000000
df_merged['TotalCoinSupply'] = df_merged['TotalCoinSupply'].astype(float) / 1000000

In [476]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
df_merged.hvplot(
    kind="scatter", 
    x="TotalCoinsMined", 
    y="TotalCoinSupply", 
    c='class', 
    colormap="viridis", 
    hover_cols=['CoinName']
)