# Clustering Crypto

In [76]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from pathlib import Path

### Fetching Cryptocurrency Data

In [100]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [274]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
r = requests.get(url)
data = r.json()

In [275]:
list(data.keys())



In [276]:
# Alternatively, use the provided csv file:
file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame
data = data['Data']
crypto_df = pd.DataFrame.from_dict(data)

In [277]:
crypto_df

Unnamed: 0,42,300,365,404,433,611,808,888,1337,2015,...,DARCRUS,DAR,ENVIENTA,ENV,AQUAC,AQUA,NEETCOIN,NEET,CHESSCOIN,CHESS
Id,4321,749869,33639,21227,926547,20909,28223,29462,20824,3744,...,32694,944491,929311,944516,928136,944519,933733,944521,22407,944523
Url,/coins/42/overview,/coins/300/overview,/coins/365/overview,/coins/404/overview,/coins/433/overview,/coins/611/overview,/coins/808/overview,/coins/888/overview,/coins/1337/overview,/coins/2015/overview,...,/coins/darcrus/overview,/coins/dar/overview,/coins/envienta/overview,/coins/env/overview,/coins/aquac/overview,/coins/aqua/overview,/coins/neetcoin/overview,/coins/neet/overview,/coins/chesscoin/overview,/coins/chess/overview
ImageUrl,/media/35650717/42.jpg,/media/27010595/300.png,/media/352070/365.png,/media/35650851/404-300x300.jpg,/media/34836095/433.png,/media/35650940/611-sixeleven.png,/media/351513/808.png,/media/351639/888.png,/media/35520987/elite.png,/media/20180/2015.png,...,/media/351930/dar.png,/media/38554128/dar.png,/media/35521085/env.png,/media/38554143/env.png,/media/37746822/aqua.png,/media/38554145/aqua.png,/media/36640308/neet.png,/media/38554146/neet.png,/media/35651325/chess300x300.png,/media/38554148/chess.png
ContentCreatedOn,1427211129,1517935016,1480032918,1466100361,1541597321,1465914773,1473980395,1475534352,1465838687,1425316878,...,1479138362,1636369568,1552058315,1636468300,1546440522,1636470727,1581413172,1636471373,1467280350,1636471810
Name,42,300,365,404,433,611,808,888,1337,2015,...,DARCRUS,DAR,ENVIENTA,ENV,AQUAC,AQUA,NEETCOIN,NEET,CHESSCOIN,CHESS
Symbol,42,300,365,404,433,611,808,888,1337,2015,...,DARCRUS,DAR,ENVIENTA,ENV,AQUAC,AQUA,NEETCOIN,NEET,CHESSCOIN,CHESS
CoinName,42 Coin,300 token,365Coin,404Coin,433 Token,SixEleven,808,Octocoin,EliteCoin,2015 coin,...,Darcrus,Mines of Dalarnia,Envienta,ENVOY,Aquachain,Planet Finance,Neetcoin,NEET Finance,ChessCoin,Tranchess
FullName,42 Coin (42),300 token (300),365Coin (365),404Coin (404),433 Token (433),SixEleven (611),808 (808),Octocoin (888),EliteCoin (1337),2015 coin (2015),...,Darcrus (DARCRUS),Mines of Dalarnia (DAR),Envienta (ENVIENTA),ENVOY (ENV),Aquachain (AQUAC),Planet Finance (AQUA),Neetcoin (NEETCOIN),NEET Finance (NEET),ChessCoin (CHESSCOIN),Tranchess (CHESS)
Description,Everything about 42 coin is 42 - apart from th...,300 token is an ERC20 token. This Token was cr...,365Coin is a Proof of Work and Proof of Stake ...,404 is a PoW/PoS hybrid cryptocurrency that al...,433 Token is a decentralised soccer platform t...,"611 is a Namecoin based cryptocurrency, and it...",808 is a coin develop for the music community ...,OCTO is a Counterparty asset with focus on dec...,1337 coin was created as an experimental Proof...,Twenty15 Coin was conceived to be an asset bac...,...,Darcrus is an asset issued on the NXT blockcha...,Mines of Dalarnia is an action-adventure game....,ENVIENTA is an open source hardware-developing...,ENVOY aims to build community driven NFT produ...,"Based on proof-of-work chain technology, Aquac...",Planet Finance is a financial protocol consist...,"""NEET"" is an acronym ""Not in Education, Employ...","NEET, an acronym for ""Not in Education, Employ...",ChessCoin is a PoW&amp;PoS hybrid cryptocurren...,Tranchess Swap is the marketplace to trade QUE...
AssetTokenStatus,,,,,Finished,,,,,,...,Finished,,Finished,,,,,,,


In [278]:
coin_supply_df = pd.read_csv(file_path)
coin_supply_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [279]:
coin_supply_df = coin_supply_df.set_index('CoinName')
coin_supply_df.head()

Unnamed: 0_level_0,Unnamed: 0,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42 Coin,42,Scrypt,True,PoW/PoS,41.99995,42
365Coin,365,X11,True,PoW/PoS,,2300000000
404Coin,404,Scrypt,True,PoW/PoS,1055185000.0,532000000
SixEleven,611,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [280]:
coin_supply_df.drop(columns=[
    'Unnamed: 0',
    'Algorithm',
    'IsTrading',
    'ProofType',
    'TotalCoinsMined'
], inplace=True)
coin_supply_df.head()

Unnamed: 0_level_0,TotalCoinSupply
CoinName,Unnamed: 1_level_1
42 Coin,42
365Coin,2300000000
404Coin,532000000
SixEleven,611000
808,0


TotalCoinSupply was missing from the website.

In [281]:
crypto_df = crypto_df.T
crypto_df.head()

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,BuiltOn,SmartContractAddress,DecimalPoints,Difficulty,AlgorithmType
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,0.0,0.0,0.0,0.0,,,,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,300.0,0.0,0.0,0.0,token,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,0.0,0.0,0.0,0.0,,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,0.0,0.0,0.0,0.0,,,,,,
433,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,...,,,,,,,,,,


In [282]:
crypto_df = crypto_df.set_index('CoinName')
crypto_df.head()

Unnamed: 0_level_0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,FullName,Description,AssetTokenStatus,Algorithm,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,BuiltOn,SmartContractAddress,DecimalPoints,Difficulty,AlgorithmType
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42 Coin,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,Scrypt,...,0.0,0.0,0.0,0.0,,,,,,
300 token,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token (300),300 token is an ERC20 token. This Token was cr...,,,...,300.0,0.0,0.0,0.0,token,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0,,
365Coin,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,X11,...,0.0,0.0,0.0,0.0,,,,,,
404Coin,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,Scrypt,...,0.0,0.0,0.0,0.0,,,,,,
433 Token,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,,...,,,,,,,,,,


### Data Preprocessing

In [283]:
crypto_df.columns

Index(['Id', 'Url', 'ImageUrl', 'ContentCreatedOn', 'Name', 'Symbol',
       'FullName', 'Description', 'AssetTokenStatus', 'Algorithm', 'ProofType',
       'SortOrder', 'Sponsored', 'Taxonomy', 'Rating', 'IsTrading',
       'TotalCoinsMined', 'CirculatingSupply', 'BlockNumber',
       'NetHashesPerSecond', 'BlockReward', 'BlockTime', 'AssetLaunchDate',
       'AssetWhitepaperUrl', 'AssetWebsiteUrl', 'MaxSupply', 'MktCapPenalty',
       'IsUsedInDefi', 'IsUsedInNft', 'PlatformType', 'BuiltOn',
       'SmartContractAddress', 'DecimalPoints', 'Difficulty', 'AlgorithmType'],
      dtype='object')

In [284]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
crypto_df.drop(columns = [
    'AlgorithmType',
    'AssetLaunchDate',
    'AssetTokenStatus',
    'AssetWebsiteUrl',
    'AssetWhitepaperUrl',
    'BlockNumber',
    'BlockReward',
    'BlockTime',
    'BuiltOn',
    'CirculatingSupply',
    'ContentCreatedOn',
    'DecimalPoints',
    'Description',
    'Difficulty',
    'FullName',
    'Id',
    'ImageUrl',
    'IsUsedInDefi',
    'IsUsedInNft',
    'MaxSupply',
    'MktCapPenalty',
    'Name',
    'NetHashesPerSecond',
    'PlatformType',
    'Rating',
    'SmartContractAddress',
    'SortOrder',
    'Sponsored',
    'Taxonomy',
    'Url'
], inplace = True)


In [285]:
crypto_df.columns

Index(['Symbol', 'Algorithm', 'ProofType', 'IsTrading', 'TotalCoinsMined'], dtype='object')

Need to join TotalCoinSupply to crypto_df

In [286]:
crypto_df

Unnamed: 0_level_0,Symbol,Algorithm,ProofType,IsTrading,TotalCoinsMined
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42 Coin,42,Scrypt,PoW/PoS,True,0
300 token,300,,,True,300
365Coin,365,X11,PoW/PoS,True,0
404Coin,404,Scrypt,PoW/PoS,True,0
433 Token,433,,,False,
...,...,...,...,...,...
Planet Finance,AQUA,,,True,97701.434187
Neetcoin,NEETCOIN,Scrypt,PoW/PoS,True,
NEET Finance,NEET,,,True,439846310077170.5
ChessCoin,CHESSCOIN,Scrypt,PoW/PoS,True,


In [287]:
coin_supply_df

Unnamed: 0_level_0,TotalCoinSupply
CoinName,Unnamed: 1_level_1
42 Coin,42
365Coin,2300000000
404Coin,532000000
SixEleven,611000
808,0
...,...
BitcoinPlus,1000000
DivotyCoin,100000000
Giotto Coin,233100000
OpenSourceCoin,21000000


In [288]:
joined_df = pd.merge(crypto_df, coin_supply_df, on='CoinName', how='outer')
joined_df

Unnamed: 0_level_0,Symbol,Algorithm,ProofType,IsTrading,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42 Coin,42,Scrypt,PoW/PoS,True,0,42
300 token,300,,,True,300,
365Coin,365,X11,PoW/PoS,True,0,2300000000
404Coin,404,Scrypt,PoW/PoS,True,0,532000000
433 Token,433,,,False,,
...,...,...,...,...,...,...
ClubCoin,,,,,,160000000
Radium,,,,,,9000000
Creditbit,,,,,,16504333
Adamant,,,,,,200000000


In [289]:
crypto_df = joined_df.copy()
crypto_df

Unnamed: 0_level_0,Symbol,Algorithm,ProofType,IsTrading,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42 Coin,42,Scrypt,PoW/PoS,True,0,42
300 token,300,,,True,300,
365Coin,365,X11,PoW/PoS,True,0,2300000000
404Coin,404,Scrypt,PoW/PoS,True,0,532000000
433 Token,433,,,False,,
...,...,...,...,...,...,...
ClubCoin,,,,,,160000000
Radium,,,,,,9000000
Creditbit,,,,,,16504333
Adamant,,,,,,200000000


In [290]:
# Keep only cryptocurrencies that are trading
crypto_df = crypto_df.loc[crypto_df['IsTrading'] == True]

In [291]:
# Keep only cryptocurrencies with a working algorithm
crypto_df = crypto_df.loc[crypto_df['Algorithm'] != 'N/A']

In [292]:
# Remove the "IsTrading" column
crypto_df.drop(columns = ['IsTrading'], inplace = True)

In [293]:
# Remove rows with at least 1 null value
crypto_df.isnull().sum()

Symbol                0
Algorithm             0
ProofType             0
TotalCoinsMined    1209
TotalCoinSupply     536
dtype: int64

In [294]:
crypto_df = crypto_df.dropna()
crypto_df.isnull().sum()

Symbol             0
Algorithm          0
ProofType          0
TotalCoinsMined    0
TotalCoinSupply    0
dtype: int64

In [295]:
# Remove rows with cryptocurrencies having no coins mined
crypto_df = crypto_df.loc[crypto_df['TotalCoinsMined'] != 0]

In [296]:
# Drop rows where there are 'N/A' text values
crypto_df = crypto_df[crypto_df!='N/A']

In [297]:
crypto_df.reset_index(inplace=True)
crypto_df

Unnamed: 0,CoinName,Symbol,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Triangles Coin,TRI,X13,PoW/PoS,190094.828277,120000
1,CometCoin,CMTC,Scrypt,PoW,872830,2000000
2,OpenChat,CHAT,Scrypt,PoW/PoS,1000000000,1000000000
3,Quantum Resistant Ledger,QRL,RandomX,PoW,75324107.95989,105000000
4,Pura,PURA,X11,PoW,188358976.839698,350000000
...,...,...,...,...,...,...
77,MonaCoin,MONA,Scrypt,PoW,83808137.471579,105120000
78,MegaCoin,MEC,Scrypt,PoW,39739204.9931,42000000
79,ZCash,ZEC,Equihash,PoW,11805140.9508,21000000
80,Oxycoin,OXYC,DPoS,DPoS,1122382283.37,0


In [298]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
coinname_df = pd.DataFrame(
    data = crypto_df, columns = ['CoinName']
)
coinname_df.head()

Unnamed: 0,CoinName
0,Triangles Coin
1,CometCoin
2,OpenChat
3,Quantum Resistant Ledger
4,Pura


In [299]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
#crypto_df.reset_index(inplace=True)
crypto_df = crypto_df.drop(columns = 'CoinName')

In [300]:
crypto_df.sample(30)

Unnamed: 0,Symbol,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
15,FLO,Scrypt,PoW,139519886.24104,160000000
23,SERO,Ethash,ProgPoW/PoS,334031560.0,1000000000
68,KMD,Equihash,dPoW/PoW,130807383.253723,200000000
77,MONA,Scrypt,PoW,83808137.471579,105120000
35,NMC,SHA-256,PoW,17826375.0,21000000
8,TPAY,POS 3.0,PoS,21880393.0,25000000
25,POT,Scrypt,PoW/PoS,227022448.203136,420000000
36,XMR,RandomX,PoW,18031268.522733,0
3,QRL,RandomX,PoW,75324107.95989,105000000
80,OXYC,DPoS,DPoS,1122382283.37,0


In [301]:
crypto_df = crypto_df.set_index('Symbol')
crypto_df

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TRI,X13,PoW/PoS,190094.828277,120000
CMTC,Scrypt,PoW,872830,2000000
CHAT,Scrypt,PoW/PoS,1000000000,1000000000
QRL,RandomX,PoW,75324107.95989,105000000
PURA,X11,PoW,188358976.839698,350000000
...,...,...,...,...
MONA,Scrypt,PoW,83808137.471579,105120000
MEC,Scrypt,PoW,39739204.9931,42000000
ZEC,Equihash,PoW,11805140.9508,21000000
OXYC,DPoS,DPoS,1122382283.37,0


In [314]:
# Create dummy variables for text features
crypto_dummies= pd.get_dummies(crypto_df, columns=['Algorithm', 'ProofType'])
crypto_dummies.head()

Unnamed: 0_level_0,TotalCoinSupply,TotalCoinsMined,Algorithm_BLAKE256,Algorithm_Blake2b,Algorithm_CryptoNight,Algorithm_CryptoNight-Lite,Algorithm_DPoS,Algorithm_ECC 256K1,Algorithm_Equihash,Algorithm_EtcHash,...,ProofType_PoS/LPoS,ProofType_PoST,ProofType_PoW,ProofType_PoW/PoS,ProofType_PoW/PoSe,ProofType_PoW/nPoS,ProofType_ProgPoW/PoS,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TRI,120000,190094.828277,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
CMTC,2000000,872830.0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
CHAT,1000000000,1000000000.0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
QRL,105000000,75324107.95989,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
PURA,350000000,188358976.839698,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [315]:
# Standardize data
crypto_scaled = StandardScaler().fit_transform(crypto_dummies)
print(crypto_scaled[0:1])

[[-0.14541922 -0.15027555 -0.11111111 -0.15811388 -0.19487094 -0.11111111
  -0.2548236  -0.11111111 -0.35112344 -0.11111111 -0.22645541 -0.11111111
  -0.11111111 -0.11111111 -0.11111111 -0.11111111 -0.11111111 -0.19487094
  -0.11111111 -0.11111111 -0.15811388 -0.11111111 -0.11111111 -0.11111111
  -0.15811388 -0.32879797 -0.15811388 -0.11111111 -0.49236596 -0.11111111
  -0.11111111 -0.22645541 -0.11111111  5.13160144 -0.15811388 -0.11111111
  -0.30550505 -0.11111111 -0.11111111 -0.11111111 -0.22645541 -0.11111111
  -0.11111111 -1.07605517  2.30384294 -0.11111111 -0.11111111 -0.11111111
  -0.11111111 -0.11111111 -0.11111111]]


In [312]:
crypto_df = crypto_df[['Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined']]
crypto_df

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinSupply,TotalCoinsMined
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TRI,X13,PoW/PoS,120000,190094.828277
CMTC,Scrypt,PoW,2000000,872830
CHAT,Scrypt,PoW/PoS,1000000000,1000000000
QRL,RandomX,PoW,105000000,75324107.95989
PURA,X11,PoW,350000000,188358976.839698
...,...,...,...,...
MONA,Scrypt,PoW,105120000,83808137.471579
MEC,Scrypt,PoW,42000000,39739204.9931
ZEC,Equihash,PoW,21000000,11805140.9508
OXYC,DPoS,DPoS,0,1122382283.37


### Reducing Dimensions Using PCA

In [316]:
# Use PCA to reduce dimensions to 3 principal components
pca = PCA(n_components=3)

crypto_pca = pca.fit_transform(crypto_scaled)

In [317]:
# Create a DataFrame with the principal components data
pca_df = pd.DataFrame(
    data=crypto_pca, columns=["PC 1", "PC 2", "PC 3"]
)
pca_df.head()

Unnamed: 0,PC 1,PC 2,PC 3
0,-0.25159,1.879545,-1.012576
1,-0.513886,-0.806709,-0.090417
2,-0.317759,1.055483,-0.707034
3,-0.52171,-1.447065,0.228893
4,-0.453517,-0.388506,-0.160925


### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [318]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(crypto_pca)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")


KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



Running K-Means with `k=<your best value for k here>`

In [319]:
# Initialize the K-Means model
model = KMeans(n_clusters=5, random_state=0)
# Fit the model
model.fit(pca_df)
# Predict clusters
predictions = model.predict(pca_df)
# Create a new DataFrame including predicted clusters and cryptocurrencies features
pca_df['class'] = model.labels_

df_merged = crypto_df.join(coinname_df, how='inner')
df_merged = df_merged.append(pca_df)

df_merged#May need to reorder columns

Unnamed: 0,Algorithm,ProofType,TotalCoinSupply,TotalCoinsMined,CoinName,PC 1,PC 2,PC 3,class
0,,,,,,-0.251590,1.879545,-1.012576,2.0
1,,,,,,-0.513886,-0.806709,-0.090417,0.0
2,,,,,,-0.317759,1.055483,-0.707034,2.0
3,,,,,,-0.521710,-1.447065,0.228893,0.0
4,,,,,,-0.453517,-0.388506,-0.160925,0.0
...,...,...,...,...,...,...,...,...,...
77,,,,,,-0.512999,-0.806893,-0.090373,0.0
78,,,,,,-0.513510,-0.806787,-0.090398,0.0
79,,,,,,-0.515794,-1.265437,0.151927,0.0
80,,,,,,1.706882,1.506331,-0.759176,4.0


### Visualizing Results

#### 3D-Scatter with Clusters

In [320]:
# Create a 3D-Scatter with the PCA data and the clusters
fig = px.scatter_3d(
    df_merged,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    hover_name='CoinName',
    hover_data= ['Algorithm'],
    color="class",
    symbol="class",
)
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

#### Table of Tradable Cryptocurrencies

In [321]:
# Table with tradable cryptos
columns = ['CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined', 'class']

In [322]:
# Print the total number of tradable cryptocurrencies
df_merged.hvplot.table(columns)

#### Scatter Plot with Tradable Cryptocurrencies

In [323]:
# Scale data to create the scatter plot
df_merged['TotalCoinsMined'] = df_merged['TotalCoinsMined'].astype(float) / 1000000
df_merged['TotalCoinSupply'] = df_merged['TotalCoinSupply'].astype(float) / 1000000

In [324]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
df_merged.hvplot(
    kind="scatter", 
    x="TotalCoinsMined", 
    y="TotalCoinSupply", 
    c='class', 
    colormap="viridis", 
    hover_cols=['CoinName']
)