# Clustering Crypto

In [18]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

### Fetching Cryptocurrency Data

In [19]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [20]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
r = requests.get(url)
data = r.json()
data = data['Data']
crypto_df = pd.DataFrame.from_dict(data)
crypto_df = crypto_df.T
crypto_df.head()

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,AlgorithmType,Difficulty,BuiltOn,SmartContractAddress,DecimalPoints
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,42.0,0.0,0.0,0.0,blockchain,scrypt,0.000244,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,300.0,0.0,0.0,0.0,token,,,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,-1.0,0.0,0.0,0.0,blockchain,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,-1.0,0.0,0.0,0.0,blockchain,,,,,
433,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,...,,,,,,,,,,


In [21]:
# Alternatively, use the provided csv file:
# file_path = Path("Resources/crypto_data.csv")

# Create a DataFramefrom pathlib import Path
file_path = Path("Resources/crypto_data.csv")
crypto_df = pd.read_csv(file_path, index_col=0)
crypto_df

NameError: name 'Path' is not defined

### Data Preprocessing

In [22]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
# Keep only cryptocurrencies that are trading
crypto_df = crypto_df[crypto_df['IsTrading'] == True]

In [23]:
crypto_df

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,AlgorithmType,Difficulty,BuiltOn,SmartContractAddress,DecimalPoints
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,42,0,0,0,blockchain,scrypt,0.000244,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,300,0,0,0,token,,,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,-1,0,0,0,blockchain,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,-1,0,0,0,blockchain,,,,,
611,20909,/coins/611/overview,/media/35650940/611-sixeleven.png,1465914773,611,611,SixEleven,SixEleven (611),"611 is a Namecoin based cryptocurrency, and it...",,...,0,0,0,0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MULTI,947685,/coins/multi/overview,/media/39838579/multi.png,1649172848,MULTI,MULTI,Multichain,Multichain (MULTI),Multichain is a Router for web3. It is an infr...,,...,-1,0,0,0,token,,,"ETH,BNB,MATIC,AVAX,FTM",0x65Ef703f5594D2573eb71Aaf55BC0CB548492df4,18
ANY,936227,/coins/any/overview,/media/37305349/any.png,1598442863,ANY,ANY,Anyswap,Anyswap (ANY),Anyswap is a fully decentralized cross-chain s...,,...,100000000,99.99,1,0,token,,,,,
BANANA,943188,/coins/banana/overview,/media/38553358/banana.png,1629990923,BANANA,BANANA,ApeSwap,ApeSwap (BANANA),ApeSwap is a Decentralized Autonomous Organiza...,,...,-1,0,0,0,token,,,BNB,0x603c7f932ED1fc6575303D8Fb018fDCBb0f39a95,18
GAMEFI,947696,/coins/gamefi/overview,/media/39838585/gamefi.png,1649239487,GAMEFI,GAMEFI,GameFi Token,GameFi Token (GAMEFI),GFI is the driving force of GameFi Protocol ec...,,...,-1,0,0,0,token,,,BNB,0xDD6C6C114Db071EFE0BAB6051268227CE64C3fFe,18


In [24]:
# Keep only cryptocurrencies with a working algorithm
crypto_df = crypto_df[crypto_df["Algorithm"] != "N/A"]

In [25]:
crypto_df

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,AlgorithmType,Difficulty,BuiltOn,SmartContractAddress,DecimalPoints
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,42,0,0,0,blockchain,scrypt,0.000244,,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,-1,0,0,0,blockchain,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,-1,0,0,0,blockchain,,,,,
611,20909,/coins/611/overview,/media/35650940/611-sixeleven.png,1465914773,611,611,SixEleven,SixEleven (611),"611 is a Namecoin based cryptocurrency, and it...",,...,0,0,0,0,,,,,,
808,28223,/coins/808/overview,/media/351513/808.png,1473980395,808,808,808,808 (808),808 is a coin develop for the music community ...,,...,0,0,0,0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CRAFTCOIN,29711,/coins/craftcoin/overview,/media/351681/craft.png,1475818685,CRAFTCOIN,CRAFTCOIN,Craftcoin,Craftcoin (CRAFTCOIN),CraftCoin is a cryptocurrency based on Litecoi...,,...,,,,,,,,,,
DESTINY,22330,/coins/destiny/overview,/media/351087/des.png,1467197507,DESTINY,DESTINY,Destiny,Destiny (DESTINY),Destiny is a Scrypt based cryptocurrency that ...,,...,,,,,,,,,,
GLOWSHA,5397,/coins/glowsha/overview,/media/20314/gsxjpeg.png,1430211025,GLOWSHA,GLOWSHA,GlowShares,GlowShares (GLOWSHA),"Anonymous, decentralized voting on the blockch...",,...,,,,,,,,,,
GHOSTCOIN,5336,/coins/ghostcoin/overview,/media/19721/ghc.png,1430210187,GHOSTCOIN,GHOSTCOIN,GhostCoin,GhostCoin (GHOSTCOIN),GhostCoin is a Scrypt Altcoin based on both Pr...,,...,,,,,,,,,,


In [26]:
# Remove the "IsTrading" column
crypto_df = crypto_df.drop(columns='IsTrading')

In [27]:
crypto_df

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,AlgorithmType,Difficulty,BuiltOn,SmartContractAddress,DecimalPoints
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,42,0,0,0,blockchain,scrypt,0.000244,,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,-1,0,0,0,blockchain,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,-1,0,0,0,blockchain,,,,,
611,20909,/coins/611/overview,/media/35650940/611-sixeleven.png,1465914773,611,611,SixEleven,SixEleven (611),"611 is a Namecoin based cryptocurrency, and it...",,...,0,0,0,0,,,,,,
808,28223,/coins/808/overview,/media/351513/808.png,1473980395,808,808,808,808 (808),808 is a coin develop for the music community ...,,...,0,0,0,0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CRAFTCOIN,29711,/coins/craftcoin/overview,/media/351681/craft.png,1475818685,CRAFTCOIN,CRAFTCOIN,Craftcoin,Craftcoin (CRAFTCOIN),CraftCoin is a cryptocurrency based on Litecoi...,,...,,,,,,,,,,
DESTINY,22330,/coins/destiny/overview,/media/351087/des.png,1467197507,DESTINY,DESTINY,Destiny,Destiny (DESTINY),Destiny is a Scrypt based cryptocurrency that ...,,...,,,,,,,,,,
GLOWSHA,5397,/coins/glowsha/overview,/media/20314/gsxjpeg.png,1430211025,GLOWSHA,GLOWSHA,GlowShares,GlowShares (GLOWSHA),"Anonymous, decentralized voting on the blockch...",,...,,,,,,,,,,
GHOSTCOIN,5336,/coins/ghostcoin/overview,/media/19721/ghc.png,1430210187,GHOSTCOIN,GHOSTCOIN,GhostCoin,GhostCoin (GHOSTCOIN),GhostCoin is a Scrypt Altcoin based on both Pr...,,...,,,,,,,,,,


In [46]:
# Remove rows with cryptocurrencies having no coins mined
crypto_df = crypto_df[crypto_df["TotalCoinsMined"]> 0]

In [42]:
# Drop rows where there are 'N/A' text values
crypto_df = crypto_df[crypto_df!='N/A']

In [43]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
coinname_df = pd.DataFrame(data = crypto_df, columns = ['CoinName'])

In [44]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
crypto_df = crypto_df.drop(columns = 'CoinName')

KeyError: "['CoinName'] not found in axis"

In [39]:
# Create dummy variables for text features
crypto_dummies= pd.get_dummies(crypto_df, columns=['Algorithm', 'ProofType'])

In [40]:
# Standardize data
crypto_scaled = StandardScaler().fit_transform(crypto_dummies)

ValueError: Found array with 0 sample(s) (shape=(0, 32)) while a minimum of 1 is required by StandardScaler.

### Reducing Dimensions Using PCA

In [48]:
# Use PCA to reduce dimensions to 3 principal components
pca_df = pd.DataFrame(data=crypto_pca, columns=["PC 1", "PC 2", "PC 3"])

NameError: name 'crypto_pca' is not defined

In [49]:
# Create a DataFrame with the principal components data
pca_df = pd.DataFrame(data=crypto_pca, columns=["PC 1", "PC 2", "PC 3"], index=crypto_df.index)

NameError: name 'crypto_pca' is not defined

In [50]:
pca_df

NameError: name 'pca_df' is not defined

### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [None]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(crypto_pca)
    inertia.append(km.inertia_)
# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

Running K-Means with `k=<your best value for k here>`

In [None]:
# Initialize the K-Means model
km = KMeans(n_clusters=4, random_state=0)
# Fit the model
km.fit(pca_df)
# Predict clusters
predictions = km.predict(pca_df)
# Creating a new DataFrame including predicted clusters and cryptocurrencies features
pca_df['class'] = km.labels_
pca_df.index = crypto_df.index

In [None]:
clustered_df = pd.concat([crypto_df, pca_df,coins_name], axis=1,)

In [None]:
clustered_df

### Visualizing Results

#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Scale data to create the scatter plot
MM_SCALER = MinMaxScaler()
plot_data =MM_SCALER.fit_transform(clustered_df[["MaxSupply", "TotalCoinsMined"]])
plot_df = pd.DataFrame(plot_data, columns=["MaxSupply", "TotalCoinsMined"], index=clustered_df.index)
plot_df["CoinName"] = clustered_df["CoinName"]
plot_df["Class"] = clustered_df["Class"]

In [None]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
plot_df

#### Table of Tradable Cryptocurrencies

In [None]:
# Table with tradable cryptos
clustered_df[["CoinName","Algorithm","ProofType","MaxSupply","TotalCoinsMined","Class",]].hvplot.table()

In [None]:
# Print the total number of tradable cryptocurrencies
print(f"There are {clustered_df.shape()} tradable cryptocurrencies."