# Clustering Crypto

In [289]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

### Fetching Cryptocurrency Data

In [290]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [291]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
# CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
crypto = requests.get(url).json()
# def crypto_clean(crypto):
#     for coin in crypto['Data']:
#         name = coin['CoinName']
#         algorithm = coin['Algorithm']
#         trading = coin['IsTrading']
#         proof = coin['ProofType']
#         total_minted = coin['TotalCoinsMined']
#         total_supply = coin['TotalCoinSupply']

#     return pd.DataFrame({
#         "Name":name,
#         'Algorithm': algorithm,
#         'Currently trading': trading,
#         'ProofType': proof,
#          'Total Minted': total_minted,
#          'Supply': total_supply
#     })

# crypto_df = crypto_clean(crypto)


In [292]:
# Alternatively, use the provided csv file:
# file_path = Path("Resources/crypto_data.csv")

crypto_df = pd.read_csv('crypto_data.csv', index_col="CoinName")
crypto_df = crypto_df.drop(columns=['Unnamed: 0'], axis=1)

crypto_df

Unnamed: 0_level_0,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
365Coin,X11,True,PoW/PoS,,2300000000
404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
SixEleven,SHA-256,True,PoW,,611000
808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...
BitcoinPlus,Scrypt,True,PoS,1.283270e+05,1000000
DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
Giotto Coin,Scrypt,False,PoW/PoS,,233100000
OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


### Data Preprocessing

In [293]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'


In [294]:
# Keep only cryptocurrencies that are trading
crypto_df = crypto_df[crypto_df['IsTrading'] == True]

In [295]:
# Keep only cryptocurrencies with a working algorithm


In [296]:
# Remove the "IsTrading" column
crypto_df = crypto_df.drop(columns="IsTrading")

In [297]:
# Remove rows with at least 1 null value
crypto_df = crypto_df.dropna(axis=0, how='any')
crypto_df

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
808,SHA-256,PoW/PoS,0.000000e+00,0
EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
Bitcoin,SHA-256,PoW,1.792718e+07,21000000
...,...,...,...,...
ZEPHYR,SHA-256,DPoS,2.000000e+09,2000000000
Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
Beldex,CryptoNight,PoW,9.802226e+08,1400222610
Horizen,Equihash,PoW,7.296538e+06,21000000


In [298]:
# Remove rows with cryptocurrencies having no coins mined
crypto_df = crypto_df[crypto_df['TotalCoinsMined'] > 1]
crypto_df.head(20)

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,Scrypt,PoW/PoS,41.99995,42
404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
Bitcoin,SHA-256,PoW,17927180.0,21000000
Ethereum,Ethash,PoW,107684200.0,0
Litecoin,Scrypt,PoW,63039240.0,84000000
Dash,X11,PoW/PoS,9031294.0,22000000
Monero,CryptoNight-V7,PoW,17201140.0,0
Ethereum Classic,Ethash,PoW,113359700.0,210000000
ZCash,Equihash,PoW,7383056.0,21000000


In [299]:
# Drop rows where there are 'N/A' text values
crypto_df.replace("N/A", 1.2476)
crypto_df.drop

<bound method DataFrame.drop of                Algorithm ProofType  TotalCoinsMined TotalCoinSupply
CoinName                                                           
42 Coin           Scrypt   PoW/PoS     4.199995e+01              42
404Coin           Scrypt   PoW/PoS     1.055185e+09       532000000
EliteCoin            X13   PoW/PoS     2.927942e+10    314159265359
Bitcoin          SHA-256       PoW     1.792718e+07        21000000
Ethereum          Ethash       PoW     1.076842e+08               0
...                  ...       ...              ...             ...
ZEPHYR           SHA-256      DPoS     2.000000e+09      2000000000
Gapcoin           Scrypt   PoW/PoS     1.493105e+07       250000000
Beldex       CryptoNight       PoW     9.802226e+08      1400222610
Horizen         Equihash       PoW     7.296538e+06        21000000
BitcoinPlus       Scrypt       PoS     1.283270e+05         1000000

[532 rows x 4 columns]>

In [300]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
crypto_df = crypto_df.reset_index()
Coin_name = crypto_df[['CoinName']]
Coin_name


Unnamed: 0,CoinName
0,42 Coin
1,404Coin
2,EliteCoin
3,Bitcoin
4,Ethereum
...,...
527,ZEPHYR
528,Gapcoin
529,Beldex
530,Horizen


In [301]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
crypto_df = crypto_df.drop(columns=['CoinName'])
crypto_df


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,4.199995e+01,42
1,Scrypt,PoW/PoS,1.055185e+09,532000000
2,X13,PoW/PoS,2.927942e+10,314159265359
3,SHA-256,PoW,1.792718e+07,21000000
4,Ethash,PoW,1.076842e+08,0
...,...,...,...,...
527,SHA-256,DPoS,2.000000e+09,2000000000
528,Scrypt,PoW/PoS,1.493105e+07,250000000
529,CryptoNight,PoW,9.802226e+08,1400222610
530,Equihash,PoW,7.296538e+06,21000000


In [302]:
# Create dummy variables for text features
dummy = pd.get_dummies(crypto_df[['Algorithm','ProofType']])


In [303]:
# Standardize data
crypto_df = pd.concat([crypto_df, dummy], join='outer',axis=1)
scaler = StandardScaler()
crypto_df = crypto_df.drop(columns=['Algorithm','ProofType'])
df_scaled = scaler.fit_transform(crypto_df)

### Reducing Dimensions Using PCA

In [None]:
# Use PCA to reduce dimensions to 3 principal components
pca = PCA(n_components=3)
pcs_transform = pca.fit_transform(df_scaled)


In [None]:
# Create a DataFrame with the principal components data
pca_df = pd.DataFrame(pcs_transform, columns=['Feature_1','Feature_2',"Feature_3"])
pca_df

### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [320]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    mean_model = KMeans(n_clusters=i, random_state=32)
    mean_model.fit(pca_df)
    inertia.append(mean_model.inertia_)


# Create the Elbow Curve using hvPlot
elbow = {"k": k, "inertia": inertia}
elbow_df = pd.DataFrame(elbow)
elbow_df.hvplot()
# elbow_df[elbow_df['k'] == elbow_df['k'].max()]

  "KMeans is known to have a memory leak on Windows "


Unnamed: 0,k,inertia
9,10,103.115209


Running K-Means with `k=<your best value for k here>`

In [322]:
# Initialize the K-Means model
K_mugga = KMeans(n_clusters=10, random_state=22)
# Fit the model
K_mugga.fit(elbow_df)
# Predict clusters
predictions = K_mugga.predict(elbow_df)
# Create a new DataFrame including predicted clusters and cryptocurrencies features
clustered_df = pd.concat([crypto_df,pca_df, Coin_name], join='outer')
clustered_df

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW,Feature_1,Feature_2,Feature_3,CoinName
0,4.199995e+01,42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
1,1.055185e+09,532000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
2,2.927942e+10,314159265359,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
3,1.792718e+07,21000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
4,1.076842e+08,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,,,,,,,,,,,...,,,,,,,,,,ZEPHYR
528,,,,,,,,,,,...,,,,,,,,,,Gapcoin
529,,,,,,,,,,,...,,,,,,,,,,Beldex
530,,,,,,,,,,,...,,,,,,,,,,Horizen


### Visualizing Results

#### Scatter Plot with Tradable Cryptocurrencies

In [308]:
# Scale data to create the scatter plot


In [309]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"


#### Table of Tradable Cryptocurrencies

In [310]:
# Table with tradable cryptos


In [311]:
# Print the total number of tradable cryptocurrencies
