# Crypto Clustering

In [10]:
# Import statements

import pandas as pd
import hvplot.pandas
from path import Path
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [2]:
# Read crypto_data.csv into a dataframe, using the first CSV column as the row index,
# then preview the first five rows.
df = pd.read_csv("crypto_data.csv", index_col=[0])
df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [3]:
# Keep only the currencies that are currently being traded,
# i.e. every row whose IsTrading value is not False.

df = df.loc[df["IsTrading"].ne(False)]
print(df.shape)
df.head()

(1144, 6)


Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [4]:
# Remove the IsTrading column from the dataframe.

df = df.drop(columns=["IsTrading"])
df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.0,0


In [5]:
# Discard every row that contains at least one null value.

df = df.dropna(axis=0, how="any")
print(df.shape)
df.head()

(685, 5)


Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
808,808,SHA-256,PoW/PoS,0.0,0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [6]:
# Keep only cryptocurrencies that have actually been mined:
# TotalCoinsMined must be strictly greater than zero.

df = df.loc[df["TotalCoinsMined"].gt(0)]
print(df.shape)
df.head()


(532, 5)


Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [7]:
# CoinName is not used by the clustering algorithm, so remove that column.
df = df.drop(columns=["CoinName"])
print(df.shape)
df.head()

(532, 4)


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,41.99995,42
404,Scrypt,PoW/PoS,1055185000.0,532000000
1337,X13,PoW/PoS,29279420000.0,314159265359
BTC,SHA-256,PoW,17927180.0,21000000
ETH,Ethash,PoW,107684200.0,0


In [8]:
# One-hot encode the text features (Algorithm and ProofType) with get_dummies();
# drop_first=True omits the first level of each encoded column.
X = pd.get_dummies(
    data=df,
    columns=["Algorithm", "ProofType"],
    drop_first=True,
)
print(X.shape)
X.head()

(532, 96)


Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Standardize the feature matrix X with StandardScaler and print the
# first three scaled rows as a quick sanity check.
scalar = StandardScaler()
cryto_scale = scalar.fit(X).transform(X)
print(cryto_scale[:3])

[[-0.11710817 -0.1528703  -0.0433963  -0.0433963  -0.06142951 -0.07530656
  -0.0433963  -0.06142951 -0.06142951 -0.0433963  -0.0433963  -0.19245009
  -0.06142951 -0.09740465 -0.0433963  -0.11547005 -0.07530656 -0.0433963
  -0.0433963  -0.15191091 -0.0433963  -0.13118084 -0.0433963  -0.0433963
  -0.08703883 -0.0433963  -0.0433963  -0.0433963  -0.0433963  -0.06142951
  -0.0433963  -0.08703883 -0.08703883 -0.08703883 -0.0433963  -0.13118084
  -0.13840913 -0.13840913 -0.0433963  -0.06142951 -0.0433963  -0.07530656
  -0.18168574 -0.0433963  -0.0433963  -0.0433963  -0.07530656 -0.15826614
  -0.31491833 -0.0433963  -0.08703883 -0.07530656 -0.06142951  1.38675049
  -0.0433963  -0.0433963  -0.06142951 -0.0433963  -0.0433963  -0.0433963
  -0.0433963  -0.0433963  -0.0433963  -0.0433963  -0.0433963  -0.39879994
  -0.0433963  -0.18168574 -0.0433963  -0.08703883 -0.08703883 -0.10680283
  -0.13118084 -0.0433963  -0.0433963  -0.0433963  -0.0433963  -0.07530656
  -0.43911856 -0.0433963  -0.06142951 -0.