## Cleaning

In [None]:
import pandas as pd

import plotly.express as px
import hvplot.pandas

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [23]:
crypto_df=pd.read_csv('Data/crypto_data.csv')
crypto_df.shape

(1252, 7)

In [24]:
crypto_df=crypto_df[crypto_df.IsTrading == True]

In [25]:
crypto_df=crypto_df.drop('IsTrading', axis=1)

In [26]:
crypto_df= crypto_df.dropna()
crypto_df.shape

(685, 6)

In [27]:
crypto_df['TotalCoinSupply']=crypto_df['TotalCoinSupply'].apply(pd.to_numeric)
crypto_df = crypto_df[crypto_df.TotalCoinsMined != 0]

In [28]:
coin_name=crypto_df['Unnamed: 0']
crypto_df = crypto_df.set_index('Unnamed: 0')
coin_name = pd.DataFrame(crypto_df['CoinName'], index=crypto_df.index)
crypto_df = crypto_df.drop('CoinName', axis=1)
coin_name.head()

Unnamed: 0_level_0,CoinName
Unnamed: 0,Unnamed: 1_level_1
42,42 Coin
404,404Coin
1337,EliteCoin
BTC,Bitcoin
ETH,Ethereum


In [29]:
crypto_df.dtypes

Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply    float64
dtype: object

In [30]:
X=crypto_df.copy()

## Processing

In [31]:
X = pd.get_dummies(X, columns=["Algorithm",'ProofType'])
X.head()

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42,41.99995,42.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,1055185000.0,532000000.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,29279420000.0,314159300000.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,17927180.0,21000000.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,107684200.0,0.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [33]:
X_scaled

array([[-0.11674788, -0.15286468, -0.0433555 , ..., -0.0433555 ,
        -0.0433555 , -0.0433555 ],
       [-0.09358885, -0.14499604, -0.0433555 , ..., -0.0433555 ,
        -0.0433555 , -0.0433555 ],
       [ 0.52587231,  4.4937636 , -0.0433555 , ..., -0.0433555 ,
        -0.0433555 , -0.0433555 ],
       ...,
       [-0.09523411, -0.13215444, -0.0433555 , ..., -0.0433555 ,
        -0.0433555 , -0.0433555 ],
       [-0.11658774, -0.15255408, -0.0433555 , ..., -0.0433555 ,
        -0.0433555 , -0.0433555 ],
       [-0.11674507, -0.15284989, -0.0433555 , ..., -0.0433555 ,
        -0.0433555 , -0.0433555 ]])

## Reduction & Clustering

In [34]:
# Initialize PCA model
pca = PCA(n_components=3)
# Get two principal components for the iris data.
pcs_df = pca.fit_transform(X_scaled)

In [35]:
pcs_df = pd.DataFrame(data=pcs_df, columns=['PC1', 'PC2', 'PC3'])
pcs_df=pcs_df.set_index(crypto_df.index)

pcs_df.head(10)

Unnamed: 0_level_0,PC1,PC2,PC3
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42,-0.336779,1.002583,-0.644894
404,-0.320068,1.002733,-0.645346
1337,2.316842,1.740532,-0.691652
BTC,-0.1408,-1.276862,0.22169
ETH,-0.144193,-2.030461,0.415041
LTC,-0.165092,-1.183093,-0.039114
DASH,-0.399599,1.28625,-0.5514
XMR,-0.143227,-2.282754,0.453214
ETC,-0.14263,-2.030552,0.415027
ZEC,-0.177172,-1.774454,0.486844


In [36]:
inertia = []
k = list(range(1, 10))

In [37]:
# Looking for the best K
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)

In [38]:
# Define a DataFrame to plot the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [39]:
# Initialize the K-means model
model = KMeans(n_clusters=4, random_state=0)

# Fit the model
model.fit(pcs_df)

# Predict clusters
predictions = model.predict(pcs_df)

# Add the predicted class columns
pcs_df["class"] = model.labels_
pcs_df.head()

Unnamed: 0_level_0,PC1,PC2,PC3,class
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,-0.336779,1.002583,-0.644894,0
404,-0.320068,1.002733,-0.645346,0
1337,2.316842,1.740532,-0.691652,0
BTC,-0.1408,-1.276862,0.22169,1
ETH,-0.144193,-2.030461,0.415041,1


In [40]:
clustered_df=crypto_df.copy()
clustered_df[pcs_df.columns.to_list()]=pcs_df
clustered_df['CoinName']=coin_name

In [41]:
clustered_df.head(10)

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC1,PC2,PC3,class,CoinName
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,Scrypt,PoW/PoS,41.99995,42.0,-0.336779,1.002583,-0.644894,0,42 Coin
404,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.320068,1.002733,-0.645346,0,404Coin
1337,X13,PoW/PoS,29279420000.0,314159300000.0,2.316842,1.740532,-0.691652,0,EliteCoin
BTC,SHA-256,PoW,17927180.0,21000000.0,-0.1408,-1.276862,0.22169,1,Bitcoin
ETH,Ethash,PoW,107684200.0,0.0,-0.144193,-2.030461,0.415041,1,Ethereum
LTC,Scrypt,PoW,63039240.0,84000000.0,-0.165092,-1.183093,-0.039114,1,Litecoin
DASH,X11,PoW/PoS,9031294.0,22000000.0,-0.399599,1.28625,-0.5514,0,Dash
XMR,CryptoNight-V7,PoW,17201140.0,0.0,-0.143227,-2.282754,0.453214,1,Monero
ETC,Ethash,PoW,113359700.0,210000000.0,-0.14263,-2.030552,0.415027,1,Ethereum Classic
ZEC,Equihash,PoW,7383056.0,21000000.0,-0.177172,-1.774454,0.486844,1,ZCash


## Visualizations

In [56]:
# Plotting the clusters with three features
fig = px.scatter_3d(clustered_df, x="PC1", y="PC2", z="PC3", 
                    color="class",width=800,
                    hover_name="CoinName", hover_data=["Algorithm"])
fig.update_layout(legend=dict(x=0,y=1))
fig.show()

In [55]:
clustered_df.hvplot.table(columns=['CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined', 'class'])

In [50]:
clustered_df.hvplot.scatter("TotalCoinsMined", "TotalCoinSupply", hover_cols=["class"],by="class")