In [109]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px

In [54]:
crypto_df=pd.read_csv('./crypto_data.csv')
crypto_df.head(5)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [55]:
crypto_df.dtypes

Unnamed: 0          object
CoinName            object
Algorithm           object
IsTrading             bool
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [56]:
for column in crypto_df.columns:
    print(f'Colum {column} has {crypto_df[column].isnull().sum()} null values')

Colum Unnamed: 0 has 0 null values
Colum CoinName has 0 null values
Colum Algorithm has 0 null values
Colum IsTrading has 0 null values
Colum ProofType has 0 null values
Colum TotalCoinsMined has 508 null values
Colum TotalCoinSupply has 0 null values


In [57]:
crypto_df['IsTrading'].value_counts()

True     1144
False     108
Name: IsTrading, dtype: int64

In [58]:
#Remove all cryptocurriencies taht aren't trading
crypto_df=crypto_df[(crypto_df['IsTrading']==True)]

In [59]:
# Verify
crypto_df['IsTrading'].value_counts()

True    1144
Name: IsTrading, dtype: int64

In [60]:
# Verify algorithms
crypto_df['Algorithm'].value_counts()

Scrypt            394
X11               182
SHA-256           121
X13                54
PoS                42
                 ... 
IMesh               1
SHA3-256            1
SHA-256 + Hive      1
QuarkTX             1
Momentum            1
Name: Algorithm, Length: 89, dtype: int64

### All Cryptocurriencies have an algorithm defined

In [61]:
# Remove the IsTrading column.
crypto_df=crypto_df.drop('IsTrading',axis=1)

In [62]:
#Remove all cryptocurrencies with at least one null value.
crypto_df=crypto_df.dropna()

In [63]:
# Remove all cryptocurrencies without coins mined
crypto_df=crypto_df[(crypto_df['TotalCoinsMined'] != 0)]

In [64]:
# Create new DataFrame with the unique CoinName
crypto_df['CoinName'].unique()

array(['42 Coin', '404Coin', 'EliteCoin', 'Bitcoin', 'Ethereum',
       'Litecoin', 'Dash', 'Monero', 'Ethereum Classic', 'ZCash',
       'Bitshares', 'DigiByte', 'BitcoinDark', 'PayCoin', 'ProsperCoin',
       'KoboCoin', 'Spreadcoin', 'Argentum', 'Aurora Coin', 'BlueCoin',
       'MyriadCoin', 'MoonCoin', 'ZetaCoin', 'SexCoin', 'Quatloo',
       'EnergyCoin', 'QuarkCoin', 'Riecoin', 'Digitalcoin ', 'BitBar',
       'Catcoin', 'CryptoBullion', 'CannaCoin', 'CryptCoin', 'CasinoCoin',
       'Diamond', 'Verge', 'DevCoin', 'EarthCoin', 'E-Gulden',
       'Einsteinium', 'Emerald', 'Exclusive Coin', 'FlutterCoin',
       'Franko', 'FeatherCoin', 'GrandCoin', 'GlobalCoin', 'GoldCoin',
       'HoboNickels', 'HyperStake', 'Infinite Coin', 'IOCoin', 'IXcoin',
       'KrugerCoin', 'LuckyCoin', 'Litebar ', 'MaxCoin', 'MegaCoin',
       'MediterraneanCoin', 'MintCoin', 'MinCoin', 'MazaCoin',
       'Nautilus Coin', 'NavCoin', 'NobleCoin', 'Namecoin', 'NyanCoin',
       'OpalCoin', 'Orbitcoin', 'P

In [65]:
coins_name=pd.DataFrame(crypto_df["CoinName"],index=crypto_df.index).copy()
coins_name.head(5)

Unnamed: 0,CoinName
0,42 Coin
2,404Coin
5,EliteCoin
7,Bitcoin
8,Ethereum


In [66]:
# Remove the CoinName colum
crypto_df=crypto_df.drop('CoinName',axis=1)

In [67]:
crypto_df.dtypes

Unnamed: 0          object
Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [68]:
crypto_df['TotalCoinSupply'] = crypto_df['TotalCoinSupply'].astype(float)

In [69]:
crypto_df.dtypes

Unnamed: 0          object
Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply    float64
dtype: object

In [70]:
# Create dummies variables for all of the text features and create X DataFrame
X=pd.get_dummies(crypto_df,columns=['Unnamed: 0',
                                    'Algorithm',
                                   'ProofType'])
X.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Unnamed: 0_1337,Unnamed: 0_1CR,Unnamed: 0_404,Unnamed: 0_42,Unnamed: 0_8BIT,Unnamed: 0_AAC,Unnamed: 0_ABJ,Unnamed: 0_ABS,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,41.99995,42.0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1055185000.0,532000000.0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,29279420000.0,314159300000.0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,17927180.0,21000000.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,107684200.0,0.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [72]:
# Standardize X DataFrame data
crypto_scaled=StandardScaler().fit_transform(X)
print(crypto_scaled[0:5])

[[-0.11674788 -0.15286468 -0.0433555  ... -0.0433555  -0.0433555
  -0.0433555 ]
 [-0.09358885 -0.14499604 -0.0433555  ... -0.0433555  -0.0433555
  -0.0433555 ]
 [ 0.52587231  4.4937636  23.06512519 ... -0.0433555  -0.0433555
  -0.0433555 ]
 [-0.11635442 -0.15255408 -0.0433555  ... -0.0433555  -0.0433555
  -0.0433555 ]
 [-0.11438445 -0.15286468 -0.0433555  ... -0.0433555  -0.0433555
  -0.0433555 ]]


In [75]:
# Reducing Data Dimensions Using PCA
# Reduce dataframe dimension and create new pcs_df
pca=PCA(n_components=3)
crypto_pca=pca.fit_transform(crypto_scaled)
pcs_df=pd.DataFrame(
    data=crypto_pca, columns=['PC 1','PC 2','PC 3'],index=crypto_df.index)
pcs_df.head()

Unnamed: 0,PC 1,PC 2,PC 3
0,-0.400762,1.277741,0.137036
2,-0.381785,1.274371,0.134675
5,2.621793,1.65308,0.100332
7,-0.192824,-1.632103,0.024139
8,-0.119956,-2.397375,-0.0375


## Clustering Cryptocurrencies Using K-means

In [76]:
# Create Elbow Curve
# Find the best value for K
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of K values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)

# Graph
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

### K value defined as 5

In [77]:
# Initialize the K-means model
model = KMeans(n_clusters=5, random_state=0)

# Fit the model
model.fit(pcs_df)

KMeans(n_clusters=5, random_state=0)

In [102]:
# Create a new DataFrame
clustered_df=crypto_df.merge(pcs_df,on=crypto_df.index)
clustered_df.head()

Unnamed: 0.1,key_0,Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3
0,0,42,Scrypt,PoW/PoS,41.99995,42.0,-0.400762,1.277741,0.137036
1,2,404,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.381785,1.274371,0.134675
2,5,1337,X13,PoW/PoS,29279420000.0,314159300000.0,2.621793,1.65308,0.100332
3,7,BTC,SHA-256,PoW,17927180.0,21000000.0,-0.192824,-1.632103,0.024139
4,8,ETH,Ethash,PoW,107684200.0,0.0,-0.119956,-2.397375,-0.0375


In [103]:
# Set key_0 as index
clustered_df.set_index("key_0",inplace=True)
clustered_df.index.name = None
clustered_df.head()

Unnamed: 0.1,Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3
0,42,Scrypt,PoW/PoS,41.99995,42.0,-0.400762,1.277741,0.137036
2,404,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.381785,1.274371,0.134675
5,1337,X13,PoW/PoS,29279420000.0,314159300000.0,2.621793,1.65308,0.100332
7,BTC,SHA-256,PoW,17927180.0,21000000.0,-0.192824,-1.632103,0.024139
8,ETH,Ethash,PoW,107684200.0,0.0,-0.119956,-2.397375,-0.0375


In [104]:
# Remove columns
clustered_df=clustered_df.drop('Unnamed: 0',axis=1)
clustered_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3
0,Scrypt,PoW/PoS,41.99995,42.0,-0.400762,1.277741,0.137036
2,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.381785,1.274371,0.134675
5,X13,PoW/PoS,29279420000.0,314159300000.0,2.621793,1.65308,0.100332
7,SHA-256,PoW,17927180.0,21000000.0,-0.192824,-1.632103,0.024139
8,Ethash,PoW,107684200.0,0.0,-0.119956,-2.397375,-0.0375


In [105]:
# Merge coins_name to dataframe
clustered_df = pd.merge(clustered_df,coins_name,left_on=clustered_df.index,right_on=coins_name.index)
clustered_df.head()

Unnamed: 0,key_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName
0,0,Scrypt,PoW/PoS,41.99995,42.0,-0.400762,1.277741,0.137036,42 Coin
1,2,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.381785,1.274371,0.134675,404Coin
2,5,X13,PoW/PoS,29279420000.0,314159300000.0,2.621793,1.65308,0.100332,EliteCoin
3,7,SHA-256,PoW,17927180.0,21000000.0,-0.192824,-1.632103,0.024139,Bitcoin
4,8,Ethash,PoW,107684200.0,0.0,-0.119956,-2.397375,-0.0375,Ethereum


In [106]:
# Set key_0 as index
clustered_df.set_index("key_0",inplace=True)
clustered_df.index.name = None
clustered_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName
0,Scrypt,PoW/PoS,41.99995,42.0,-0.400762,1.277741,0.137036,42 Coin
2,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.381785,1.274371,0.134675,404Coin
5,X13,PoW/PoS,29279420000.0,314159300000.0,2.621793,1.65308,0.100332,EliteCoin
7,SHA-256,PoW,17927180.0,21000000.0,-0.192824,-1.632103,0.024139,Bitcoin
8,Ethash,PoW,107684200.0,0.0,-0.119956,-2.397375,-0.0375,Ethereum


In [107]:
# Add the predicted class column
clustered_df["Class"] = model.labels_
clustered_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,Class
0,Scrypt,PoW/PoS,41.99995,42.0,-0.400762,1.277741,0.137036,42 Coin,1
2,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.381785,1.274371,0.134675,404Coin,1
5,X13,PoW/PoS,29279420000.0,314159300000.0,2.621793,1.65308,0.100332,EliteCoin,1
7,SHA-256,PoW,17927180.0,21000000.0,-0.192824,-1.632103,0.024139,Bitcoin,0
8,Ethash,PoW,107684200.0,0.0,-0.119956,-2.397375,-0.0375,Ethereum,0


## Visualizing Results

In [111]:
# Create 3D scater plot
fig = px.scatter_3d(
    clustered_df,
    x="CoinName",
    y="TotalCoinsMined",
    z="TotalCoinSupply",
    color="Class",
    symbol="Class",
    width=800,
    hover_name="CoinName",
    hover_data=["Algorithm"])

fig.update_layout(legend=dict(x=0, y=1))
fig.show()

In [112]:
# Create a data table 
data_table = clustered_df[["CoinName", "Algorithm", "ProofType", \
                          "TotalCoinSupply", "TotalCoinsMined", "Class"]].hvplot.table(width=600, backlog=1)
data_table

In [113]:
clustered_df.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    hover_cols=["CoinName"],
    by="Class",
)