In [110]:
# Initial imports
import pandas as pd
from sklearn.cluster import KMeans
import plotly.express as px
import hvplot.pandas

In [111]:
# Load data
file_path = "./data/crypto_data.csv"
crypto_df = pd.read_csv(file_path)
crypto_df.tail(10)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
1242,GAP,Gapcoin,Scrypt,True,PoW/PoS,14931050.0,250000000
1243,SERO,Super Zero,Ethash,True,PoW,,1000000000
1244,UOS,UOS,SHA-256,True,DPoI,,1000000000
1245,BDX,Beldex,CryptoNight,True,PoW,980222600.0,1400222610
1246,ZEN,Horizen,Equihash,True,PoW,7296538.0,21000000
1247,XBC,BitcoinPlus,Scrypt,True,PoS,128327.0,1000000
1248,DVTC,DivotyCoin,Scrypt,False,PoW/PoS,21491210.0,100000000
1249,GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000
1251,PUNK,SteamPunk,PoS,False,PoS,,40000000


In [112]:
crypto_df.dtypes

Unnamed: 0          object
CoinName            object
Algorithm           object
IsTrading             bool
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [113]:
# Replacing " " and "." with "" and converting data type to float. Use only once
crypto_df['TotalCoinSupply'] = crypto_df.TotalCoinSupply.str.replace(" ", "").str.replace(".", "").astype(float)

In [114]:
crypto_df.dtypes

Unnamed: 0          object
CoinName            object
Algorithm           object
IsTrading             bool
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply    float64
dtype: object

In [115]:
# Remove rows if it is not Trading
df1 = crypto_df[crypto_df.IsTrading != 0]
df1.tail(10)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
1238,ZEPH,ZEPHYR,SHA-256,True,DPoS,2000000000.0,2000000000.0
1239,XQN,Quotient,Scrypt,True,PoW/PoS,,0.0
1240,NETC,NetworkCoin,X13,True,PoW/PoS,,400000.0
1241,VPRC,VapersCoin,Scrypt,True,PoW,,42750000000.0
1242,GAP,Gapcoin,Scrypt,True,PoW/PoS,14931050.0,250000000.0
1243,SERO,Super Zero,Ethash,True,PoW,,1000000000.0
1244,UOS,UOS,SHA-256,True,DPoI,,1000000000.0
1245,BDX,Beldex,CryptoNight,True,PoW,980222600.0,1400223000.0
1246,ZEN,Horizen,Equihash,True,PoW,7296538.0,21000000.0
1247,XBC,BitcoinPlus,Scrypt,True,PoS,128327.0,1000000.0


In [116]:
# Remove rows where Algorithm is not defined
df2 = df1[df1.Algorithm != "Multiple"]
df2.head(5)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42.0
1,365,365Coin,X11,True,PoW/PoS,,2300000000.0
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000.0
3,611,SixEleven,SHA-256,True,PoW,,611000.0
4,808,808,SHA-256,True,PoW/PoS,0.0,0.0


In [117]:
df3 = df2.drop('IsTrading', axis=1)
df3.head(10)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42.0
1,365,365Coin,X11,PoW/PoS,,2300000000.0
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000.0
3,611,SixEleven,SHA-256,PoW,,611000.0
4,808,808,SHA-256,PoW/PoS,0.0,0.0
5,1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159300000.0
6,2015,2015 coin,X11,PoW/PoS,,0.0
7,BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000.0
8,ETH,Ethereum,Ethash,PoW,107684200.0,0.0
9,LTC,Litecoin,Scrypt,PoW,63039240.0,84000000.0


In [118]:
# Remove all cryptocurrencies with at least one null value.
df4 = df3.dropna(how='any',axis=0)

In [120]:
df4.head(5)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42.0
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000.0
4,808,808,SHA-256,PoW/PoS,0.0,0.0
5,1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159300000.0
7,BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000.0


In [121]:
df4.count

<bound method DataFrame.count of      Unnamed: 0     CoinName    Algorithm ProofType  TotalCoinsMined  \
0            42      42 Coin       Scrypt   PoW/PoS     4.199995e+01   
2           404      404Coin       Scrypt   PoW/PoS     1.055185e+09   
4           808          808      SHA-256   PoW/PoS     0.000000e+00   
5          1337    EliteCoin          X13   PoW/PoS     2.927942e+10   
7           BTC      Bitcoin      SHA-256       PoW     1.792718e+07   
...         ...          ...          ...       ...              ...   
1238       ZEPH       ZEPHYR      SHA-256      DPoS     2.000000e+09   
1242        GAP      Gapcoin       Scrypt   PoW/PoS     1.493105e+07   
1245        BDX       Beldex  CryptoNight       PoW     9.802226e+08   
1246        ZEN      Horizen     Equihash       PoW     7.296538e+06   
1247        XBC  BitcoinPlus       Scrypt       PoS     1.283270e+05   

      TotalCoinSupply  
0        4.200000e+01  
2        5.320000e+08  
4        0.000000e+00  
5     

In [122]:
# Remove rows without CoinMined
df5 = df4[df4.TotalCoinsMined != 0]
df5.tail(10)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
1226,TCH,TigerCash,SHA-256,PoS,1000000000.0,1000000000.0
1230,WAVES,Waves,Leased POS,LPoS,100000000.0,100000000.0
1231,PART,Particl,PoS,PoS,9283138.0,8634140.0
1234,BTT,BitTorrent,TRC10,DPoS,989988700000.0,990000000000.0
1237,NXT,Nxt,PoS,PoS/LPoS,1000000000.0,1000000000.0
1238,ZEPH,ZEPHYR,SHA-256,DPoS,2000000000.0,2000000000.0
1242,GAP,Gapcoin,Scrypt,PoW/PoS,14931050.0,250000000.0
1245,BDX,Beldex,CryptoNight,PoW,980222600.0,1400223000.0
1246,ZEN,Horizen,Equihash,PoW,7296538.0,21000000.0
1247,XBC,BitcoinPlus,Scrypt,PoS,128327.0,1000000.0


In [123]:
df5.count

<bound method DataFrame.count of      Unnamed: 0     CoinName    Algorithm ProofType  TotalCoinsMined  \
0            42      42 Coin       Scrypt   PoW/PoS     4.199995e+01   
2           404      404Coin       Scrypt   PoW/PoS     1.055185e+09   
5          1337    EliteCoin          X13   PoW/PoS     2.927942e+10   
7           BTC      Bitcoin      SHA-256       PoW     1.792718e+07   
8           ETH     Ethereum       Ethash       PoW     1.076842e+08   
...         ...          ...          ...       ...              ...   
1238       ZEPH       ZEPHYR      SHA-256      DPoS     2.000000e+09   
1242        GAP      Gapcoin       Scrypt   PoW/PoS     1.493105e+07   
1245        BDX       Beldex  CryptoNight       PoW     9.802226e+08   
1246        ZEN      Horizen     Equihash       PoW     7.296538e+06   
1247        XBC  BitcoinPlus       Scrypt       PoS     1.283270e+05   

      TotalCoinSupply  
0        4.200000e+01  
2        5.320000e+08  
5        3.141593e+11  
7     

In [125]:
# new data frame coins_name
coins_name = df5[['CoinName']].copy()

In [126]:
coins_name.head(5)

Unnamed: 0,CoinName
0,42 Coin
2,404Coin
5,EliteCoin
7,Bitcoin
8,Ethereum


In [129]:
crypto_df.index

RangeIndex(start=0, stop=1252, step=1)

In [147]:
#7 Remove CoinName column
df6 = df5.drop('CoinName', axis=1)
df6.head(5)

Unnamed: 0,Coins_names,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,Scrypt,PoW/PoS,41.99995,42.0
2,404,Scrypt,PoW/PoS,1055185000.0,532000000.0
5,1337,X13,PoW/PoS,29279420000.0,314159300000.0
7,BTC,SHA-256,PoW,17927180.0,21000000.0
8,ETH,Ethash,PoW,107684200.0,0.0


In [148]:
dummies1 = pd.get_dummies(df6['ProofType'])

In [149]:
dummies1

Unnamed: 0,DPOS,DPoC,DPoS,HPoW,LPoS,POBh,PoA,PoC,PoS,PoS/LPoS,...,PoW/PoS,PoW/PoS.1,PoW/PoW,PoW/nPoS,Pos,Proof of Authority,Proof of Trust,TPoS,Zero-Knowledge Proof,dPoW/PoW
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1238,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1242,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1245,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1246,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [138]:
dummies2 = pd.get_dummies(df6['Algorithm'])
dummies2

Unnamed: 0,1GB AES Pattern Search,536,Argon2d,BLAKE256,Blake,Blake2S,Blake2b,C11,Cloverhash,Counterparty,...,Tribus,VBFT,VeChainThor Authority,X11,X11GOST,X13,X14,X15,X16R,XEVAN
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1238,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1242,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1245,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1246,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [150]:
dummies1 = pd.get_dummies(df6['ProofType']).rename(columns=lambda x: 'ProofType_' + str(x))

In [151]:
df7 = pd.concat([df6, dummies1], axis=1)
# df7 = df6.drop(['ProofType'], inplace=True, axis=1)
# df7.head(5)

In [152]:
df7.head(5)

Unnamed: 0,Coins_names,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,ProofType_DPOS,ProofType_DPoC,ProofType_DPoS,ProofType_HPoW,ProofType_LPoS,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,42,Scrypt,PoW/PoS,41.99995,42.0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,404,Scrypt,PoW/PoS,1055185000.0,532000000.0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,1337,X13,PoW/PoS,29279420000.0,314159300000.0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,BTC,SHA-256,PoW,17927180.0,21000000.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,ETH,Ethash,PoW,107684200.0,0.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
