In [39]:
import pandas as pd
import plotly.express as px
import hvplot.pandas
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import hvplot.pandas

In [40]:
# Read the raw cryptocurrency dataset from its relative path.
file = "Resources/crypto_data.csv"
crypto_df = pd.read_csv(filepath_or_buffer=file)
# Preview the first five rows.
crypto_df.head(n=5)


Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [41]:
# Use the CSV's unnamed first column (read by pandas as 'Unnamed: 0') as the index.
# The guard keeps this cell re-runnable: once the column has become the index,
# calling set_index again would raise a KeyError.
if 'Unnamed: 0' in crypto_df.columns:
    crypto_df = crypto_df.set_index('Unnamed: 0')
crypto_df.head()

Unnamed: 0_level_0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [42]:
# Keep only the coins that are trading, i.e. drop rows where IsTrading is False.
crypto_trading = crypto_df.loc[crypto_df["IsTrading"].ne(False)]
crypto_trading.head(n=30)

Unnamed: 0_level_0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0
1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
2015,2015 coin,X11,True,PoW/PoS,,0
BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,True,PoW,107684200.0,0
LTC,Litecoin,Scrypt,True,PoW,63039240.0,84000000


In [43]:
# List the distinct Algorithm values among the trading coins.
crypto_trading.loc[:, "Algorithm"].unique()

array(['Scrypt', 'X11', 'SHA-256', 'X13', 'Ethash', 'CryptoNight-V7',
       'Equihash', 'SHA-512', 'Multiple', 'X15', 'NIST5', 'Quark',
       'Groestl', 'PoS', 'NeoScrypt', 'SHA3', 'HybridScryptHash256',
       'Scrypt-n', 'PHI1612', 'Lyra2REv2', 'CryptoNight', 'Shabal256',
       'Counterparty', 'Blake', 'Momentum', 'Stanford Folding', 'QuBit',
       'XG Hash', 'M7 POW', 'Curve25519', 'Lyra2RE', 'QUAIT', 'vDPOS',
       'Blake2b', 'BLAKE256', '1GB AES Pattern Search', 'Dagger',
       'CryptoNight-Lite', 'X11GOST', 'SHA-256D', 'POS 3.0',
       'Progressive-n', 'DPoS', 'Lyra2Z', 'X14', 'Time Travel', 'Argon2',
       'Keccak', 'Blake2S', 'Dagger-Hashimoto', '536', 'Argon2d',
       'Cloverhash', 'Skein', 'SkunkHash v2 Raptor',
       'VeChainThor Authority', 'Ouroboros', 'POS 2.0', 'SkunkHash',
       'C11', 'Proof-of-BibleHash', 'SHA-256 + Hive',
       'Proof-of-Authority', 'XEVAN', 'VBFT', 'YescryptR16', 'IMesh',
       'Green Protocol', 'Semux BFT consensus', 'X16R', 'Tribus',


In [44]:
# Drop the coins whose Algorithm is listed as "Multiple" (treated here as
# not having a single algorithm defined).
crypto_algorithms = crypto_trading.loc[~crypto_trading["Algorithm"].eq("Multiple")]
crypto_algorithms.tail(n=5)

Unnamed: 0_level_0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SERO,Super Zero,Ethash,True,PoW,,1000000000
UOS,UOS,SHA-256,True,DPoI,,1000000000
BDX,Beldex,CryptoNight,True,PoW,980222600.0,1400222610
ZEN,Horizen,Equihash,True,PoW,7296538.0,21000000
XBC,BitcoinPlus,Scrypt,True,PoS,128327.0,1000000


In [45]:
# Non-null entries per column (the same figures DataFrame.count() reports).
crypto_algorithms.notna().sum()

CoinName           1126
Algorithm          1126
IsTrading          1126
ProofType          1126
TotalCoinsMined     674
TotalCoinSupply    1126
dtype: int64

In [46]:
# Drop the IsTrading column.
crypto_trade = crypto_algorithms.drop(columns=["IsTrading"])

In [47]:
# Drop every row that has a null value in any column.
crypto_no_nan = crypto_trade.dropna(axis="index", how="any")
crypto_no_nan.head(n=10)

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
808,808,SHA-256,PoW/PoS,0.0,0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0
LTC,Litecoin,Scrypt,PoW,63039240.0,84000000
DASH,Dash,X11,PoW/PoS,9031294.0,22000000
XMR,Monero,CryptoNight-V7,PoW,17201140.0,0
ETC,Ethereum Classic,Ethash,PoW,113359700.0,210000000


In [48]:
# Non-null entries per column after the null rows were dropped.
crypto_no_nan.notna().sum()

CoinName           674
Algorithm          674
ProofType          674
TotalCoinsMined    674
TotalCoinSupply    674
dtype: int64

In [49]:
# Remove all cryptocurrencies without coins mined.
# A strictly-positive test is used rather than "!= 0" so that a negative
# TotalCoinsMined (an invalid figure) is dropped along with the zeros.
crypto_with_coins = crypto_no_nan[crypto_no_nan["TotalCoinsMined"] > 0]
crypto_with_coins.count()

CoinName           524
Algorithm          524
ProofType          524
TotalCoinsMined    524
TotalCoinSupply    524
dtype: int64

In [50]:
# Preview the first five coins that passed the coins-mined filter.
crypto_with_coins.head(n=5)

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [51]:
# Pull out the coin names; they are wrapped in the coins_name DataFrame
# (indexed like crypto_with_coins) in the following cell.
names = crypto_with_coins.loc[:, "CoinName"]

In [52]:
# Build the one-column coins_name DataFrame, indexed by crypto_with_coins.index.
coins_name = pd.DataFrame({names.name: names}, index=crypto_with_coins.index)
coins_name

Unnamed: 0_level_0,CoinName
Unnamed: 0,Unnamed: 1_level_1
42,42 Coin
404,404Coin
1337,EliteCoin
BTC,Bitcoin
ETH,Ethereum
...,...
ZEPH,ZEPHYR
GAP,Gapcoin
BDX,Beldex
ZEN,Horizen


In [53]:
# Drop the CoinName column. Note that crypto_df is rebound here: from this
# cell on it refers to the filtered feature table, not the raw CSV frame.
crypto_df = crypto_with_coins.drop(columns="CoinName")
crypto_df.head(n=5)

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,41.99995,42
404,Scrypt,PoW/PoS,1055185000.0,532000000
1337,X13,PoW/PoS,29279420000.0,314159265359
BTC,SHA-256,PoW,17927180.0,21000000
ETH,Ethash,PoW,107684200.0,0


In [54]:
# One-hot encode the text features (Algorithm and ProofType); the result is stored as X.
X = pd.get_dummies(
    crypto_df,
    columns=["Algorithm", "ProofType"],
)
X.head(n=5)

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [55]:
# Standardize every column of X with StandardScaler; X is rebound to the scaled array.
X = StandardScaler().fit(X).transform(X)
print(X[:5])

[[-0.11648562 -0.15303594 -0.04372695 -0.04372695 -0.04372695 -0.06189845
  -0.07588252 -0.04372695 -0.06189845 -0.06189845 -0.04372695 -0.04372695
  -0.19396846 -0.06189845 -0.09815249 -0.04372695 -0.11636001 -0.07588252
  -0.04372695 -0.04372695 -0.15309311 -0.04372695 -0.13219579 -0.04372695
  -0.04372695 -0.0877058  -0.04372695 -0.04372695 -0.04372695 -0.04372695
  -0.06189845 -0.04372695 -0.0877058  -0.0877058  -0.0877058  -0.04372695
  -0.13948209 -0.13948209 -0.04372695 -0.06189845 -0.04372695 -0.07588252
  -0.18311355 -0.04372695 -0.04372695 -0.04372695 -0.07588252 -0.1595002
  -0.31755367 -0.04372695 -0.0877058  -0.07588252 -0.06189845 -0.04372695
   1.3708103  -0.04372695 -0.04372695 -0.06189845 -0.04372695 -0.04372695
  -0.04372695 -0.04372695 -0.04372695 -0.04372695 -0.04372695 -0.04372695
  -0.40232142 -0.04372695 -0.18311355 -0.04372695 -0.0877058  -0.0877058
  -0.1076244  -0.04372695 -0.04372695 -0.13219579 -0.04372695 -0.04372695
  -0.04372695 -0.04372695 -0.07588252 -0

In [56]:
# Use PCA to reduce dimensions down to three.
# random_state is fixed so the components are reproducible between runs: with
# the default svd_solver="auto", scikit-learn may choose the randomized solver
# for an input of this size.
pca = PCA(n_components=3, random_state=0)

In [57]:
# Fit the PCA on the scaled features and project them onto the three components.
pca_1 = pca.fit_transform(X)
# Display the resulting array of principal-component coordinates.
pca_1

array([[-0.33728465,  0.9697491 , -0.60297629],
       [-0.32074788,  0.96990351, -0.60351785],
       [ 2.29689308,  1.64783789, -0.72191178],
       ...,
       [ 0.32145497, -2.31578275,  0.42717734],
       [-0.13062935, -2.03578469,  0.55736349],
       [-0.29650442,  0.84074022, -0.25078714]])

In [58]:
# Wrap the principal components in a DataFrame that shares crypto_df's index.
pcs_df = pd.DataFrame(
    pca_1,
    index=crypto_df.index,
    columns=["PC 1", "PC 2", "PC 3"],
)
pcs_df.head(n=5)

Unnamed: 0_level_0,PC 1,PC 2,PC 3
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42,-0.337285,0.969749,-0.602976
404,-0.320748,0.969904,-0.603518
1337,2.296893,1.647838,-0.721912
BTC,-0.146219,-1.316683,0.207438
ETH,-0.155746,-1.987853,0.422878


In [59]:
# Compute the K-Means inertia for k = 1..10 to build the elbow curve.
inertia = []
k = list(range(1, 11))
# Cluster on the principal components only: test_cluster_amount() later writes a
# "class" column into pcs_df, which must not be used as a feature if this cell
# is re-run afterwards.
elbow_features = pcs_df.drop(columns="class", errors="ignore")
# Calculate the inertia for the range of K values
for i in k:
    # n_init is pinned because its default differs between scikit-learn releases.
    km = KMeans(n_clusters=i, random_state=0, n_init=10)
    km.fit(elbow_features)
    inertia.append(km.inertia_)

In [60]:
# Tabulate inertia against k and draw the elbow curve.
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(data=elbow_data)
df_elbow.hvplot.line(
    x="k",
    y="inertia",
    title="Elbow Curve",
    xticks=k,
)

In [61]:
# Function to cluster and plot dataset
def test_cluster_amount(df, clusters):
    model = KMeans(n_clusters=clusters, random_state=5)
    # Fitting model
    model.fit(df)
    
    # Add a new clas column to df_iris
    df["class"] = model.labels_

In [62]:
# Cluster the principal components into four groups; the labels are written to pcs_df["class"].
test_cluster_amount(df=pcs_df, clusters=4)

In [63]:
# Preview the principal components together with the new class column.
pcs_df.head(n=5)

Unnamed: 0_level_0,PC 1,PC 2,PC 3,class
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,-0.337285,0.969749,-0.602976,0
404,-0.320748,0.969904,-0.603518,0
1337,2.296893,1.647838,-0.721912,0
BTC,-0.146219,-1.316683,0.207438,2
ETH,-0.155746,-1.987853,0.422878,2


In [64]:
# 3D scatter of the principal components, coloured and marked by cluster.
fig = px.scatter_3d(
    pcs_df,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    color="class",
    symbol="class",
    width=800,
)
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()

In [65]:
# Join the principal components and cluster labels with the coin features on the shared index.
clustered_df = pcs_df.merge(crypto_df, left_index=True, right_index=True)
clustered_df.head(n=5)

Unnamed: 0_level_0,PC 1,PC 2,PC 3,class,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
42,-0.337285,0.969749,-0.602976,0,Scrypt,PoW/PoS,41.99995,42
404,-0.320748,0.969904,-0.603518,0,Scrypt,PoW/PoS,1055185000.0,532000000
1337,2.296893,1.647838,-0.721912,0,X13,PoW/PoS,29279420000.0,314159265359
BTC,-0.146219,-1.316683,0.207438,2,SHA-256,PoW,17927180.0,21000000
ETH,-0.155746,-1.987853,0.422878,2,Ethash,PoW,107684200.0,0


In [66]:
# Add the CoinName to the dataframe.
# Any CoinName column left by an earlier run of this cell is dropped first;
# otherwise the merge would produce CoinName_x / CoinName_y columns instead of
# a single CoinName.
clustered_df = pd.merge(
    clustered_df.drop(columns="CoinName", errors="ignore"),
    coins_name,
    left_index=True,
    right_index=True,
)
clustered_df.head()

Unnamed: 0_level_0,PC 1,PC 2,PC 3,class,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,CoinName
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,-0.337285,0.969749,-0.602976,0,Scrypt,PoW/PoS,41.99995,42,42 Coin
404,-0.320748,0.969904,-0.603518,0,Scrypt,PoW/PoS,1055185000.0,532000000,404Coin
1337,2.296893,1.647838,-0.721912,0,X13,PoW/PoS,29279420000.0,314159265359,EliteCoin
BTC,-0.146219,-1.316683,0.207438,2,SHA-256,PoW,17927180.0,21000000,Bitcoin
ETH,-0.155746,-1.987853,0.422878,2,Ethash,PoW,107684200.0,0,Ethereum


In [67]:
# Reorder the columns for display.
clustered_df = clustered_df.loc[
    :,
    [
        'Algorithm',
        'ProofType',
        'TotalCoinsMined',
        'TotalCoinSupply',
        'PC 1',
        'PC 2',
        'PC 3',
        'CoinName',
        "class",
    ],
]
clustered_df.head(n=5)

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,class
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,Scrypt,PoW/PoS,41.99995,42,-0.337285,0.969749,-0.602976,42 Coin,0
404,Scrypt,PoW/PoS,1055185000.0,532000000,-0.320748,0.969904,-0.603518,404Coin,0
1337,X13,PoW/PoS,29279420000.0,314159265359,2.296893,1.647838,-0.721912,EliteCoin,0
BTC,SHA-256,PoW,17927180.0,21000000,-0.146219,-1.316683,0.207438,Bitcoin,2
ETH,Ethash,PoW,107684200.0,0,-0.155746,-1.987853,0.422878,Ethereum,2


In [68]:
# 3D scatter of the clusters; hovering shows the coin name and its algorithm.
fig = px.scatter_3d(
    clustered_df,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    color="class",
    symbol="class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
    width=600,
)
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()

In [71]:
# NOTE(review): exploratory leftover — prints the help text for the Table object
# returned by hvplot.table(); consider removing it from the finished notebook.
help(clustered_df.hvplot.table())

Help on Table in module holoviews.element.tabular object:

class Table(holoviews.element.selection.SelectionIndexExpr, holoviews.core.data.Dataset, holoviews.core.element.Tabular)
 |  Table(data=None, kdims=None, vdims=None, **kwargs)
 |  
 |  params(datatype=List, cdims=Dict, kdims=List, vdims=List, group=String, label=String, name=String)
 |  
 |      Table is a Dataset type, which gets displayed in a tabular
 |      format and is convertible to most other Element types.
 |      
 |  [1;32mParameters of 'Table'
 |  [0m
 |  [1;31mParameters changed from their default values are marked in red.[0m
 |  [1;36mSoft bound values are marked in cyan.[0m
 |  C/V= Constant/Variable, RO/RW = ReadOnly/ReadWrite, AN=Allow None
 |  
 |  [1;34mName                        Value                     Type     Bounds   Mode [0m
 |  
 |  cdims                   OrderedDict()                 Dict              V RW 
 |  datatype   ['dataframe', 'dictionary', 'grid', '...   List   (0, None)  V RW 
 |

In [75]:
# Show selected columns of clustered_df as an hvplot table.
clustered_df.hvplot.table(
    columns=[
        "CoinName",
        "Algorithm",
        "ProofType",
        "TotalCoinSupply",
        "TotalCoinsMined",
        "class",
    ],
    width=800,
)

In [77]:
# Preview the clustered table before plotting.
clustered_df.head(n=5)

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,class
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,Scrypt,PoW/PoS,41.99995,42,-0.337285,0.969749,-0.602976,42 Coin,0
404,Scrypt,PoW/PoS,1055185000.0,532000000,-0.320748,0.969904,-0.603518,404Coin,0
1337,X13,PoW/PoS,29279420000.0,314159265359,2.296893,1.647838,-0.721912,EliteCoin,0
BTC,SHA-256,PoW,17927180.0,21000000,-0.146219,-1.316683,0.207438,Bitcoin,2
ETH,Ethash,PoW,107684200.0,0,-0.155746,-1.987853,0.422878,Ethereum,2


In [78]:
# Scatter of coins mined against total supply, with the coin name shown on hover.
clustered_df.hvplot.scatter(x="TotalCoinsMined", y="TotalCoinSupply", hover_cols=["CoinName"])

In [82]:
# Leave out the BitTorrent row (treated as an outlier) so the graph is readable.
clustered_no_outliar = clustered_df.loc[clustered_df["CoinName"].ne('BitTorrent')]
clustered_no_outliar.head(n=5)

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,class
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,Scrypt,PoW/PoS,41.99995,42,-0.337285,0.969749,-0.602976,42 Coin,0
404,Scrypt,PoW/PoS,1055185000.0,532000000,-0.320748,0.969904,-0.603518,404Coin,0
1337,X13,PoW/PoS,29279420000.0,314159265359,2.296893,1.647838,-0.721912,EliteCoin,0
BTC,SHA-256,PoW,17927180.0,21000000,-0.146219,-1.316683,0.207438,Bitcoin,2
ETH,Ethash,PoW,107684200.0,0,-0.155746,-1.987853,0.422878,Ethereum,2


In [85]:
# Redraw the hvplot scatter without the outlier, one series per cluster.
clustered_no_outliar.hvplot.scatter(
    by="class",
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    hover_cols=["CoinName"],
    yticks=(10_000_000, 15_000_000, 20_000_000, 25_000_000),
)