# Clustering Crypto

In [46]:
# Initial imports
import pandas as pd
import hvplot.pandas
from pathlib import Path
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

### Data Preprocessing

In [47]:
# Loade the cryptocurrencies data
file_path = Path("Resources/crypto_data.csv")
crypto_df = pd.read_csv(file_path, index_col=0)
crypto_df.head(10)

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0
1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
2015,2015 coin,X11,True,PoW/PoS,,0
BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,True,PoW,107684200.0,0
LTC,Litecoin,Scrypt,True,PoW,63039240.0,84000000


In [48]:
# Keep only cryptocurrencies that are on trading
def TradeClean(IsTrading):
    if crypto_df["IsTrading"] == "True":
        return 1
    else:
        return 0
    
crypto_df["IsTrading"] = crypto_df["IsTrading"].apply(TradeClean)
crypto_df.head()

# for row in crypto_df.iterrows:
#     if crypto_df["IsTrading"] == "True":
#         print("True")
#     elif crypto_df["IsTrading"] == "False":
#         print("NaN")
# crypto_df.head()

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [49]:
# Keep only cryptocurrencies with a working algorithm
def Proof(ProofType):
    if crypto_df["ProofType"] == "PoW":
        return "True"
    elif crypto_df["ProofType"] == "PoS":
        return "True"
    else:
        return "NaN"
crypto_df["ProofType"] = crypto_df["ProofType"].apply(Proof)
crypto_df.head()

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [50]:
# Remove the "IsTrading" column
crypto_df = crypto_df.drop(columns = "IsTrading")
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.0,0


In [52]:
# Remove rows with at least 1 null value
crypto_df.dropna(inplace = True)
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
808,808,SHA-256,PoW/PoS,0.0,0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [53]:
# Remove rows with cryptocurrencies without coins mined
crypto_df.dropna(axis = "columns", inplace = True)
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
808,808,SHA-256,PoW/PoS,0.0,0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [55]:
# Fetch the cryptocurrencies names prior to drop them from crypto_df
CoinName = crypto_df["CoinName"]
CoinName

42                 42 Coin
404                404Coin
808                    808
1337             EliteCoin
BTC                Bitcoin
ETH               Ethereum
LTC               Litecoin
DASH                  Dash
XMR                 Monero
ETC       Ethereum Classic
ZEC                  ZCash
BTS              Bitshares
DGB               DigiByte
BTCD           BitcoinDark
XBS               Bitstake
XPY                PayCoin
PRC            ProsperCoin
KOBO              KoboCoin
SPR             Spreadcoin
ACOIN                ACoin
AERO             Aero Coin
APEX              ApexCoin
ARCH              ArchCoin
ARG               Argentum
AUR            Aurora Coin
BET               BetaCoin
BLU               BlueCoin
XMY             MyriadCoin
MOON              MoonCoin
ZET               ZetaCoin
                ...       
LSK                   Lisk
XHI                 HiCoin
XWC              WhiteCoin
DOT                Dotcoin
FSC         FriendshipCoin
THC           The Hempcoin
F

In [61]:
# Remove the cryptocurrency name since it's not going to be used on the clustering algorithm
crypto_df.drop(columns = ["CoinName"], inplace = True)
crypto_df.head()

KeyError: "['CoinName'] not found in axis"

In [None]:
# Create dummies variables for text features
X = pd.get_dummies(columns)

In [None]:
# Standardize data
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

### Reducing Dimensions Using PCA

In [None]:
# Use PCA to reduce dimension to 3 principal components
crypto_scaled = StandardScaler().fit_transform(crypto_df)
print(crypto_scaled[0:5])

pca = PCA(n_components=3)

crypto_pca = pca.fit_transform(crypto_scaled)

# Create a DataFrame with the principal components data
crypto_df_pca = pd.DataFrame(
    data=crypto_pca, columns=["PC1", "PC2", "PC3"]
)
crypto_df_pca.head()

### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [None]:
inertia = []
k = list(range(1, 11))

# Looking for the best k
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(crypto_df_pca)
    inertia.append(km.inertia_)

# Define a DataFrame to plot the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)


Running K-Means with `k=4`

In [None]:
# Initialize the K-Means model
model = KMeans(n_clusters=4, random_state=5)
# Fit the model
model.fit(crypto_df_pca)
# Predict clusters
predictions = model.predict(crypto_df_pca)
print(predictions)
# Create a new DataFrame including predicted clusters and cryptocurrencies features
crypto_df_pca["class"] = model.labels_
crypto_df_pca.head()

In [None]:
# include "Algorithm", "ProofType", "TotalCoinsMined", "TotalCoinSupply", "PC 1", "PC 2", "PC 3", "CoinName", "Class"
clustered_df = pd.DataFrame()

### Visualizing Results

#### 3D-Scatter with Clusters

In [None]:
# Create a 3D-Scatter with the PCA data and the clusters
fig = px.scatter_3d(
    crypto_df_pca,
    x="PC1",
    y="PC2",
    z="PC3",
    color="class",
    symbol="class",
    width=800
)
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

#### Table of Tradable Cryptocurrencies

In [None]:
# Table with tradable cryptos
crypto_table = 

In [None]:
# Print the total number of tradable cryptocurrencies
crypto_table.count()

#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Scale data to create the scatter plot
crypto_scaled = StandardScaler().fit_transform(crypto_table)
print(crypto_scaled[0:5])

In [None]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
crypto_scaled.hvplot.scatter(x="TotalCoinsMined", y="TotalCoinSupply", by="class", hover_cols=["CoinName"])