In [None]:
# Initial imports
import pandas as pd
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [None]:
# Read the raw cryptocurrency table from disk; the first CSV column is used
# as the row index. Print the frame's shape and preview its first rows.
crypto_df = pd.read_csv(
    "crypto_data.csv",
    index_col=0,
)
print(crypto_df.shape)
crypto_df.head()

In [None]:
# Select only the rows whose "IsTrading" flag equals True, i.e. the
# cryptocurrencies that are currently being traded.
crypto_trade = crypto_df.loc[crypto_df["IsTrading"] == True]
print(crypto_trade.shape)
crypto_trade.head()

In [None]:
# Every remaining row is a traded coin, so the "IsTrading" column is dropped.
# The result is stored under a new name, leaving crypto_trade untouched.
crypto_data = crypto_trade.drop(columns=["IsTrading"])
print(crypto_data.shape)
crypto_data.head()

In [None]:
# Count the missing values in each column.
crypto_data.isna().sum()

In [None]:
# Discard every row containing at least one missing value, then re-count the
# nulls per column to confirm that none remain.
crypto_data = crypto_data.dropna(axis=0, how="any")
crypto_data.isna().sum()

In [None]:
# Retain only coins with a strictly positive "TotalCoinsMined" value.
crypto_data = crypto_data.loc[crypto_data["TotalCoinsMined"].gt(0)]
print(crypto_data.shape)
crypto_data.head()

In [None]:
# Create a new DataFrame that holds only the cryptocurrency names.
# It is built from the filtered `crypto_data` rather than the raw `crypto_df`,
# so it contains exactly the rows (and index labels) that survive the
# filtering steps above; the raw frame would also carry the names of coins
# that were already removed, making the printed shape and preview misleading.
# NOTE: this cell must run before the "CoinName" column is dropped from
# crypto_data in the following cell.
coin_name = crypto_data[["CoinName"]].copy()
print(coin_name.shape)
coin_name.head()

In [None]:
# "CoinName" is not used by the clustering algorithm, so it is removed
# from the feature table here.
crypto_data = crypto_data.drop(columns=["CoinName"])
print(crypto_data.shape)
crypto_data.head()

In [None]:
# Expand the text features "Algorithm" and "ProofType" into indicator
# (dummy) columns; all other columns are passed through unchanged.
X = pd.get_dummies(
    crypto_data,
    columns=["Algorithm", "ProofType"],
)
print(X.shape)
X.head(10)

In [None]:
# Standardize the feature matrix: fit a StandardScaler on X, then apply the
# fitted scaler to the same data. The first five scaled rows are displayed.
scaler = StandardScaler().fit(X)
crypto_scaled = scaler.transform(X)
crypto_scaled[:5]

In [None]:
# Using PCA to reduce dimension to three principal components.
# The PCA model is fitted on the standardized matrix `crypto_scaled` and the
# same matrix is projected in one call; the result has one column per
# requested component (three here).
pca = PCA(n_components=3)
crypto_pca = pca.fit_transform(crypto_scaled)
# Display the full projected array as the cell output.
crypto_pca

In [None]:
# Wrap the three principal components in a DataFrame that carries the same
# row index as crypto_data, so the components can be joined back by label.
df_crypto_pca = pd.DataFrame(
    crypto_pca,
    index=crypto_data.index,
    columns=["PC1", "PC2", "PC3"],
)
df_crypto_pca.head()

In [None]:
# Elbow curve: fit K-Means once for each candidate cluster count (1 to 10)
# on the PCA data and record the inertia of every fitted model.
k = list(range(1, 11))
inertia = []

for i in k:
    # fit() returns the fitted estimator, so construction and fitting are chained.
    km = KMeans(n_clusters=i, random_state=0).fit(df_crypto_pca)
    inertia.append(km.inertia_)

# Plot inertia against k with hvPlot; the "elbow" of this line suggests K.
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(
    x="k",
    y="inertia",
    xticks=k,
    title="Elbow Curve",
)

In [None]:
# Build the K-Means model with four clusters and a fixed random_state, and
# fit it on the PCA data (fit() returns the fitted estimator itself).
model = KMeans(n_clusters=4, random_state=0).fit(df_crypto_pca)

# Assign each row of the PCA data to a cluster and display the labels.
predict = model.predict(df_crypto_pca)
predict

In [None]:
# Combine the cryptocurrency features (crypto_data) with the principal
# components (df_crypto_pca) side by side, then append two columns:
#   - "CoinName": the coin names, aligned on the row index;
#   - "Class":    the cluster label assigned by the fitted K-Means model.
clustered_df = pd.concat([crypto_data, df_crypto_pca], axis=1).assign(
    CoinName=coin_name["CoinName"],
    Class=model.labels_,
)

# Print the shape of clustered_df and preview its first ten rows.
print(clustered_df.shape)
clustered_df.head(10)

In [None]:
# 3D scatter of the three principal components. Points are colored and
# given a marker symbol by cluster ("Class"); hovering shows the coin name
# and its algorithm.
fig = px.scatter_3d(
    clustered_df,
    x="PC1", y="PC2", z="PC3",
    color="Class",
    symbol="Class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
    width=800,
)
# Move the legend to the top-left corner; the returned figure is displayed.
fig.update_layout(legend={"x": 0, "y": 1})

In [None]:
 Create a table with tradable cryptocurrencies.
clustered_df.hvplot.table(columns=['CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined', 'Class'], sortable=True, selectable=True)


In [None]:
# Report how many cryptocurrencies (rows of clustered_df) remain.
print(f"Total number of tradeable cryptocurrencies: {len(clustered_df)}")

In [None]:
# Rescale the two coin-quantity columns with MinMaxScaler so they can be
# drawn on a common scale in the scatter plot; the scaled array is displayed.
scaled_df = MinMaxScaler().fit_transform(
    clustered_df[["TotalCoinSupply", "TotalCoinsMined"]]
)
scaled_df

In [None]:
# Put the scaled values into a DataFrame that reuses clustered_df's index,
# then attach the "CoinName" and "Class" columns taken from clustered_df.
plot_df = pd.DataFrame(
    scaled_df,
    index=clustered_df.index,
    columns=["TotalCoinSupply", "TotalCoinsMined"],
).assign(
    CoinName=clustered_df["CoinName"],
    Class=clustered_df["Class"],
)
plot_df.head(10)

In [None]:
# hvPlot scatter of the scaled quantities: "TotalCoinsMined" on the x-axis,
# "TotalCoinSupply" on the y-axis, with points grouped by cluster ("Class").
plot_df.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    by="Class",
)