# Clustering Crypto

In [None]:
# Initial imports
import hvplot.pandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler

### Fetching Cryptocurrency Data

In [None]:
# CryptoCompare endpoint that returns the full coin list as JSON
url = (
    "https://min-api.cryptocompare.com"
    "/data/all/coinlist"
)

In [None]:
# Create a DataFrame
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.

# The timeout keeps the notebook from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error here rather than letting it show up
# later as a confusing failure while parsing the payload.
r = requests.get(url, timeout=30)
r.raise_for_status()
data = r.json()

In [None]:
# Alternatively, use the provided csv file:
# file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame

In [None]:
# Show the top-level keys of the JSON response
list(data)

In [None]:
# Keep only the payload stored under the 'Data' key of the response
data = data["Data"]

In [None]:
# Build a DataFrame from the payload: each top-level key becomes a column
crypto_df = pd.DataFrame(data)

In [None]:
# Swap rows and columns so that each former column becomes a row
crypto_df = crypto_df.transpose()
crypto_df.head()

### Data Preprocessing

In [None]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'

# Select the wanted columns by name instead of dropping a hard-coded list of
# unwanted ones: the drop list raises KeyError as soon as one of its columns is
# absent from the response and silently keeps any column it does not mention.
# Selecting by name fails loudly only if a column that is actually needed is missing.
crypto_df = crypto_df[
    [
        'CoinName',
        'Algorithm',
        'IsTrading',
        'ProofType',
        'TotalCoinsMined',
        'TotalCoinSupply',
    ]
]

In [None]:
# Preview the first five rows after pruning the columns
crypto_df.head(n=5)

In [None]:
# Retain only the rows whose 'IsTrading' value equals True

crypto_df = crypto_df[crypto_df['IsTrading'].eq(True)]

In [None]:
# Discard the rows whose 'Algorithm' is the literal text 'N/A'

crypto_df = crypto_df[crypto_df['Algorithm'].ne('N/A')]

In [None]:
# 'IsTrading' has served its purpose as a filter; remove the column

crypto_df = crypto_df.drop(labels='IsTrading', axis='columns')

In [None]:
# Count the null values in each column before removing incomplete rows

crypto_df.isna().sum()

In [None]:
# Drop every row holding at least one null value, then recount the nulls per column
crypto_df = crypto_df.dropna(axis=0, how='any')
crypto_df.isna().sum()

In [None]:
# Keep only the rows whose 'TotalCoinsMined' value is different from 0

crypto_df = crypto_df[crypto_df['TotalCoinsMined'].ne(0)]

In [None]:
# Drop rows where there are 'N/A' text values

# Indexing a DataFrame with the element-wise mask (crypto_df[crypto_df != 'N/A'])
# only replaces the matching cells with NaN and keeps every row. Reducing the mask
# per row keeps just the rows in which no column holds the text 'N/A'.
crypto_df = crypto_df[(crypto_df != 'N/A').all(axis=1)]

In [None]:
# Set the coin names aside in a one-column DataFrame (same row index as crypto_df)
# before the column is removed from the feature set

coinname_df = crypto_df.reindex(columns=['CoinName'])

In [None]:
# 'CoinName' is not used as a clustering feature, so take it out of crypto_df

crypto_df = crypto_df.drop(labels='CoinName', axis='columns')
crypto_df.head()

In [None]:
# One-hot encode the two text features; the remaining columns pass through unchanged

crypto_dummies = pd.get_dummies(
    data=crypto_df,
    columns=['Algorithm', 'ProofType'],
)
crypto_dummies.head()

In [None]:
# Standardize every feature column with StandardScaler

crypto_scaled = StandardScaler().fit(crypto_dummies).transform(crypto_dummies)
# Show the first scaled sample as a quick check
print(crypto_scaled[:1])


In [None]:
# Look at the first five rows of the unscaled features again
crypto_df.iloc[:5]

In [None]:
# Remove every sample (row) of the scaled matrix that contains at least one NaN.
# NOTE: only crypto_scaled is shortened here; crypto_df and coinname_df keep all
# of their rows, so the scaled matrix may end up with fewer rows than those frames.
rows_with_nan = np.isnan(crypto_scaled).any(axis=1)
crypto_scaled = crypto_scaled[~rows_with_nan]

# Number of NaN values left in the matrix (0 after the filter above)
np.isnan(crypto_scaled).sum()

### Reducing Dimensions Using PCA

In [None]:
# Project the scaled features onto their first 3 principal components

pca = PCA(
    n_components=3,
)
crypto_pca = pca.fit_transform(X=crypto_scaled)

In [None]:
# Wrap the principal components in a DataFrame with one named column per component

pca_df = pd.DataFrame(crypto_pca, columns=["PC 1", "PC 2", "PC 3"])
pca_df.head()


### Clustering Cryptocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [None]:
# Candidate numbers of clusters to evaluate
k = list(range(1, 11))
inertia = []

# Fit one K-Means model per candidate k on the PCA data and record its inertia
for i in k:
    km = KMeans(n_clusters=i, random_state=0).fit(crypto_pca)
    inertia.append(km.inertia_)

# Plot inertia against k with hvPlot to look for the elbow
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

Running K-Means with `k=5`

In [None]:
# Initialize the K-Means model

model = KMeans(n_clusters=5, random_state=0)

# Fit the model
model.fit(pca_df)

# Predict clusters
predictions = model.predict(pca_df)

# Create a new DataFrame including predicted clusters and cryptocurrencies features

pca_df['class'] = model.labels_

# Rows of the scaled matrix that contain NaN are removed before PCA, so pca_df can
# hold fewer rows than crypto_df. Leave the same coins out here.
# NOTE(review): assumes the only NaNs in the scaled data come from missing
# 'TotalCoinsMined' / 'TotalCoinSupply' values - the length check below fails
# loudly if that assumption does not hold.
features_df = crypto_df.dropna(subset=['TotalCoinsMined', 'TotalCoinSupply'])
if len(features_df) != len(pca_df):
    raise ValueError(
        f"Cannot align PCA output with the coin features: "
        f"{len(pca_df)} PCA rows vs {len(features_df)} feature rows"
    )

# pca_df carries a positional 0..n-1 index; give it the coin index so the
# column-wise join below matches every coin with its own components and class.
pca_df.index = features_df.index

# Join column-wise. DataFrame.append stacked the PCA rows underneath the feature
# rows (leaving NaN in every shared column) and was removed in pandas 2.0.
df_merged = features_df.join(coinname_df, how='left').join(pca_df)

df_merged.head(20)

### Visualizing Results

#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# 3D scatter of the three principal components, one colour and marker symbol per cluster
# NOTE(review): `px` is not imported anywhere in the visible notebook - presumably
# plotly.express; confirm and add the import to the first cell.

fig = px.scatter_3d(
    df_merged,
    x="PC 1", y="PC 2", z="PC 3",
    color="class",
    symbol="class",
    hover_name='CoinName',
    hover_data=['Algorithm'],
)
# Move the legend to x=0, y=1
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()

In [28]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"


#### Table of Tradable Cryptocurrencies

In [29]:
# Columns to display in the table of tradable cryptocurrencies

columns = [
    'CoinName',
    'Algorithm',
    'ProofType',
    'TotalCoinSupply',
    'TotalCoinsMined',
    'class',
]

In [None]:
# Render the selected columns of df_merged as an hvPlot table
df_merged.hvplot.table(columns=columns)

#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Convert both coin-count columns to float and express them in millions
df_merged['TotalCoinsMined'] = df_merged['TotalCoinsMined'].astype(float).div(1000000)
df_merged['TotalCoinSupply'] = df_merged['TotalCoinSupply'].astype(float).div(1000000)

In [None]:
# Scatter of coins mined (x) against total supply (y), coloured by cluster,
# with the coin name shown on hover

df_merged.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    c='class',
    colormap="viridis",
    hover_cols=['CoinName'],
)