# Clustering Crypto

In [5]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans


### Fetching Cryptocurrency Data

In [6]:
url = "https://min-api.cryptocompare.com/data/all/coinlist"

requests.get(url)
response_data = requests.get(url)

In [7]:
import json

# Formatting as json
data = response_data.json()

In [8]:
# Creating a DataFrame
crypto_df = pd.DataFrame.from_dict(data["Data"],orient='index')



### Data Preprocessing

In [13]:
# Traspose the data
crypto_df.dtypes

# Keep only cryptocurrencies that are on trading
crypto_df = crypto_df[crypto_df.IsTrading == True]


# Keep only cryptocurrencies with a working algorithm

# Dropping rows where there are 'N/A' text values
crypto_df = crypto_df[crypto_df.Algorithm != "N/A"]
crypto_df = crypto_df[crypto_df.TotalCoinSupply != "N/A"]

# Remove the "IsTrading" column
crypto_df.drop(['IsTrading'], axis=1,inplace = True)

# Removing rows with at least 1 null value
crypto_df = crypto_df.dropna(how='any',axis=0)

# Removing rows with cryptocurrencies without coins mined
crypto_df = crypto_df[crypto_df.TotalCoinsMined != 0]

### Reducing Dimensions Using PCA

In [20]:
# Using PCA to reduce dimension to 3 principal components
df_aux = crypto_df['CoinName']
df_aux.head()

1337            EliteCoin
1CR               1Credit
42                42 Coin
8BIT            8BIT Coin
AAC     Acute Angle Cloud
Name: CoinName, dtype: object

In [15]:
# Creating a DataFrame with the principal components data
crypto_df.head()

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Algorithm,ProofType,...,TotalCoinsFreeFloat,SortOrder,Sponsored,Taxonomy,Rating,TotalCoinsMined,BlockNumber,NetHashesPerSecond,BlockReward,BlockTime
1337,20824,/coins/1337/overview,/media/35520987/elite.png,1465838687,1337,1337,EliteCoin,EliteCoin (1337),X13,PoW/PoS,...,,577,False,"{'Access': '', 'FCA': '', 'FINMA': '', 'Indust...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",29373200000.0,2432270.0,0,0.0,60.0
1CR,5406,/coins/1cr/overview,/media/20175/1cr.png,1430211162,1CR,1CR,1Credit,1Credit (1CR),Scrypt,PoW,...,,374,False,"{'Access': '', 'FCA': '', 'FINMA': '', 'Indust...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",88213.0,0.0,0,0.0,0.0
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Scrypt,PoW/PoS,...,,34,False,"{'Access': '', 'FCA': '', 'FINMA': '', 'Indust...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",41.99995,162698.0,0,0.0,0.0
8BIT,5327,/coins/8bit/overview,/media/20176/8bit.png,1430210081,8BIT,8BIT,8BIT Coin,8BIT Coin (8BIT),Scrypt,PoW/PoS,...,,295,False,"{'Access': '', 'FCA': '', 'FINMA': '', 'Indust...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",1467841.0,0.0,0,0.0,0.0
AAC,925291,/coins/aac/overview,/media/34478227/acuteangle.png,1535106240,AAC,AAC,Acute Angle Cloud,Acute Angle Cloud (AAC),ECC 256K1,DPOS,...,,3271,False,"{'Access': 'Permissionless', 'FCA': 'Exchange,...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",1000000000.0,0.0,0,0.0,0.0


In [21]:
#remove uneeded colomns
crypto_df.drop(['CoinName','PreMinedValue','TotalCoinsFreeFloat','BuiltOn','SmartContractAddress','Taxonomy','FullName','Name','Symbol','ImageUrl','Url','Id','ContentCreatedOn'], axis=1,inplace = True)

In [25]:
# to change objects to floats .astype() 
crypto_df['TotalCoinSupply'] = crypto_df.TotalCoinSupply.astype(float)
crypto_df['SortOrder'] = crypto_df.SortOrder.astype(float)
print(crypto_df.dtypes)

Algorithm              object
ProofType              object
FullyPremined          object
TotalCoinSupply       float64
DecimalPlaces           int64
SortOrder             float64
Sponsored                bool
Rating                 object
TotalCoinsMined       float64
BlockNumber           float64
NetHashesPerSecond     object
BlockReward           float64
BlockTime             float64
dtype: object


In [23]:
# Creating dummies variables for text features
df_encoded = pd.get_dummies(crypto_df, columns=["Algorithm","ProofType","FullyPremined","Sponsored"])

In [28]:
# Standardizing data
scaler = StandardScaler()
scaler.fit(df_encoded)

TypeError: float() argument must be a string or a number, not 'dict'

### Clustering Crytocurrencies Using K-Means

#### Finding the Best Value for `k` Using the Elbow Curve

In [29]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range ok k values
# Create the Elbow Curve using hvPlot

for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(df_aux)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

ValueError: could not convert string to float: 'EliteCoin'

Running K-Means with `k=<you best value for k here>`

In [17]:
def get_clusters(k, data):
    # Initialize the K-Means model
    model = KMeans(n_clusters=k, random_state=0)

    # Fit the model
    model.fit(data)

    # Predict clusters
    predictions = model.predict(data)

    
    data["class"] = model.labels_

    return data


In [18]:
# Creating a new DataFrame including predicted clusters and cryptocurrencies features
five_clusters = get_clusters(5, df_pca)
five_clusters.head(20)

NameError: name 'df_pca' is not defined

### Visualizing Results

#### 3D-Scatter with Clusters

In [30]:
# Creating a 3D-Scatter with the PCA data and the clusters

# Creating a 3D-Scatter with the PCA data and the clusters
import plotly.express as px
fig = px.scatter_3d(
    result,
    x="p3",
    y="p2",
    z="p1",
    color="class",
    symbol="class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
    width=800,
)
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

NameError: name 'result' is not defined

#### Table of Tradable Cryptocurrencies

In [None]:
# Table with tradable cryptos
result.hvplot.table()


#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Scaling data to create the scatter plot



In [None]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"

