In [1]:
# Import required libraries and dependencies
import pandas as pd
import hvplot.pandas
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the data into a Pandas DataFrame
df_market_data = pd.read_csv("Resources/crypto_market_data.csv", index_col="coin_id")

# Display sample data
print(df_market_data.head(10))

# Generate summary statistics
print(df_market_data.describe())

# Plot your data to see what's in your DataFrame
df_market_data.hvplot.line(width=800, height=400, rot=90)


                 price_change_percentage_24h  price_change_percentage_7d  \
coin_id                                                                    
bitcoin                              1.08388                     7.60278   
ethereum                             0.22392                    10.38134   
tether                              -0.21173                     0.04935   
ripple                              -0.37819                    -0.60926   
bitcoin-cash                         2.90585                    17.09717   
binancecoin                          2.10423                    12.85511   
chainlink                           -0.23935                    20.69459   
cardano                              0.00322                    13.99302   
litecoin                            -0.06341                     6.60221   
bitcoin-cash-sv                      0.92530                     3.29641   

                 price_change_percentage_14d  price_change_percentage_30d  \
coin_id   

### Prepare the Data

In [5]:
# Normalize the data
scaler = StandardScaler()
df_market_data_scaled = scaler.fit_transform(df_market_data)

# Create a DataFrame with the scaled data
df_market_data_scaled = pd.DataFrame(df_market_data_scaled, index=df_market_data.index)

# Display sample data
print(df_market_data_scaled.head())


                     0         1         2         3         4         5  \
coin_id                                                                    
bitcoin       0.508529  0.493193  0.772200  0.235460 -0.067495 -0.355953   
ethereum      0.185446  0.934445  0.558692 -0.054341 -0.273483 -0.115759   
tether        0.021774 -0.706337 -0.021680 -0.061030  0.008005 -0.550247   
ripple       -0.040764 -0.810928  0.249458 -0.050388 -0.373164 -0.458259   
bitcoin-cash  1.193036  2.000959  1.760610  0.545842 -0.291203 -0.499848   

                     6  
coin_id                 
bitcoin      -0.251637  
ethereum     -0.199352  
tether       -0.282061  
ripple       -0.295546  
bitcoin-cash -0.270317  


### Find the Best Value for k Using the Original Data.

In [6]:
# Elbow Method
inertia = []
k_list = range(1, 12)

for k in k_list:
    model = KMeans(n_clusters=k)
    model.fit(df_market_data_scaled)
    inertia.append(model.inertia_)

# Creating a DataFrame with the data to plot the Elbow curve
elbow_data = {"k": k_list, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)

# Plotting the Elbow curve
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k_list)




#### Answer the following question: 

**Question:** What is the best value for `k`?

**Answer:** 

---

### Cluster Cryptocurrencies with K-means Using the Original Data

In [7]:
# Initialize the K-Means model
model = KMeans(n_clusters=4)  # Replace 4 with your optimal k value

# Fit the model
model.fit(df_market_data_scaled)

# Predict clusters
clusters = model.predict(df_market_data_scaled)

# Add a new column to the DataFrame with the predicted clusters
df_market_data["cluster"] = clusters

# Display sample data
print(df_market_data.head())

# Scatter plot
df_market_data.hvplot.scatter(x="price_change_percentage_24h", y="price_change_percentage_7d", by="cluster", hover_cols=["coin_id"])




              price_change_percentage_24h  price_change_percentage_7d  \
coin_id                                                                 
bitcoin                           1.08388                     7.60278   
ethereum                          0.22392                    10.38134   
tether                           -0.21173                     0.04935   
ripple                           -0.37819                    -0.60926   
bitcoin-cash                      2.90585                    17.09717   

              price_change_percentage_14d  price_change_percentage_30d  \
coin_id                                                                  
bitcoin                           6.57509                      7.67258   
ethereum                          4.80849                      0.13169   
tether                            0.00640                     -0.04237   
ripple                            2.24984                      0.23455   
bitcoin-cash                     14.75334   

### Optimize Clusters with Principal Component Analysis.

In [8]:
# Create a PCA model instance
pca = PCA(n_components=3)

# Fit and transform PCA
df_market_data_pca = pca.fit_transform(df_market_data_scaled)

# Create a DataFrame with the PCA data
df_pca = pd.DataFrame(df_market_data_pca, columns=["PC1", "PC2", "PC3"], index=df_market_data.index)

# Display sample data
print(df_pca.head())

# Explained variance
print(pca.explained_variance_ratio_)


                   PC1       PC2       PC3
coin_id                                   
bitcoin      -0.600667  0.842760  0.461595
ethereum     -0.458261  0.458466  0.952877
tether       -0.433070 -0.168126 -0.641752
ripple       -0.471835 -0.222660 -0.479053
bitcoin-cash -1.157800  2.041209  1.859715
[0.3719856  0.34700813 0.17603793]
