# Cryptocurrency Clustering Analysis

## 1. Import Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import hvplot.pandas

## 2. Load and Summarize Data

In [5]:
# Load the CSV file into a DataFrame
crypto_market_data_path = 'Starter_Code/Resources/crypto_market_data.csv'
crypto_market_data = pd.read_csv(crypto_market_data_path)

# Display summary statistics
summary_stats = crypto_market_data.describe()
summary_stats

FileNotFoundError: [Errno 2] No such file or directory: 'Starter_Code/Resources/crypto_market_data.csv'

## 3. Prepare the Data

In [None]:
# Standardize the data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(crypto_market_data.iloc[:, 1:])

# Create a DataFrame with the scaled data
scaled_df = pd.DataFrame(scaled_data, columns=crypto_market_data.columns[1:])
scaled_df.index = crypto_market_data['coin_id']
scaled_df.head()

## 4. Find the Best Value for k Using the Original Data

In [None]:
# Elbow method to find the best k
inertia = []
k_values = range(1, 12)

for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(scaled_df)
    inertia.append(kmeans.inertia_)

# Plot the elbow curve
plt.figure(figsize=(10, 6))
plt.plot(k_values, inertia, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k')
plt.show()

## 5. Cluster Cryptocurrencies with K-means

In [None]:
# Initialize K-means with the best k value (assume 4 for this example)
kmeans = KMeans(n_clusters=4, random_state=42)
crypto_clusters = kmeans.fit_predict(scaled_df)

# Add the cluster labels to the original DataFrame
crypto_market_data['Cluster'] = crypto_clusters

## 6. Visualize Clusters Using hvPlot

In [None]:
# Scatter plot with hvPlot
crypto_market_data.hvplot.scatter(
    x='price_change_percentage_24h',
    y='price_change_percentage_7d',
    by='Cluster',
    hover_cols=['coin_id'],
    title='Cryptocurrency Clusters'
)

## 7. Optimize Clusters with PCA

In [None]:
# Perform PCA and reduce to three principal components
pca = PCA(n_components=3)
pca_data = pca.fit_transform(scaled_df)

# Create a DataFrame with PCA data
pca_df = pd.DataFrame(pca_data, columns=['PC1', 'PC2', 'PC3'])
pca_df.index = crypto_market_data['coin_id']
pca_df.head()

# Explained variance
explained_variance = pca.explained_variance_ratio_
total_explained_variance = explained_variance.sum()
total_explained_variance

## 8. Find the Best Value for k Using PCA Data

In [None]:
# Elbow method with PCA data
pca_inertia = []

for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(pca_df)
    pca_inertia.append(kmeans.inertia_)

# Plot the elbow curve for PCA data
plt.figure(figsize=(10, 6))
plt.plot(k_values, pca_inertia, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k (PCA Data)')
plt.show()

## 9. Cluster Cryptocurrencies with PCA Data

In [None]:
# Initialize K-means with the best k value (assume 4 for this example)
kmeans_pca = KMeans(n_clusters=4, random_state=42)
pca_clusters = kmeans_pca.fit_predict(pca_df)

# Add the cluster labels to the PCA DataFrame
pca_df['Cluster'] = pca_clusters

## 10. Compare Results

In [None]:
# Composite plot for elbow curves
elbow_original = pd.DataFrame({'k': k_values, 'inertia': inertia})
elbow_pca = pd.DataFrame({'k': k_values, 'inertia': pca_inertia})

elbow_plot = elbow_original.hvplot.line(x='k', y='inertia', label='Original Data') + elbow_pca.hvplot.line(x='k', y='inertia', label='PCA Data')
elbow_plot.opts(title='Elbow Curve Comparison')

# Composite plot for clustering comparison
cluster_original_plot = crypto_market_data.hvplot.scatter(
    x='price_change_percentage_24h',
    y='price_change_percentage_7d',
    by='Cluster',
    hover_cols=['coin_id'],
    title='Cryptocurrency Clusters'
)

cluster_pca_plot = pca_df.hvplot.scatter(
    x='PC1',
    y='PC2',
    by='Cluster',
    hover_cols=['coin_id'],
    title='Cryptocurrency Clusters (PCA Data)'
)

cluster_comparison_plot = cluster_original_plot + cluster_pca_plot
cluster_comparison_plot.opts(title='Cluster Comparison')