In [155]:
import pandas as pd
from pathlib import Path
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import hvplot.pandas

In [177]:
# Read in the CSV file as a Pandas DataFrame
crypto_market_df = pd.read_csv(
    Path("crypto_market_data.csv")
)

# Review the DataFrame
crypto_market_df.head()

Unnamed: 0,coin_id,price_change_percentage_24h,price_change_percentage_7d,price_change_percentage_14d,price_change_percentage_30d,price_change_percentage_60d,price_change_percentage_200d,price_change_percentage_1y
0,bitcoin,1.08388,7.60278,6.57509,7.67258,-3.25185,83.5184,37.51761
1,ethereum,0.22392,10.38134,4.80849,0.13169,-12.8889,186.77418,101.96023
2,tether,-0.21173,0.04935,0.0064,-0.04237,0.28037,-0.00542,0.01954
3,ripple,-0.37819,-0.60926,2.24984,0.23455,-17.55245,39.53888,-16.60193
4,bitcoin-cash,2.90585,17.09717,14.75334,15.74903,-13.71793,21.66042,14.49384


In [178]:
# Check the DataFrame data types
crypto_market_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 8 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   coin_id                       41 non-null     object 
 1   price_change_percentage_24h   41 non-null     float64
 2   price_change_percentage_7d    41 non-null     float64
 3   price_change_percentage_14d   41 non-null     float64
 4   price_change_percentage_30d   41 non-null     float64
 5   price_change_percentage_60d   41 non-null     float64
 6   price_change_percentage_200d  41 non-null     float64
 7   price_change_percentage_1y    41 non-null     float64
dtypes: float64(7), object(1)
memory usage: 2.7+ KB


In [181]:

# Scaling the numeric columns
crypto_market_df_scaled = StandardScaler().fit_transform(crypto_market_df[["price_change_percentage_24h", "price_change_percentage_7d", "price_change_percentage_14d", "price_change_percentage_30d", "price_change_percentage_60d", "price_change_percentage_200d", "price_change_percentage_1y"]])

# Creating a DataFrame with with the scaled data
crypto_market_df_transformed = pd.DataFrame(crypto_market_df_scaled, columns=["price_change_percentage_24h", "price_change_percentage_7d", "price_change_percentage_14d", "price_change_percentage_30d", "price_change_percentage_60d", "price_change_percentage_200d", "price_change_percentage_1y"])


# Display sample data
crypto_market_df_transformed.head()

Unnamed: 0,price_change_percentage_24h,price_change_percentage_7d,price_change_percentage_14d,price_change_percentage_30d,price_change_percentage_60d,price_change_percentage_200d,price_change_percentage_1y
0,0.508529,0.493193,0.7722,0.23546,-0.067495,-0.355953,-0.251637
1,0.185446,0.934445,0.558692,-0.054341,-0.273483,-0.115759,-0.199352
2,0.021774,-0.706337,-0.02168,-0.06103,0.008005,-0.550247,-0.282061
3,-0.040764,-0.810928,0.249458,-0.050388,-0.373164,-0.458259,-0.295546
4,1.193036,2.000959,1.76061,0.545842,-0.291203,-0.499848,-0.270317


In [176]:
# Transform the Card Type column using get_dummies()
coin_id = pd.get_dummies(crypto_market_df["coin_id"])

# Display sample data
coin_id.head()

Unnamed: 0,binance-usd,binancecoin,bitcoin,bitcoin-cash,bitcoin-cash-sv,cardano,cdai,celsius-degree-token,chainlink,cosmos,...,stellar,tether,tezos,theta-token,tron,true-usd,usd-coin,vechain,wrapped-bitcoin,zcash
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [201]:
# Concatenate the df_shopping_transformed and the card_dummies DataFrames
crypto_market_df_transformed = pd.concat([crypto_market_df_transformed, coin_id], axis=1)

# Display sample data
crypto_market_df_transformed.head()

Unnamed: 0,price_change_percentage_24h,price_change_percentage_7d,price_change_percentage_14d,price_change_percentage_30d,price_change_percentage_60d,price_change_percentage_200d,price_change_percentage_1y,binance-usd,binancecoin,bitcoin,...,stellar,tether,tezos,theta-token,tron,true-usd,usd-coin,vechain,wrapped-bitcoin,zcash
0,0.508529,0.493193,0.7722,0.23546,-0.067495,-0.355953,-0.251637,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,0.185446,0.934445,0.558692,-0.054341,-0.273483,-0.115759,-0.199352,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.021774,-0.706337,-0.02168,-0.06103,0.008005,-0.550247,-0.282061,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,-0.040764,-0.810928,0.249458,-0.050388,-0.373164,-0.458259,-0.295546,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1.193036,2.000959,1.76061,0.545842,-0.291203,-0.499848,-0.270317,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [202]:
# Create a a list to store inertia values and the values of k
inertia = []
k = list(range(1, 11))

In [203]:
# Create a for-loop where each value of k is evaluated using the K-means algorithm
# Fit the model using the crypto_market DataFrame
# Append the value of the computed inertia from the `inertia_` attribute of the KMeans model instance
for i in k:
    k_model = KMeans(n_clusters=i, random_state=1)
    k_model.fit(crypto_market_df_transformed)
    inertia.append(k_model.inertia_)



In [204]:
# Define a DataFrame to hold the values for k and the corresponding inertia
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)

# Review the DataFrame
df_elbow.head()

Unnamed: 0,k,inertia
0,1,327.0
1,2,237.571818
2,3,174.478937
3,4,116.022435
4,5,99.858668


In [205]:
# Plot the DataFrame
df_elbow.hvplot.line(
    x="k", 
    y="inertia", 
    title="Elbow Curve", 
    xticks=k
)

In [206]:
# What’s the best value for k? the best value of k is 4

In [218]:
# Create and initialize the K-means model instance for 4 clusters
model = KMeans(n_clusters=4, random_state=1)

# Print the model
model

In [219]:
# Fit the data to the instance of the model
model.fit(crypto_market_df_transformed)



In [220]:
# Make predictions about the data clusters using the trained model
coin_clusters= model.predict(crypto_market_df_transformed)

# Print the predictions
print(coin_clusters)

[0 0 1 1 0 0 0 0 0 1 1 1 1 0 1 0 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 2 0 1 1 3
 1 1 1 1]


In [221]:
# Create a copy of the DataFrame
crypto_market_predictions_df = crypto_market_df_transformed.copy()

# Add a column to the DataFrame that contains the customer_ratings information
crypto_market_predictions_df['coin_clusters'] = coin_clusters

# Review the DataFrame
crypto_market_predictions_df.head()

Unnamed: 0,price_change_percentage_24h,price_change_percentage_7d,price_change_percentage_14d,price_change_percentage_30d,price_change_percentage_60d,price_change_percentage_200d,price_change_percentage_1y,binance-usd,binancecoin,bitcoin,...,tether,tezos,theta-token,tron,true-usd,usd-coin,vechain,wrapped-bitcoin,zcash,coin_clusters
0,0.508529,0.493193,0.7722,0.23546,-0.067495,-0.355953,-0.251637,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,0.185446,0.934445,0.558692,-0.054341,-0.273483,-0.115759,-0.199352,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.021774,-0.706337,-0.02168,-0.06103,0.008005,-0.550247,-0.282061,0,0,0,...,1,0,0,0,0,0,0,0,0,1
3,-0.040764,-0.810928,0.249458,-0.050388,-0.373164,-0.458259,-0.295546,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,1.193036,2.000959,1.76061,0.545842,-0.291203,-0.499848,-0.270317,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [225]:
crypto_market_predictions_df.groupby(by=['coin_clusters'])['price_change_percentage_1y'].mean()

coin_clusters
0   -0.208008
1   -0.182039
2    6.088625
3    1.348488
Name: price_change_percentage_1y, dtype: float64

In [226]:
# Plot the data points based on the customer rating
crypto_market_predictions_df.hvplot.scatter(
    y="price_change_percentage_60d", 
    x="price_change_percentage_1y", 
    by="coin_clusters"
)

In [229]:
crypto_market_predictions_df.drop(columns="coin_clusters", inplace=True)

In [279]:
from sklearn.preprocessing import StandardScaler

In [277]:
crypto_market_predictions_lst = StandardScaler().fit_transform(crypto_market_predictions_df)

In [254]:

# Create a coin_id column in crypto_market DataFrame
# using the index of the original crypto_market DataFrame
crypto_market_predictions_df["coin_id"] = crypto_market_predictions_df.index

# Set the newly created Ticker column as index of the df_stocks_scaled DataFrame
crypto_market_predictions_df = crypto_market_predictions_df.set_index("coin_id")



In [255]:
# Instantiate the PCA instance and declare the number of PCA variables
pca = PCA(n_components=3)

In [256]:
# Fit the PCA model on the transformed credit card DataFrame
crypto_market_predictions_pca = pca.fit_transform(crypto_market_predictions_df)

# Review the first 5 rows of the array of list data
crypto_market_predictions_pca[:3]

array([[-0.60347394,  0.84698059,  0.46614036],
       [-0.46040193,  0.46076165,  0.96226073],
       [-0.43509332, -0.16896835, -0.64807191]])

In [267]:
# Calculate the PCA explained variance ratio
pca.explained_variance_ratio_

array([0.32954088, 0.30761875, 0.15756234])

In [270]:
# Create the PCA DataFrame
# Create the PCA DataFrame
crypto_market_predictions_pca = pd.DataFrame (crypto_market_predictions_df, columns=["PCA1", "PCA2"])

# Review the PCA DataFrame
crypto_market_predictions_pca.head()

Unnamed: 0_level_0,PCA1,PCA2
coin_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,,
1,,
2,,
3,,
4,,
