In [None]:
# Import our dependencies
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import tensorflow as tf
import plotly.express as px
import hvplot.pandas

In [None]:
# Read the cleansed Orders dataset
file_path = "OrdersProducts_Consolidated.csv"

# dropna() returns a new frame rather than modifying in place, so the result
# has to be bound to a name for the null rows to actually be removed.
orders_df = pd.read_csv(file_path).dropna()

# Preview the cleaned data (last expression of the cell, so it is displayed)
orders_df.head()

In [None]:
def elbow_curve(X: pd.DataFrame, max_k: int = 10, title: str = "Elbow Curve - Products"):
    """Build an elbow curve (K-means inertia versus number of clusters) for ``X``.

    One ``KMeans`` model (``random_state=0``) is fitted for every cluster
    count from 1 to ``max_k`` inclusive, and the resulting inertia values are
    plotted as an hvPlot line.

    Args:
        X: Feature matrix accepted by ``KMeans.fit``.
        max_k: Largest number of clusters to try. Defaults to 10.
        title: Title shown on the plot.

    Returns:
        The hvPlot line plot. It is returned (not just created) so that a
        notebook cell ending in ``elbow_curve(X)`` actually displays it.
    """
    k = list(range(1, max_k + 1))
    inertia = []

    # Calculate the inertia for the range of K values
    for i in k:
        km = KMeans(n_clusters=i, random_state=0)
        km.fit(X)
        inertia.append(km.inertia_)

    # Define a DataFrame to plot the Elbow Curve using hvPlot
    df_elbow = pd.DataFrame({"k": k, "inertia": inertia})
    return df_elbow.hvplot.line(x="k", y="inertia", title=title, xticks=k)

In [None]:
def Kmeans_fit(X: pd.DataFrame, n_clusters: int):
    """Fit a K-means model on ``X`` and return the cluster label of each row.

    Args:
        X: Feature matrix accepted by ``KMeans.fit``.
        n_clusters: Number of clusters to form.

    Returns:
        The result of ``KMeans.predict(X)`` for the fitted model, i.e. one
        cluster label per row of ``X``.
    """
    # Honour the requested cluster count instead of a hard-coded value
    kmeans_pmodel = KMeans(n_clusters=n_clusters, random_state=0)

    # Fit the model
    kmeans_pmodel.fit(X)

    # Return the labels so callers can keep and use them
    return kmeans_pmodel.predict(X)

In [None]:
# Feature frame: the orders data without the eval_set and product_name columns
final_df = orders_df.drop(
    columns=[
        "eval_set",
        "product_name",
    ]
)

In [None]:
# One-hot encode the text columns (department, aisle) and keep only the
# first 100 rows as the sample that gets clustered.
X = pd.get_dummies(final_df, columns=["department", "aisle"]).iloc[0:100]
X

In [None]:
# Run the elbow analysis on the encoded 100-row sample
elbow_curve(X)

In [None]:
# KMeans takes the cluster count in its constructor (n_clusters); the data is
# passed to fit/predict. The labels are kept in `predictions` at notebook
# scope because the following cell attaches them to the data.
predictions = KMeans(n_clusters=3, random_state=0).fit(X).predict(X)
predictions

In [None]:
# Create a new DataFrame holding the clustered rows and their predicted class.
# - .copy() keeps final_df itself free of the "Class" column (plain assignment
#   would only create a second name for the same object).
# - .loc[X.index] restricts the frame to the rows present in X, so the labels
#   line up one-to-one. Assumes `predictions` was computed from the current X.
clustered_df = final_df.loc[X.index].copy()

# Add a new column, "Class", that holds the predictions.
clustered_df["Class"] = predictions

# Print the shape of the clustered_df
print(clustered_df.shape)
clustered_df.head(10)

In [None]:
# Scatter plot of num_of_reorders against product_id, grouped by predicted "Class"
clustered_df.hvplot.scatter(
    x="product_id",
    y="num_of_reorders",
    by="Class",
    hover_cols="product_id",
)

In [None]:
# 3D scatter of the clustered rows; colour and marker symbol both follow "Class"
fig = px.scatter_3d(
    clustered_df,
    x="order_id", y="num_of_orders", z="num_of_reorders",
    color="Class", symbol="Class",
    hover_name="aisle", hover_data=["department"],
    width=800,
)

# Place the legend at x=0, y=1 in the layout
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()


In [None]:
# Preview the feature frame before building the column subsets below
final_df.head()

In [None]:
# Subset 1: product id, reorder/order counts, department and aisle
p1_df = pd.DataFrame(
    final_df,
    columns=[
        "product_id",
        "num_of_reorders",
        "num_of_orders",
        "department",
        "aisle",
    ],
)
p1_df

In [None]:
# One-hot encode department and aisle, then keep the first 100 rows
X = pd.get_dummies(p1_df, columns=["department", "aisle"]).iloc[0:100]

In [None]:
# Display the encoded 100-row sample
X

In [None]:
# Run the elbow analysis on the p1_df-based sample
elbow_curve(X)

In [None]:
# Inline elbow curve for the current X.
# NOTE(review): this repeats the logic of the elbow_curve function defined
# earlier in the notebook; candidate for consolidation.
k = list(range(1, 11))
inertia = []

# Fit one K-means model per candidate k and record its inertia
for i in k:
    km = KMeans(n_clusters=i, random_state=0).fit(X)
    inertia.append(km.inertia_)

# Tabulate k against inertia and draw the line with hvPlot
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve - Products", xticks=k)

In [None]:
# Subset 2: order timing (day of week, hour of day) plus order/reorder counts
p2_df = pd.DataFrame(
    final_df,
    columns=[
        "order_dow",
        "order_hour_of_day",
        "num_of_orders",
        "num_of_reorders",
    ],
)
p2_df

In [None]:
# Use p2_df directly as the feature matrix: X is bound to the same object
# (no copy) and, unlike the earlier samples, no row slice is applied here.
X = p2_df
elbow_curve(X)

In [None]:
# Inline elbow curve for the p2_df features held in X.
# NOTE(review): same logic as the elbow_curve function defined earlier in the
# notebook; candidate for consolidation.
k = list(range(1, 11))
inertia = []

# One K-means fit per cluster count, collecting the inertia of each model
for i in k:
    km = KMeans(n_clusters=i, random_state=0).fit(X)
    inertia.append(km.inertia_)

# Assemble the (k, inertia) table and plot it with hvPlot
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve - Products", xticks=k)

In [None]:
# Subset 3: order/reorder counts with department and aisle
p3_df = pd.DataFrame(
    final_df,
    columns=[
        "num_of_orders",
        "num_of_reorders",
        "department",
        "aisle",
    ],
)
p3_df

In [None]:
# One-hot encode department and aisle for the p3_df subset.
# Unlike the earlier samples, no row slice is applied here.
X = pd.get_dummies(p3_df, columns=["department", "aisle"])
X

In [None]:
# Inline elbow curve for the encoded p3_df features held in X.
# NOTE(review): same logic as the elbow_curve function defined earlier in the
# notebook; candidate for consolidation.
k = list(range(1, 11))
inertia = []

# Record the inertia of a K-means model for each candidate cluster count
for i in k:
    km = KMeans(n_clusters=i, random_state=0).fit(X)
    inertia.append(km.inertia_)

# Put the results in a DataFrame and plot the curve with hvPlot
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve - Products", xticks=k)

In [None]:
# KMeans takes the cluster count in its constructor (n_clusters); the data is
# passed to fit/predict. Cluster the current X into 3 groups and show the labels.
p3_predictions = KMeans(n_clusters=3, random_state=0).fit(X).predict(X)
p3_predictions