# Demo: K-Means

In [2]:
# Import dependencies
import pandas as pd
import hvplot.pandas
from pathlib import Path

In [3]:
# Load the service ratings CSV into a Pandas DataFrame
ratings_csv_path = Path("../Resources/service_ratings.csv")
service_ratings_df = pd.read_csv(ratings_csv_path)

# Preview the first five rows of the DataFrame
service_ratings_df.head()

Unnamed: 0,mobile_app_rating,personal_banker_rating
0,3.5,2.4
1,3.65,3.14
2,2.9,2.75
3,2.93,3.36
4,2.89,2.62


In [4]:
# Visualize the raw ratings as a scatter plot; the title lets the figure
# stand on its own when the notebook is skimmed
service_ratings_df.hvplot.scatter(
    x="mobile_app_rating",
    y="personal_banker_rating",
    title="Mobile App Rating vs. Personal Banker Rating",
)

## Run the k-means model with 2 clusters

In [5]:
# Start by importing the K-means algorithm
from sklearn.cluster import KMeans

In [6]:
# Create and initialize the K-means model instance for two clusters.
# n_init is pinned explicitly: scikit-learn 1.4 changed its default from 10 to
# "auto", so leaving it unset makes results depend on the installed version
# (and triggers a FutureWarning on 1.2/1.3). random_state fixes the seed so
# repeated runs give the same clustering.
model = KMeans(n_clusters=2, n_init=10, random_state=1)

# Display the model
model

KMeans(n_clusters=2, random_state=1)

In [7]:
# Train the K-means model on the service ratings data
model.fit(X=service_ratings_df)

KMeans(n_clusters=2, random_state=1)

In [8]:
# Use the fitted model to assign each row of the data to a cluster
customer_ratings = model.predict(X=service_ratings_df)

# Show the predicted cluster labels
print(customer_ratings)

[1 1 0 0 0 0 1 1 0 0 1 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 1 0 0
 0 1 0 0 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 0 1 0 1 0 1 0 0 1 1 0 1 1 0 0 1
 0 1 1 0 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0
 1 1 1 1 1 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 1 0 1 0 1 1 0 1 0 0 1 1 1 0 1
 1 0 0 0 1 1 1 1 1 1 1 1 0 1 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 1 0 1 1 1]


In [9]:
# Build a new DataFrame holding the original ratings plus a column with the
# predicted cluster labels; assign() returns a new object, so
# service_ratings_df is left unchanged
service_rating_predictions_df = service_ratings_df.assign(
    **{"customer rating": customer_ratings}
)

# Preview the first five rows
service_rating_predictions_df.head()

Unnamed: 0,mobile_app_rating,personal_banker_rating,customer rating
0,3.5,2.4,1
1,3.65,3.14,1
2,2.9,2.75,0
3,2.93,3.36,0
4,2.89,2.62,0


In [10]:
# Plot the data points, grouped by their predicted cluster ("customer rating");
# the title lets the figure stand on its own when the notebook is skimmed
service_rating_predictions_df.hvplot.scatter(
    x="mobile_app_rating",
    y="personal_banker_rating",
    by="customer rating",
    title="Service Ratings Grouped by K-Means Cluster (k=2)",
)