In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Apply matplotlib's "fivethirtyeight" style sheet to any figures drawn later.
# NOTE(review): no plotting call appears in the cells visible here — confirm
# this configuration is still needed.
plt.style.use("fivethirtyeight")

# IPython magics: render inline figures as SVG and use the inline backend.
%config InlineBackend.figure_format = 'svg'
%matplotlib inline

In [2]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [3]:
# Load the Boston housing data bundle and expose it as a feature frame + target.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn (or a different data source) —
# confirm the pinned version.
boston = load_boston()
feature_names = boston.feature_names

X = pd.DataFrame(data=boston.data, columns=feature_names)
y = boston.target

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
# From here on X_train / X_test hold the scaler's output (NumPy arrays by
# default) instead of the original DataFrames.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [4]:
# Partition the standardized training rows into three clusters with K-Means.
# n_init is pinned to 10 (the value older scikit-learn releases used
# implicitly) because the default became 'auto' in scikit-learn 1.4; leaving it
# unset makes the clustering depend on the installed version and triggers a
# FutureWarning on 1.2/1.3.
model = KMeans(n_clusters=3, n_init=10, max_iter=100, random_state=42)
model.fit(X_train)

# Cluster index assigned to every training and test row. Despite the "y_"
# prefix these are cluster labels, not predictions of the target.
y_train_preds = model.predict(X_train)
y_test_preds = model.predict(X_test)

In [5]:
# Undo the standardization so per-cluster feature means are reported on the
# dataset's original scale rather than as z-scores.
X_test_inverse = pd.DataFrame(scaler.inverse_transform(X_test), columns=feature_names)

# Summarize each cluster on the held-out split: mean target value and mean CRIM.
# The cluster count is read from the fitted model so the loop stays in sync
# with whatever n_clusters the model was built with.
for i in range(model.n_clusters):
  in_cluster = y_test_preds == i  # boolean mask over the test rows
  print(f"Cluster {i}")
  print(f"Mean price: {np.round(y_test[in_cluster].mean(), 2)}")
  print(f"Mean CRIM: {np.round(X_test_inverse.loc[in_cluster, 'CRIM'].mean(), 2)}\n")

Cluster 0
Mean price: 21.86
Mean CRIM: 0.27

Cluster 1
Mean price: 16.44
Mean CRIM: 10.17

Cluster 2
Mean price: 31.35
Mean CRIM: 0.06

