This code illustrates how K-means can be used as a preprocessing step, with the aim of improving the performance of a classifier on images of the digits 0-9.
The dataset contains 1797 images of size 8x8, each flattened to a vector of length 64.


In [None]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
# Load the digits dataset directly as a (features, labels) pair.
X_digits, y_digits = load_digits(return_X_y=True)

The images are transformed into a 10-dimensional space in which each coordinate is the distance to one of the cluster centers.
Note that the original dimension is 64, so this step also reduces the dimensionality.

In [None]:
# Compare a plain logistic regression against a KMeans -> logistic regression
# pipeline on several train/test splits and report the change in test accuracy.
n_clusters = 10  # the KMeans step feeds one distance feature per cluster to the classifier
n_trials = 12

# Collect per-trial deltas in a plain list; calling np.append inside the loop
# would re-allocate the whole array on every iteration.
accuracy_deltas = []
for random_state in range(n_trials):  # since Kmeans performance is random, we average over some trials
    X_train, X_test, y_train, y_test = train_test_split(
        X_digits, y_digits, random_state=random_state
    )

    # Baseline: logistic regression on the raw 64 pixel features.
    # NOTE(review): recent scikit-learn releases deprecate `multi_class`;
    # confirm against the installed version before upgrading.
    log_reg = LogisticRegression(
        multi_class="ovr", solver="lbfgs", max_iter=5000, random_state=random_state
    )
    log_reg.fit(X_train, y_train)
    before = log_reg.score(X_test, y_test)

    # Candidate: replace the pixels by distances to the KMeans cluster centers,
    # then fit the same classifier on those distances.
    # n_init is pinned explicitly because the library default changed between
    # scikit-learn releases, which would otherwise change the results.
    pipeline = Pipeline([
        ("bobmeans", KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)),
        ("boblogreg", LogisticRegression(
            multi_class="ovr", solver="lbfgs", max_iter=5000, random_state=random_state
        )),
    ])
    pipeline.fit(X_train, y_train)
    after = pipeline.score(X_test, y_test)

    # A negative value here means the KMeans step made the classifier worse.
    print('test {} improved accuracy by {:.4f}'.format(random_state, after - before))
    print('     accuracy is now {:.4f}'.format(after))
    accuracy_deltas.append(after - before)

# Keep `improvement` available as an array, as before, for any later cells.
improvement = np.array(accuracy_deltas)
print('average improvement = {:.4f}'.format(np.mean(improvement)))
  

test 0 improved accuracy by -0.0533
     accuracy is now 0.8978
test 1 improved accuracy by -0.0444
     accuracy is now 0.9289
test 2 improved accuracy by -0.0511
     accuracy is now 0.8956
test 3 improved accuracy by 0.0044
     accuracy is now 0.9356
test 4 improved accuracy by -0.0467
     accuracy is now 0.9178
test 5 improved accuracy by -0.0422
     accuracy is now 0.9222
test 6 improved accuracy by -0.0378
     accuracy is now 0.9156
test 7 improved accuracy by -0.0044
     accuracy is now 0.9400
test 8 improved accuracy by -0.0244
     accuracy is now 0.9378
test 9 improved accuracy by -0.0378
     accuracy is now 0.9178
test 10 improved accuracy by -0.0222
     accuracy is now 0.9333
test 11 improved accuracy by -0.0200
     accuracy is now 0.9400
average improvement = -0.0317
