### Initialization

In [93]:
from sklearn.datasets import load_breast_cancer
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

### Load dataset

In [94]:
data = load_breast_cancer()
X = data.data
y = data.target

### Test train split 

In [100]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
X_train.shape

(455, 30)

### Modeling - Kmeans clustering

In [99]:
model = KMeans(n_clusters = 2)
model.fit(X_train)
model.cluster_centers_

array([[ 0.96561598,  0.48956059,  1.00024828,  0.96844326,  0.59587395,
         1.05007832,  1.14661797,  1.15584818,  0.61196117,  0.31444713,
         0.91264512,  0.03707434,  0.93068619,  0.9598984 ,  0.04656472,
         0.78698764,  0.7341398 ,  0.82449216,  0.13678101,  0.55719624,
         1.03272515,  0.50443574,  1.06387095,  1.00935441,  0.61592074,
         0.99689455,  1.06051001,  1.14875059,  0.61000818,  0.69598195],
       [-0.47963162, -0.2431699 , -0.49683385, -0.48103596, -0.29597686,
        -0.52158496, -0.56953721, -0.57412196, -0.30396755, -0.1561892 ,
        -0.45332044, -0.01841521, -0.46228163, -0.47679164, -0.02312919,
        -0.39090505, -0.36465497, -0.40953393, -0.06794057, -0.27676524,
        -0.51296545, -0.25055854, -0.5284359 , -0.50135696, -0.30593431,
        -0.49516802, -0.52676648, -0.57059651, -0.30299748, -0.34570156]])

### Evaluation

In [102]:
y_pre = model.predict(X_test)
confusion_matrix(y_test, y_pre)

array([[36,  8],
       [ 2, 68]], dtype=int64)

In [103]:
model.inertia_

8987.740526402751

### Modeling - Hierarchical Clustering

In [104]:
model = AgglomerativeClustering()
model.fit(X_train)

AgglomerativeClustering()

In [105]:
y_pre = model.fit_predict(X_test)
confusion_matrix(y_pre, y_test)

array([[40,  8],
       [ 4, 62]], dtype=int64)

### Modeling - DBSCAN

In [106]:
model = DBSCAN(eps = 2, min_samples = 5)
model.fit(X_train)
model.labels_

array([-1,  0, -1, -1,  0,  0, -1,  1, -1,  0, -1, -1,  0, -1, -1, -1,  0,
        0, -1, -1, -1,  0,  0,  0,  0, -1,  0, -1, -1, -1, -1,  0, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0, -1, -1,
       -1, -1,  0, -1,  0, -1, -1, -1, -1, -1, -1, -1,  0, -1,  0,  0, -1,
        0, -1, -1, -1, -1,  0, -1, -1, -1, -1, -1,  0, -1, -1,  0, -1, -1,
       -1, -1, -1, -1,  0, -1, -1, -1, -1, -1,  0,  0,  0, -1, -1, -1,  0,
       -1, -1, -1,  0, -1, -1, -1, -1,  0, -1, -1, -1, -1, -1,  0, -1,  0,
       -1, -1, -1, -1,  0, -1,  0,  0, -1, -1,  0, -1, -1,  0, -1,  0,  0,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1,  0, -1,  1,  0,  1,  1, -1,  0,  0, -1, -1, -1, -1,  1,
       -1,  0, -1, -1, -1,  0,  0, -1, -1, -1,  1,  0, -1, -1, -1, -1, -1,
       -1, -1,  0, -1, -1,  0, -1,  1, -1,  0, -1, -1, -1,  0,  0, -1, -1,
       -1, -1, -1, -1,  0, -1, -1,  0,  0,  0, -1, -1, -1, -1, -1, -1, -1,
       -1, -1,  0,  0, -1