## Initilaization

In [1]:
import keras
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score, confusion_matrix




## Load dataset

In [2]:
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

## Train Test Split

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Modeling - Kmeans clustering

In [4]:
model = KMeans(n_clusters=2, random_state=15)
model.fit(X_train)
model.labels_



array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,

In [5]:
model.cluster_centers_

array([[1.25751320e+01, 1.84767697e+01, 8.12803933e+01, 4.96814607e+02,
        9.52784270e-02, 9.20367416e-02, 6.39281003e-02, 3.42331236e-02,
        1.78686517e-01, 6.34892135e-02, 3.05503371e-01, 1.21596573e+00,
        2.15199522e+00, 2.40212247e+01, 7.14192135e-03, 2.34601461e-02,
        2.91695438e-02, 1.07140927e-02, 2.07040955e-02, 3.68756264e-03,
        1.40836910e+01, 2.46412360e+01, 9.21614888e+01, 6.22630337e+02,
        1.30672219e-01, 2.25026910e-01, 2.21660834e-01, 9.31708455e-02,
        2.85147753e-01, 8.31570787e-02],
       [1.96408081e+01, 2.15841414e+01, 1.29712121e+02, 1.21840101e+03,
        1.00494444e-01, 1.43887980e-01, 1.72192525e-01, 1.00676465e-01,
        1.89039394e-01, 5.97150505e-02, 7.51404040e-01, 1.18799899e+00,
        5.24623232e+00, 9.87717172e+01, 6.50525253e-03, 3.08278485e-02,
        3.97551515e-02, 1.49987071e-02, 1.98822222e-02, 3.79204040e-03,
        2.40679798e+01, 2.87661616e+01, 1.60503030e+02, 1.81095960e+03,
        1.38885253e-01,

## Evaluation

In [6]:
y_pre = model.predict(X_test)
confusion_matrix(y_test, y_pre)

array([[21, 26],
       [67,  0]], dtype=int64)

## Modeling - Hierarchical Clustering 

In [7]:
model = AgglomerativeClustering()
model.fit(X_train)

In [8]:
y_pre = model.fit_predict(X_test)
confusion_matrix(y_test, y_pre)

array([[23, 24],
       [67,  0]], dtype=int64)

## Modeling - DBSCAN

In [12]:
model = DBSCAN(eps=2 , min_samples=5)
model.fit(X_train)
y_pre = model.fit_predict(X_test)
confusion_matrix(y_test, y_pre)

array([[ 0,  0,  0],
       [47,  0,  0],
       [67,  0,  0]], dtype=int64)