In [2]:
import os

import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

In [31]:
# Load the patient table and use the 'id' column as the row index right at
# read time, so no in-place re-indexing step is needed afterwards.
patients = pd.read_csv('./heart_disease_patients.csv', index_col='id')
patients.head()

Unnamed: 0_level_0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,63,1,1,145,233,1,2,150,0,2.3,3
2,67,1,4,160,286,0,2,108,1,1.5,2
3,67,1,4,120,229,0,2,129,1,2.6,2
4,37,1,3,130,250,0,0,187,0,3.5,3
5,41,0,2,130,204,0,2,172,0,1.4,1


In [32]:
# Feature matrix for clustering: a copy of the patient columns.
# 'id' is already the index (so it is not a column here), and 'cluster' is
# added to `patients` by a later cell; dropping both with errors='ignore'
# keeps this cell idempotent if it is re-run after the labels are attached.
X = patients.drop(columns=['id', 'cluster'], errors='ignore')
X.head()

Unnamed: 0_level_0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,63,1,1,145,233,1,2,150,0,2.3,3
2,67,1,4,160,286,0,2,108,1,1.5,2
3,67,1,4,120,229,0,2,129,1,2.6,2
4,37,1,3,130,250,0,0,187,0,3.5,3
5,41,0,2,130,204,0,2,172,0,1.4,1


In [33]:
# Seed NumPy's global RNG before fitting; KMeans is created without an
# explicit random_state, so this seed is what makes the run repeatable.
np.random.seed(2137)

# Three-cluster KMeans on the feature matrix.
kmeans = KMeans(n_init='auto', n_clusters=3)
kmeans.fit(X)

# Attach each patient's cluster label as a new column and preview the table.
patients['cluster'] = kmeans.labels_
patients.head()

Unnamed: 0_level_0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,cluster
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,63,1,1,145,233,1,2,150,0,2.3,3,2
2,67,1,4,160,286,0,2,108,1,1.5,2,1
3,67,1,4,120,229,0,2,129,1,2.6,2,2
4,37,1,3,130,250,0,0,187,0,3.5,3,2
5,41,0,2,130,204,0,2,172,0,1.4,1,0


In [6]:
# Sweep the candidate cluster counts 2..30 and print the mean silhouette
# score for each one, as "k, score" lines.
for n_clusters in range(2, 31):
    clusterer = KMeans(random_state=2137, n_init='auto', n_clusters=n_clusters)
    clusterer.fit(X)
    cluster_labels = clusterer.labels_
    silhouette_avg = silhouette_score(X, cluster_labels)
    print(n_clusters, silhouette_avg, sep=', ')

2, 0.3895180943094319
3, 0.28268367106354453
4, 0.23603458951285414
5, 0.2641508499353783
6, 0.26504636573126067
7, 0.2615278391518505
8, 0.21758348320395504
9, 0.2357227190472259
10, 0.23899000102604626
11, 0.2356551544078068
12, 0.2197124683090452
13, 0.22886606439922635
14, 0.225470428338883
15, 0.22083416656634966
16, 0.2084908100855282
17, 0.20764839497551127
18, 0.21866902973751987
19, 0.21188339694595545
20, 0.2236345755509699
21, 0.1995230954098422
22, 0.20249189266086987
23, 0.19909603414650393
24, 0.19655957688667927
25, 0.20914224621176083
26, 0.21323290811249485
27, 0.20349385531774905
28, 0.21985895205096123
29, 0.21636409944606064
30, 0.2184146931877055


# Quantum time

In [34]:
from qiskit_algorithms.utils import algorithm_globals
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.datasets import ad_hoc_data
from qiskit_algorithms.state_fidelities import ComputeUncompute
from qiskit_ibm_runtime import Sampler, Estimator, Options, Session, QiskitRuntimeService
from sklearn.svm import SVC
algorithm_globals.random_seed = 2137

In [43]:
# Number of input features encoded by the feature map (one qubit per feature).
dim = 11

# ZZ feature map with two repetitions and linear entanglement; its width is
# taken from `dim` so the value is defined in exactly one place.
sim_feature_map = ZZFeatureMap(feature_dimension=dim, reps=2, entanglement="linear")

# Fidelity kernel built from the feature map only; no fidelity/sampler is passed here.
sim_kernel = FidelityQuantumKernel(feature_map=sim_feature_map)

In [38]:
# Read the IBM Quantum API token from the environment instead of pasting it
# into the notebook, so the secret never ends up in a saved/committed file.
# Set QISKIT_IBM_TOKEN before starting the kernel; a missing variable raises
# KeyError here rather than failing later with an authentication error.
service = QiskitRuntimeService(channel='ibm_quantum', token=os.environ['QISKIT_IBM_TOKEN'])

In [39]:
# Look up the backend used for the sampler by its name.
backend_name = 'ibmq_qasm_simulator'
backend = service.get_backend(backend_name)

In [40]:
# Runtime options with resilience_level set to 0, handed to a Sampler bound
# to the selected backend.
options = Options(resilience_level=0)
sampler = Sampler(options=options, backend=backend)

# Compute-uncompute fidelity driven by that sampler, wrapped in a kernel that
# reuses the same feature map as the simulated kernel.
fidelity = ComputeUncompute(sampler=sampler)
quantum_kernel = FidelityQuantumKernel(feature_map=sim_feature_map, fidelity=fidelity)

In [None]:
# Evaluate the backend-backed kernel on the feature matrix; only x_vec is
# supplied, so X is the sole input to the kernel evaluation.
quantum_matrix = quantum_kernel.evaluate(X)

In [41]:
# Support-vector classifier that uses the quantum kernel's evaluate method as
# its kernel callable. The estimator is only constructed here, not fitted:
# no training features/labels are defined in the cells shown.
kernel_callable = quantum_kernel.evaluate
qsvc = SVC(kernel=kernel_callable)

In [None]:
# Warning: this evaluation is really heavy — expect a long run time.
# 'done' is printed once the kernel matrix has been computed.
sim_matrix = sim_kernel.evaluate(X)
print('done')

In [36]:
from sklearn.cluster import SpectralClustering
from sklearn.metrics import normalized_mutual_info_score

# Two-way spectral clustering that treats the kernel matrix as a precomputed
# affinity. random_state is pinned so the label-assignment step is repeatable
# regardless of how much global RNG state earlier cells consumed.
spectral = SpectralClustering(2, affinity="precomputed", random_state=2137)

cluster_labels = spectral.fit_predict(sim_matrix)

# fit_predict already returns a NumPy array, so it is printed directly.
print(cluster_labels)

[1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0
 0 0 0 1 0 0 0 1 1 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0
 1 0 0 0 1 0 0 0 1 0 1 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 1 1 1 0 1 1 0 0 0 0
 0 1 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1
 1 1 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 0 1 0 0 1 0 0 0 0 1
 1 0 0 1 1 1 0 0 0 0 0 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0 0 1 0 1 0 0 1 0 1
 1 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 1 1 1
 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 0 1
 1 0 1 0 0 1 1]
