<a href="https://colab.research.google.com/github/N-SAVITHA/AdvancedLiterateMachinery/blob/main/breastcancer_qml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install qiskit==0.38.0

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from qiskit import Aer, execute
from qiskit import QuantumCircuit, ClassicalRegister, QuantumRegister
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import adjusted_rand_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Local Aer 'qasm_simulator' backend; get_distance() executes its circuits
# on this module-level object.
backend = Aer.get_backend('qasm_simulator')

def get_theta(d):
    """Map a 2-D point to the rotation angle used to encode it on a qubit.

    The angle is ``2 * acos((d[0] + d[1]) / 2)``, so only the mean of the two
    coordinates is encoded.

    Args:
        d: Indexable with at least two numeric entries. The mean of the two
            entries is expected to lie in [-1, 1].

    Returns:
        The angle in radians, in [0, 2*pi]. NaN inputs yield NaN.
    """
    mean_coord = (d[0] + d[1]) / 2.0

    # Scaled data can land a hair outside [-1, 1] through float round-off,
    # which would make math.acos raise "math domain error". Clamp explicitly
    # (rather than with min/max) so that NaN still propagates unchanged.
    if mean_coord > 1.0:
        mean_coord = 1.0
    elif mean_coord < -1.0:
        mean_coord = -1.0

    return 2 * math.acos(mean_coord)

def get_distance(x, y, shots=2048):
    """Estimate how dissimilar two points are with a swap-test circuit.

    Each point is turned into an angle by ``get_theta`` and loaded onto its
    own qubit with a ``u(theta, 0, 0)`` gate. An ancilla qubit is put through
    H, controlled-SWAP, H and then measured; the fraction of shots in which it
    reads '1' is returned.

    NOTE: because ``get_theta`` only looks at the mean of the two coordinates,
    points with the same coordinate sum give the same encoded state.

    Args:
        x: First point (indexable, two coordinates).
        y: Second point (indexable, two coordinates).
        shots: Number of circuit repetitions used for the estimate.

    Returns:
        Fraction of shots whose ancilla measurement was '1'.

    Raises:
        ValueError: If ``shots`` is not positive.
    """
    if shots <= 0:
        raise ValueError("shots must be a positive integer")

    theta_1 = get_theta(x)
    theta_2 = get_theta(y)

    qr = QuantumRegister(3, name="qr")
    cr = ClassicalRegister(1, name="cr")
    qc = QuantumCircuit(qr, cr, name="k_means")

    # qr[0] is the ancilla; qr[1] and qr[2] carry the two encoded points.
    qc.h(qr[0])
    qc.u(theta_1, 0, 0, qr[1])
    qc.u(theta_2, 0, 0, qr[2])
    qc.cswap(qr[0], qr[1], qr[2])
    qc.h(qr[0])

    qc.measure(qr[0], cr[0])

    job = execute(qc, backend=backend, shots=shots)
    counts = job.result().get_counts(qc)

    # The same `shots` value drives both the run and the normalisation, so
    # the two can no longer drift apart. '1' may be absent from the counts.
    return counts.get('1', 0) / shots

def draw_plot(points, centers, label=True):
    """Scatter-plot 2-D points on the unit square of the current axes.

    NOTE: this file defines ``draw_plot`` again further down; that later
    definition replaces this one once it has been executed.

    Args:
        points: 2-D array; column 0 is plotted on x, column 1 on y.
        centers: Per-point values used for colouring when ``label`` is truthy.
        label: If truthy, colour by ``centers`` and title the plot
            "Predicted Clusters"; otherwise plot uncoloured and title it
            "True Labels".
    """
    xs, ys = points[:, 0], points[:, 1]

    if label:
        plt.scatter(xs, ys, c=centers, cmap='viridis')
        heading = "Predicted Clusters"
    else:
        plt.scatter(xs, ys)
        heading = "True Labels"

    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.title(heading)

def initialize_centers(points, k):
    """Pick ``k`` distinct rows of ``points`` at random as starting centroids.

    Uses NumPy's global random state, so results are only reproducible if the
    caller seeds ``np.random`` beforehand.

    Args:
        points: 2-D array with one point per row.
        k: Number of rows to draw (without replacement).

    Returns:
        Array with ``k`` rows taken from ``points``.
    """
    chosen_rows = np.random.choice(points.shape[0], k, replace=False)
    return points[chosen_rows, :]

def find_nearest_neighbour(points, centroids):
    """Assign every point to the centroid with the smallest ``get_distance``.

    Ties go to the lowest centroid index.

    Args:
        points: 2-D array with one point per row.
        centroids: 2-D array with one centroid per row.

    Returns:
        Float array of length ``len(points)`` holding the index of the chosen
        centroid for each point.
    """
    n_points = len(points)
    n_centroids = centroids.shape[0]
    assignments = np.zeros(n_points)

    for row in range(n_points):
        # One circuit run per (point, centroid) pair, in centroid order.
        scores = [
            get_distance(points[row, :], centroids[col, :])
            for col in range(n_centroids)
        ]
        # min() keeps the first of several equal minima, i.e. the lowest index.
        assignments[row] = min(
            range(n_centroids), key=scores.__getitem__, default=0
        )

    return assignments

def find_centroids(points, centers, k=None):
    """Compute the mean position of every cluster.

    Args:
        points: 2-D array-like with one point per row (any number of columns).
        centers: 1-D array-like giving the cluster index of each point.
        k: Number of clusters. Defaults to ``max(centers) + 1``; pass it
            explicitly so that an empty highest-numbered cluster does not
            shrink the result.

    Returns:
        Array of shape ``(k, points.shape[1])``. Row ``i`` is the mean of the
        points assigned to cluster ``i``. A cluster with no points gets the
        mean of all points instead of NaN.
    """
    points = np.asarray(points)
    centers = np.asarray(centers)

    if k is None:
        k = int(np.max(centers)) + 1

    # Size the output from the data rather than assuming two columns.
    centroids = np.zeros([k, points.shape[1]])
    overall_mean = points.mean(axis=0)

    for i in range(k):
        members = points[centers == i]
        # np.mean over zero rows would yield NaN and a RuntimeWarning.
        centroids[i, :] = members.mean(axis=0) if len(members) else overall_mean

    return centroids

def preprocess(points):
    """Rescale each column of ``points`` with a freshly fitted MinMaxScaler.

    The scaler is created with its default settings, fitted on ``points`` and
    discarded afterwards, so the fitted parameters are not reusable.

    Args:
        points: 2-D array-like with one sample per row.

    Returns:
        The transformed array.
    """
    return MinMaxScaler().fit_transform(points)

def cluster_purity(y_true, y_pred):
    """Return the purity of a clustering with respect to the true labels.

    Purity is the share of samples that belong to the most frequent true
    class of their cluster.

    Args:
        y_true: 1-D array-like of ground-truth labels.
        y_pred: 1-D array-like of cluster ids, same length as ``y_true``.

    Returns:
        Purity as a float in (0, 1].
    """
    # Rows are true classes, columns are predicted clusters.
    contingency = pd.crosstab(y_true, y_pred).values

    # Size of the dominant true class inside every cluster.
    majority_per_cluster = contingency.max(axis=0)

    return majority_per_cluster.sum() / contingency.sum()
def calculate_accuracy(conf_matrix):
    """Return the fraction of samples on the diagonal of a confusion matrix.

    Args:
        conf_matrix: Square 2-D array-like of counts.

    Returns:
        Sum of the diagonal divided by the sum of all entries.
    """
    on_diagonal = np.trace(conf_matrix)
    everything = np.sum(conf_matrix)
    return on_diagonal / everything

from itertools import permutations

# Load the dataset (path of the file as uploaded to the Colab session)
data = pd.read_csv('/content/breast-cancer (1).csv')

# Preprocess the dataset: drop the identifier and the target, keep numeric columns
features = data.drop(['id', 'diagnosis'], axis=1)
features = features.select_dtypes(include=[np.number])

# Standardize the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Apply PCA to reduce dimensions to 2
pca = PCA(n_components=2)
points = pca.fit_transform(features_scaled)

# Normalize the points for the quantum algorithm
points = preprocess(points)

# Splitting into train and test sets
labels = data['diagnosis'].map({'M': 0, 'B': 1}).values
X_train, X_test, y_train, y_test = train_test_split(points, labels, test_size=0.3, random_state=42)

# Parameters
k = 2  # Number of centers
max_iterations = 20

# Initialize centroids
centroids = initialize_centers(X_train, k)

# Run k-means algorithm on training set
for _ in range(max_iterations):
    centers = find_nearest_neighbour(X_train, centroids)
    new_centroids = find_centroids(X_train, centers)

    # A cluster that received no points keeps its previous centroid. Without
    # this, the centroid array could shrink or pick up NaN rows, and the
    # convergence check below would compare arrays of different shapes.
    populated = np.bincount(centers.astype(int), minlength=k) > 0
    rows = min(len(new_centroids), k)
    updated_centroids = centroids.copy()
    updated_centroids[:rows][populated[:rows]] = new_centroids[:rows][populated[:rows]]

    # Convergence check
    if np.allclose(centroids, updated_centroids):
        break
    centroids = updated_centroids

# Predict clusters on test set
test_centers = find_nearest_neighbour(X_test, centroids)


def align_clusters_to_labels(y_true, y_cluster):
    """Relabel cluster ids with the class labels they agree with best.

    k-means numbers its clusters arbitrarily, so cluster 0 need not correspond
    to class 0. This tries every one-to-one cluster -> class assignment and
    keeps the one with the most matching samples. If there are more clusters
    than classes, each cluster is mapped to its majority class instead.

    Args:
        y_true: 1-D array of ground-truth class labels.
        y_cluster: 1-D array of cluster ids, same length as ``y_true``.

    Returns:
        Array like ``y_cluster`` but expressed in class labels.
    """
    table = pd.crosstab(y_cluster, y_true)  # rows: clusters, columns: classes
    clusters = list(table.index)
    classes = list(table.columns)

    if len(clusters) > len(classes):
        mapping = {c: table.loc[c].idxmax() for c in clusters}
    else:
        def agreement(assignment):
            return sum(table.loc[c, l] for c, l in zip(clusters, assignment))

        best = max(permutations(classes, len(clusters)), key=agreement)
        mapping = dict(zip(clusters, best))

    return np.array([mapping[c] for c in y_cluster])


# Evaluate the model
true_labels = y_test

# Adjusted Rand Index (invariant to how the clusters are numbered)
ari_score = adjusted_rand_score(true_labels, test_centers)
print(f"Adjusted Rand Index: {ari_score}")

# Corrected Cluster Purity (also invariant to cluster numbering)
purity = cluster_purity(true_labels, test_centers)
print(f"Cluster Purity: {purity}")

# Confusion Matrix
# Raw cluster ids are first translated to class labels; otherwise a perfectly
# good clustering with swapped ids would show up as near-zero accuracy.
predicted_labels = align_clusters_to_labels(true_labels, test_centers)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion Matrix:")
print(conf_matrix)

# Accuracy
accuracy = calculate_accuracy(conf_matrix)
print(f"Accuracy: {accuracy}")
def draw_plot(points, centers, label=True, title="Plot"):
    """Scatter-plot 2-D points on the unit square, coloured by ``centers``.

    Args:
        points: 2-D array; column 0 is plotted on x, column 1 on y.
        centers: Per-point values mapped to colours with the 'viridis' colormap.
        label: Has no effect on the plot; kept so existing calls that pass it
            continue to work.
        title: Title for the current axes.
    """
    # The former if/else on `label` had two identical branches, so a single
    # call draws exactly the same thing.
    plt.scatter(points[:, 0], points[:, 1], c=centers, cmap='viridis')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.title(title)

# Visualize the results: true labels on the left, predicted clusters on the right
panels = [
    (true_labels, False, "True Labels"),
    (test_centers, True, "Predicted Clusters"),
]

plt.figure(figsize=(12, 6))

for position, (colouring, is_prediction, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    draw_plot(X_test, colouring, label=is_prediction, title=heading)

plt.tight_layout()
plt.show()
