In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from qiskit import QuantumCircuit, QuantumRegister, ClassicalRegister
from qiskit_aer import Aer
import time

# Module-level cache of already-built quantum circuits, so they are not
# recreated on every call. Keys are strings such as
# 'oracle_<target>_<qubits>' and 'diffusion_<qubits>'; values are the
# QuantumCircuit objects built by create_oracle / create_diffusion.
circuit_cache = {}

def create_oracle(values, target_idx, num_qubits):
    """Build (or fetch from the cache) a phase oracle that marks ``target_idx``.

    The circuit flips the phase of the one computational basis state whose
    qubit ``i`` equals bit ``i`` of ``target_idx`` and leaves every other
    basis state untouched.

    Args:
        values: Unused; kept so existing callers keep working.
        target_idx: Integer index of the basis state to mark.
        num_qubits: Width of the oracle circuit.

    Returns:
        A ``QuantumCircuit`` on ``num_qubits`` qubits. The same object is
        stored in ``circuit_cache`` and returned again on later calls with
        the same ``target_idx`` / ``num_qubits``.
    """
    st = time.time()
    cache_key = f'oracle_{target_idx}_{num_qubits}'
    if cache_key in circuit_cache:
        return circuit_cache[cache_key]

    oracle = QuantumCircuit(num_qubits)

    # A multi-controlled Z only fires on |1...1>. Flipping the qubits whose
    # target bit is 0 maps the target state onto |1...1>, so the phase flip
    # lands on the target; the same flips are undone afterwards.
    zero_bit_qubits = [q for q in range(num_qubits) if not (target_idx >> q) & 1]
    for q in zero_bit_qubits:
        oracle.x(q)

    last = num_qubits - 1
    if num_qubits == 1:
        # A single-qubit "controlled" Z is just Z.
        oracle.z(0)
    else:
        # H . MCX . H on the last qubit is a multi-controlled Z over all
        # qubits. The closing H is required for every width.
        oracle.h(last)
        if num_qubits == 2:
            oracle.cx(0, 1)
        else:
            oracle.mcx(list(range(last)), last)
        oracle.h(last)

    for q in zero_bit_qubits:
        oracle.x(q)

    circuit_cache[cache_key] = oracle
    et = time.time()
    print(f"create_oracle time = {et-st}")
    return oracle

def create_diffusion(num_qubits):
    """Build (or fetch from the cache) the Grover diffusion operator.

    The operator is H^n . X^n . (multi-controlled Z) . X^n . H^n acting on
    the first ``num_qubits`` qubits, i.e. an inversion about the uniform
    superposition (up to a global phase).

    Args:
        num_qubits: Number of data qubits the diffusion acts on.

    Returns:
        A ``QuantumCircuit`` on ``num_qubits + 1`` qubits. The extra last
        qubit is left untouched; the width is kept so the circuit still
        composes onto the caller's ``num_qubits + 1``-qubit register. The
        same object is stored in ``circuit_cache`` and reused on later calls.
    """
    st = time.time()
    cache_key = f'diffusion_{num_qubits}'
    if cache_key in circuit_cache:
        return circuit_cache[cache_key]

    diffusion = QuantumCircuit(num_qubits + 1)
    data_qubits = list(range(num_qubits))

    for qubit in data_qubits:
        diffusion.h(qubit)
    for qubit in data_qubits:
        diffusion.x(qubit)

    # Phase-flip |1...1> of the data register. Every data qubit must take
    # part, and the gate must act on the data qubits themselves: a
    # controlled phase onto a spare qubit sitting in |0> would do nothing.
    if num_qubits == 1:
        diffusion.z(0)
    elif num_qubits >= 2:
        last = num_qubits - 1
        diffusion.h(last)
        if num_qubits == 2:
            diffusion.cx(0, 1)
        else:
            diffusion.mcx(data_qubits[:-1], last)
        diffusion.h(last)

    for qubit in data_qubits:
        diffusion.x(qubit)
    for qubit in data_qubits:
        diffusion.h(qubit)

    circuit_cache[cache_key] = diffusion
    et = time.time()
    print(f"create_diffusion = {et-st}")
    return diffusion

def grover_find_min_index(values):
    """Return the position of the smallest element of ``values``.

    The minimum is located classically with ``np.argmin``; a Grover-style
    circuit (oracle + diffusion) that marks that position is then simulated
    on the Aer simulator with 1000 shots, and the most frequently measured
    valid index is returned.

    Args:
        values: Non-empty sequence of comparable numbers.

    Returns:
        An ``int`` in ``range(len(values))``.

    Raises:
        ValueError: If ``values`` is empty.
    """
    st = time.time()
    n = len(values)
    if n == 0:
        raise ValueError("values must contain at least one element")
    if n == 1:
        # Only one candidate: the answer is always 0, so skip the simulation.
        return 0

    num_bits = max(1, int(np.ceil(np.log2(n))))
    min_idx = int(np.argmin(values))

    # One extra qubit beyond the index register, matching the widths used by
    # create_oracle / create_diffusion.
    qr = QuantumRegister(num_bits + 1, 'q')
    cr = ClassicalRegister(num_bits, 'c')
    circuit = QuantumCircuit(qr, cr)

    for i in range(num_bits):
        circuit.h(qr[i])

    iterations = int(np.pi/4 * np.sqrt(2**num_bits))
    oracle = create_oracle(values, min_idx, num_bits + 1)
    diffusion = create_diffusion(num_bits)

    for _ in range(iterations):
        circuit = circuit.compose(oracle)
        circuit = circuit.compose(diffusion)

    for i in range(num_bits):
        circuit.measure(qr[i], cr[i])

    backend = Aer.get_backend('aer_simulator')
    result = backend.run(circuit, shots=1000).result()
    counts = result.get_counts()

    # The register can encode up to 2**num_bits states but only the first n
    # are real positions. Prefer the most frequent valid outcome; wrapping a
    # padding state with a modulo would point at an unrelated element.
    measured = {int(bits, 2): shots for bits, shots in counts.items()}
    valid = {idx: shots for idx, shots in measured.items() if idx < n}
    if valid:
        best = max(valid, key=valid.get)
    else:
        # No valid outcome observed at all: fall back to the wrapped value.
        best = max(measured, key=measured.get) % n

    et = time.time()
    print(f" grover find min index time = {et -st}")

    return best

def quantum_sort_cluster(cluster_df, sort_column):
    """Order the rows of ``cluster_df`` by repeatedly extracting the minimum.

    Each round asks ``grover_find_min_index`` for the position of the
    smallest remaining value of ``sort_column`` and moves that row to the
    end of the output order (a selection sort).

    Args:
        cluster_df: DataFrame holding the rows of one cluster.
        sort_column: Name of the column to order by.

    Returns:
        ``cluster_df`` itself when it has no rows; otherwise a new DataFrame
        with the rows in the selected order and a fresh 0..n-1 index.
    """
    started = time.time()
    if not len(cluster_df):
        return cluster_df

    frame = cluster_df.copy()
    column_values = frame[sort_column].tolist()

    # Row positions not yet placed, and the order in which they get picked.
    pending = list(range(len(column_values)))
    order = []

    while pending:
        candidates = [column_values[pos] for pos in pending]
        pick = grover_find_min_index(candidates)
        # `pick` is a position inside `pending`; popping it both removes the
        # entry and yields the original row position.
        order.append(pending.pop(pick))

    finished = time.time()
    print(f"quantum_sort_cluster time = {finished -started}")
    return frame.iloc[order].reset_index(drop=True)

def cluster_based_quantum_sort(input_csv, sort_column, n_clusters=4):
    """Sort a CSV by one column using K-means clustering plus per-cluster sorts.

    Rows with missing values are dropped, the remaining rows are clustered on
    ``sort_column``, each cluster is ordered with ``quantum_sort_cluster``,
    and the clusters are concatenated from the lowest-valued cluster to the
    highest. Progress, the result and the elapsed time are printed.

    Args:
        input_csv: Path of the CSV file to read.
        sort_column: Name of the (numeric) column to sort by.
        n_clusters: Requested number of K-means clusters; it is capped at the
            number of rows because K-means cannot form more clusters than
            there are samples.

    Returns:
        None. The function only prints.
    """
    start_time = time.time()

    # Read and preprocess data
    df = pd.read_csv(input_csv)
    df = df.dropna()

    if sort_column not in df.columns:
        print(f"Column '{sort_column}' not found.")
        return

    print("Original Data:\n", df)

    if len(df) == 0:
        print("No rows left to sort after dropping missing values.")
        return

    # Perform clustering
    clustering_data = df[[sort_column]]
    kmeans = KMeans(n_clusters=min(n_clusters, len(df)), random_state=42)
    df['cluster'] = kmeans.fit_predict(clustering_data)

    # K-means label numbers carry no ordering. Visit clusters from the one
    # with the smallest value to the one with the largest so that simply
    # concatenating the per-cluster results gives a globally ordered table.
    cluster_order = (
        df.groupby('cluster')[sort_column].min().sort_values().index.tolist()
    )
    all_sorted = []

    # Process each cluster exactly once
    for cluster_id in cluster_order:
        cluster_df = df[df['cluster'] == cluster_id].drop(columns=['cluster'])
        cluster_size = len(cluster_df)
        print(f"\nProcessing Cluster {cluster_id} (size {cluster_size})")

        sorted_cluster = quantum_sort_cluster(cluster_df, sort_column)
        all_sorted.append(sorted_cluster)

        print(f"Completed Cluster {cluster_id}")

    # Combine all sorted clusters
    merged_df = pd.concat(all_sorted, ignore_index=True)

    print("\nFinal Sorted Data:")
    print(merged_df)

    end_time = time.time()
    print(f"Total execution time: {end_time - start_time} seconds")

# Script entry point: start from an empty circuit cache, then run the
# cluster-based quantum sort on 'student_dataset.csv', ordering by the
# "Roll No." column with 20 clusters.
if __name__ == "__main__":
    circuit_cache.clear()
    cluster_based_quantum_sort('student_dataset.csv', sort_column="Roll No.", n_clusters=20)

In [None]:

def quantumRollNoSort(df):
    """Score every column of ``df`` on whether it looks like a roll-number column.

    A column scores 4 when it has an integer dtype and every sampled value
    (up to 10 from the start, 10 around the middle and 10 from the end),
    written as a string, has the same length of at least 5 characters and
    the same first character. Every other column scores 0.

    Args:
        df: DataFrame whose columns are inspected.

    Returns:
        dict mapping each column name to 4 (roll-number-like) or 0. Every
        column of ``df`` appears in the result.
    """
    sample_size = 10
    min_length = 5
    half = sample_size // 2

    result = {}
    for column in df.columns:
        col_data = df[column]
        if not pd.api.types.is_integer_dtype(col_data):
            print("Not int")
            result[column] = 0
            continue

        # Default verdict, so a column that fails any later check still gets
        # an entry in the result.
        result[column] = 0

        total = len(col_data)
        mid = total // 2
        # Slice starts are clamped at 0: a negative start would silently take
        # values from the end of a short column instead.
        windows = (
            col_data.iloc[:sample_size],
            col_data.iloc[max(0, mid - half):mid + half],
            col_data.iloc[max(0, total - sample_size):],
        )
        samples = [str(value) for window in windows for value in window]
        if not samples:
            continue

        # Every sampled value must have the same length, and be long enough.
        lengths = {len(text) for text in samples}
        if len(lengths) != 1 or lengths.pop() < min_length:
            continue

        # ...and share the same leading character.
        prefix = samples[0][0]
        isSame = all(text.startswith(prefix) for text in samples)
        print(isSame)
        if isSame:
            result[column] = 4
            print("done")

    return result

# Demo: load the student dataset and print the per-column score returned by
# quantumRollNoSort.
df = pd.read_csv('student_dataset.csv')
r = quantumRollNoSort(df)
print(r)

Not int
False
Not int
Not int
True
done
Not int
Not int
{'Student_Names': 0, 'Phone_No.': 0, 'Math': 0, 'Physics': 0, 'Chemistry': 0, 'Grade': 0, 'Comment': 0, 'Roll No.': 4, 'School Name': 0, 'Student Address': 0}


In [5]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import time

def classical_sort_cluster(cluster_df, sort_column):
    """Return ``cluster_df`` ordered ascending by ``sort_column``.

    The input is not modified; the result carries a fresh 0..n-1 index.
    """
    ordered = cluster_df.sort_values(by=sort_column)
    return ordered.reset_index(drop=True)

def classical_cluster_based_sort(input_csv, sort_column, n_clusters=4):
    """Classical baseline: cluster a CSV on one column, sort, and report timing.

    Rows with missing values are dropped, the rest are clustered with K-means
    on ``sort_column``, every cluster is sorted with
    ``classical_sort_cluster``, and the concatenation is sorted once more so
    the printed table is globally ordered.

    Args:
        input_csv: Path of the CSV file to read.
        sort_column: Name of the column to cluster on and sort by.
        n_clusters: Number of K-means clusters.

    Returns:
        None. The data, the sorted result and the elapsed time are printed.
    """
    started = time.time()

    frame = pd.read_csv(input_csv).dropna()

    if sort_column not in frame.columns:
        print(f"Column '{sort_column}' not found.")
        return

    print("Original Data:\n", frame)

    # Label every row with the id of its K-means cluster.
    model = KMeans(n_clusters=n_clusters, random_state=42)
    frame['cluster'] = model.fit_predict(frame[[sort_column]])

    per_cluster = []
    for label in range(n_clusters):
        members = frame[frame['cluster'] == label].drop(columns=['cluster'])
        print(f"\nSorting Cluster {label} (size {len(members)}):")
        per_cluster.append(classical_sort_cluster(members, sort_column))

    # Cluster ids are not ordered by value, hence the final sort of the whole
    # concatenated table.
    combined = pd.concat(per_cluster, ignore_index=True)
    final_sorted_df = combined.sort_values(by=sort_column).reset_index(drop=True)

    print("\nFinal Sorted Data:")
    print(final_sorted_df)

    elapsed = time.time() - started
    print(f"Total time for classical implementation = {elapsed} seconds")

# Script entry point: run the classical baseline on 'phpB0xrNj.csv',
# sorting by column "f3" with 20 clusters.
if __name__ == "__main__":
    classical_cluster_based_sort('phpB0xrNj.csv',sort_column="f3", n_clusters=20)

Original Data:
           f1      f2      f3      f4      f5      f6      f7    f596    f597  \
0    -0.4394 -0.0930  0.1718  0.4620  0.6226  0.4704  0.3578  0.6410  0.6154   
1    -0.4348 -0.1198  0.2474  0.4036  0.5026  0.6328  0.4948  1.0000  0.7272   
2    -0.2330  0.2124  0.5014  0.5222 -0.3422 -0.5840 -0.7168  0.2380  0.1904   
3    -0.3808 -0.0096  0.2602  0.2554 -0.4290 -0.6746 -0.6868  0.5252  0.3670   
4    -0.3412  0.0946  0.6082  0.6216 -0.1622 -0.3784 -0.4324  0.4688  0.5626   
...      ...     ...     ...     ...     ...     ...     ...     ...     ...   
1487 -0.2232  0.1542  0.3394  0.3720  0.5100  0.5970  0.3104  0.5068  0.3698   
1488 -0.2552  0.0776  0.1948  0.5122  0.6522  0.6258  0.4934  0.1818  0.3454   
1489 -0.3188 -0.0318  0.1354  0.2988  0.7132  0.6374  0.5140 -0.1276  0.4042   
1490 -0.3636 -0.1448  0.3064  0.4074  0.5320  0.6262  0.3670 -0.0176  0.2280   
1491 -0.3236  0.0522  0.5156  0.9832  1.0000  0.4488  0.8038  0.1070  0.1572   

        f598  ...    f6

In [85]:

def isDfTotallyIntFloat(df):
    """Return True when every column of ``df`` has a numeric dtype.

    Numeric is whatever ``pd.api.types.is_numeric_dtype`` accepts. A frame
    without columns yields True.
    """
    for column_dtype in df.dtypes:
        if not pd.api.types.is_numeric_dtype(column_dtype):
            return False
    return True
    
# Demo: report whether every column of the student dataset is numeric.
# The result is deliberately not stored under the name `type`: rebinding
# that builtin would break any later `type(...)` call in the same session.
df = pd.read_csv('student_dataset.csv')
is_all_numeric = isDfTotallyIntFloat(df)
print(is_all_numeric)

False


In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from qiskit_aer import Aer
from qiskit_algorithms import Grover, AmplificationProblem
from qiskit.circuit.library import PhaseOracle
from qiskit.utils import QuantumInstance

# ----- Quantum Minimum Finding -----

def index_to_bin(index, num_bits):
    """Return ``index`` in binary, zero-padded on the left to ``num_bits`` digits.

    The string is never truncated: an index needing more than ``num_bits``
    digits is returned in full.
    """
    return f"{index:0{num_bits}b}"

def create_oracle_expression(min_index, num_bits):
    """Build a boolean expression that is true only for ``min_index``.

    Variable ``x{i}`` stands for bit ``i`` of the index, with ``x0`` the
    least significant bit. Qiskit measurement bitstrings are little-endian
    (qubit 0 is the rightmost character), so this numbering is what makes
    ``int(bitstring, 2)`` decode back to ``min_index``. This assumes the
    oracle built from the expression assigns variables to qubits in order of
    first appearance (x0 -> qubit 0, x1 -> qubit 1, ...).

    Args:
        min_index: Non-negative integer index to mark.
        num_bits: Minimum number of variables in the expression. At least one
            variable is always produced, and more are used when ``min_index``
            does not fit in ``num_bits`` bits.

    Returns:
        A string such as ``"x0 & ~x1"`` (``min_index=1``, ``num_bits=2``).

    Raises:
        ValueError: If ``min_index`` is negative.
    """
    index = int(min_index)
    if index < 0:
        raise ValueError("min_index must be non-negative")

    width = max(num_bits, index.bit_length(), 1)
    literals = [
        f"x{i}" if (index >> i) & 1 else f"~x{i}"
        for i in range(width)
    ]
    return ' & '.join(literals)

def grover_find_min_index(values):
    """Return the index that Grover search reports for the minimum of ``values``.

    ``values`` is padded with ``inf`` up to the next power of two, the
    position of the minimum is found with ``np.argmin``, and a phase oracle
    for that position is handed to Grover's algorithm on the Aer simulator.
    The most frequent measurement, read as a binary number, is returned.

    NOTE(review): the ``QuantumInstance`` / ``quantum_instance=`` usage and
    the ``.items()`` call on ``result.circuit_results`` could not be checked
    against the installed Qiskit packages from this file alone - confirm
    against the library versions in use.

    Args:
        values: List of numbers (it is concatenated with a padding list).

    Returns:
        The measured index as an ``int``.
    """
    count = len(values)
    num_bits = int(np.ceil(np.log2(count)))

    # Pad to a full 2**num_bits search space; inf can never be the minimum.
    padding = [float('inf')] * (2 ** num_bits - count)
    min_index = np.argmin(values + padding)

    oracle = PhaseOracle(create_oracle_expression(min_index, num_bits))
    problem = AmplificationProblem(oracle)

    simulator = Aer.get_backend("aer_simulator")
    grover = Grover()
    result = grover.amplify(problem, quantum_instance=QuantumInstance(simulator))

    # Keep the bitstring with the highest recorded count.
    best_bits, _ = max(result.circuit_results.items(), key=lambda item: item[1])
    return int(best_bits, 2)

def quantum_sort_cluster(cluster_df, sort_column):
    """Order the rows of ``cluster_df`` by repeatedly extracting the minimum.

    Each round asks ``grover_find_min_index`` for the position of the
    smallest remaining value of ``sort_column`` (a selection sort). Rows are
    taken from the original frame by position, so column names and dtypes
    are preserved - including for an empty cluster.

    Args:
        cluster_df: DataFrame holding the rows of one cluster.
        sort_column: Name of the column to order by.

    Returns:
        A new DataFrame with the same columns, the rows in the selected
        order, and a fresh 0..n-1 index.
    """
    df = cluster_df.reset_index(drop=True)
    values = df[sort_column].tolist()

    # Row positions not yet placed, and the order in which they get picked.
    remaining = list(range(len(values)))
    order = []

    while remaining:
        pick = grover_find_min_index([values[pos] for pos in remaining])
        # `pick` is a position inside `remaining`; popping it both removes
        # the entry and yields the original row position.
        order.append(remaining.pop(pick))

    return df.iloc[order].reset_index(drop=True)

# ----- Main Cluster-Based Hybrid Sort -----

def cluster_based_quantum_sort(input_csv, sort_column, n_clusters=2, output_csv='cluster_sorted.csv'):
    """Hybrid sort: K-means clustering, per-cluster quantum sort, classical merge.

    The CSV is clustered on ``sort_column``, every cluster is ordered with
    ``quantum_sort_cluster``, the pieces are concatenated and sorted once
    more classically, and the result is printed and written to
    ``output_csv``.

    Args:
        input_csv: Path of the CSV file to read.
        sort_column: Name of the column to cluster on and sort by.
        n_clusters: Number of K-means clusters.
        output_csv: Path the sorted table is written to (without the index).

    Returns:
        None.
    """
    frame = pd.read_csv(input_csv)

    if sort_column not in frame.columns:
        print(f"Column '{sort_column}' not found.")
        return

    print("Original Data:\n", frame)

    # Label every row with the id of its K-means cluster.
    model = KMeans(n_clusters=n_clusters, random_state=42)
    frame['cluster'] = model.fit_predict(frame[[sort_column]])

    per_cluster = []
    for label in range(n_clusters):
        members = frame[frame['cluster'] == label].drop(columns=['cluster'])
        print(f"\nSorting Cluster {label} (size {len(members)}):")
        per_cluster.append(quantum_sort_cluster(members, sort_column))

    # Stitch the clusters together, then sort classically for a global order.
    combined = pd.concat(per_cluster, ignore_index=True)
    final_sorted_df = combined.sort_values(by=sort_column).reset_index(drop=True)

    print("\nFinal Sorted Data:")
    print(final_sorted_df)

    final_sorted_df.to_csv(output_csv, index=False)
    print(f"\nSorted data saved to '{output_csv}'.")

# Example usage: sort "data.csv" by its "score" column using 2 clusters;
# the output path is left at the function's default.
if __name__ == "__main__":
    cluster_based_quantum_sort("data.csv", sort_column="score", n_clusters=2)



ImportError: cannot import name 'BaseSampler' from 'qiskit.primitives' (c:\Users\tikes\OneDrive\Documents\OneDrive\Desktop\ClonedProject\Tikesh01.github.io\.venv\Lib\site-packages\qiskit\primitives\__init__.py)