In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Define models to train
from sklearn.model_selection import KFold, cross_val_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report

In [2]:
file_path="D:/Github_local/Quantum_ML_mini_project/Classical ML/Cleaned_data.csv"
df=pd.read_csv(file_path)
df.head()

Unnamed: 0,Age,Gender,TB,DB,Alkphos,Sgpt,Sgot,TP,ALB,A/G Ratio,Selector
0,65,0,0.7,0.1,187,16,18,6.8,3.3,0.9,1
1,62,1,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,1,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,1,1.0,0.4,182,14,20,6.8,3.4,1.0,1
4,72,1,3.9,2.0,195,27,59,7.3,2.4,0.4,1


# Training and Testing Datasets over Several Model

In [3]:
#Split the data into predictor variables and target variable, following by breaking them into train and test sets. 
Y = df['Selector'].values
X = df.drop(['Selector'], axis=1).values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, Y_train, Y_test = train_test_split (X, Y, test_size = 0.25, random_state=42)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

(437, 10) (146, 10) (437,) (146,)


In [4]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_resampled, Y_resampled = smote.fit_resample(X, Y)
X_train_resampled, X_test_resampled, Y_train_resampled, Y_test_resampled = train_test_split(X_resampled, Y_resampled, test_size=0.25, random_state=42)
print(X_train_resampled.shape, X_test_resampled.shape, Y_train_resampled.shape, Y_test_resampled.shape)

(624, 10) (208, 10) (624,) (208,)


IBM Qiskit Quantum Implementation

In [5]:
# Import necessary libraries
import numpy as np
from qiskit_aer import Aer
from qiskit_machine_learning.algorithms.classifiers import VQC
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Custom Wrapper Class for VQC
class VQCWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, vqc):
        self.vqc = vqc
    
    def fit(self, X, y):
        self.vqc.fit(X, y)
    
    def predict(self, X):
        return self.vqc.predict(X)

#Feature_map
from qiskit.circuit.library import ZZFeatureMap
num_features = X.shape[1]
feature_map = ZZFeatureMap(feature_dimension=num_features, reps=1)
#ansatz
from qiskit.circuit.library import RealAmplitudes
ansatz = RealAmplitudes(num_qubits=num_features, reps=3)
#optimizer and sampler
from qiskit_algorithms.optimizers import COBYLA
optimizer = COBYLA(maxiter=20)
from qiskit.primitives import Sampler

sampler = Sampler()

# Assuming you have X_train, X_test, y_train, y_test as your data
# Creating VQC
vqc = VQC(feature_map=feature_map, ansatz=ansatz, optimizer=optimizer)

# # Set the backend for VQC
# backend = Aer.get_backend('qasm_simulator')
# vqc.set_backend(backend)

# Create VQC Wrapper
vqc_wrapper = VQCWrapper(vqc)
# Train VQC
vqc_wrapper.fit(X_train_resampled, Y_train_resampled)

In [6]:
# Creating RDC
RDC = RandomForestClassifier(n_estimators=10)
# Train ANN
RDC.fit(X_train_resampled, Y_train_resampled)

# Get RDC predictions
rdc_predictions = RDC.predict(X_test)

# Ensemble Model
ensemble_rdc_model = VotingClassifier(estimators=[
    ('vqc', vqc_wrapper),
    ('rdc', RDC)
], voting='hard')

# Train Ensemble Model
ensemble_rdc_model.fit(X_train_resampled, Y_train_resampled)

# Get Ensemble Model predictions
ensemble_rdc_predictions = ensemble_rdc_model.predict(X_test)

# Evaluate models
print("Ensemble Model Accuracy:", accuracy_score(Y_test, ensemble_rdc_predictions))


Ensemble Model Accuracy: 0.8904109589041096


In [7]:
# Creating ANN
ANN = MLPClassifier(activation="relu",solver ="adam",random_state=42)
# Train ANN
ANN.fit(X_train_resampled, Y_train_resampled)

# Get ANN predictions
ann_predictions = ANN.predict(X_test)

# Ensemble Model
ensemble_ann_model = VotingClassifier(estimators=[
    ('vqc', vqc_wrapper),
    ('ann', ANN)
], voting='hard')

# Train Ensemble Model
ensemble_ann_model.fit(X_train_resampled, Y_train_resampled)

# Get Ensemble Model predictions
ensemble_ann_predictions = ensemble_ann_model.predict(X_test)

# Evaluate models
print("Ensemble Model Accuracy:", accuracy_score(Y_test, ensemble_ann_predictions))




Ensemble Model Accuracy: 0.7534246575342466


In [8]:
from sklearn.metrics import classification_report


ensemble_ann_report = classification_report(Y_test, ensemble_ann_predictions)
print("\nEnsemble ANN REPORT:\n",ensemble_ann_report)
ensemble_rdc_report = classification_report(Y_test, ensemble_rdc_predictions)
print("\nEnsemble RFC REPORT:\n",ensemble_rdc_report)


Ensemble ANN REPORT:
               precision    recall  f1-score   support

           1       0.88      0.77      0.82       109
           2       0.51      0.70      0.59        37

    accuracy                           0.75       146
   macro avg       0.70      0.74      0.71       146
weighted avg       0.79      0.75      0.76       146


Ensemble RFC REPORT:
               precision    recall  f1-score   support

           1       0.91      0.94      0.93       109
           2       0.82      0.73      0.77        37

    accuracy                           0.89       146
   macro avg       0.86      0.84      0.85       146
weighted avg       0.89      0.89      0.89       146

