In [8]:
import pandas as pd
import os
import time
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split, GridSearchCV
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the training data; the path is relative to the notebook's working directory.
train = pd.read_csv("../data/train_fe.csv")

In [3]:
# Preview the first 10 rows of the loaded frame.
train.head(10)

Unnamed: 0,international_plan,voice_mail_plan,area_code_408,area_code_415,area_code_510,region_Midwest,region_Northeast,region_South,region_West,account_length,...,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,customer_service_calls,churn
0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.524793,...,0.582353,0.542866,0.572161,0.43609,0.572152,0.5,0.15,0.5,0.111111,0.0
1,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.438017,...,0.605882,0.53769,0.599772,0.526316,0.6,0.685,0.15,0.685185,0.111111,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.561983,...,0.647059,0.333225,0.338457,0.533835,0.338608,0.61,0.25,0.609259,0.0,0.0
3,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.342975,...,0.517647,0.170171,0.436095,0.421053,0.436076,0.33,0.35,0.32963,0.222222,0.0
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.305785,...,0.717647,0.407959,0.407629,0.661654,0.407595,0.505,0.15,0.505556,0.333333,0.0
5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.483471,...,0.594118,0.6066,0.45602,0.639098,0.456329,0.315,0.3,0.314815,0.0,0.0
6,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.495868,...,0.635294,0.958266,0.480786,0.639098,0.481013,0.375,0.35,0.375926,0.333333,0.0
7,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.603306,...,0.552941,0.283403,0.478508,0.473684,0.478481,0.355,0.3,0.355556,0.0,0.0
8,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.578512,...,0.652941,0.610482,0.804725,0.481203,0.805063,0.56,0.25,0.559259,0.0,0.0
9,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.301653,...,0.870588,0.449369,0.433533,0.458647,0.433544,0.455,0.25,0.455556,0.0,0.0


In [4]:
# Columns kept for modelling: eight input features followed by the `churn` target.
cols = [
    'region_South',
    'region_West',
    'account_length',
    'number_vmail_messages',
    'total_day_minutes',
    'total_day_calls',
    'total_intl_charge',
    'customer_service_calls',
    'churn',
]

In [5]:
# Keep only the selected columns. This rebinds `train`, so the full-width
# frame is no longer reachable under this name once the cell has run.
train = train[cols]

In [6]:
# Separate the input features from the `churn` target column.
x_train_use = train.drop(columns="churn")
y_train_use = train["churn"]

In [9]:
# Split into 80% training / remaining rows for validation; the fixed seed
# keeps the partition reproducible across runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_use,
    y_train_use,
    train_size=0.8,
    random_state=42,
)

In [10]:
# Sanity check: shapes of the training features and training labels.
x_train.shape, y_train.shape

((2132, 8), (2132,))

In [11]:
# Convert every split to plain NumPy arrays before handing them to the model.
# The validation split is converted as well, so the estimator is fitted and
# later scored/predicted on the same container type (fitting on an ndarray and
# predicting on a DataFrame triggers scikit-learn's feature-name mismatch
# warning).
x_train = np.array(x_train)
y_train = np.array(y_train)
x_val = np.array(x_val)
y_val = np.array(y_val)

In [17]:
# Number of input features: every entry of `cols` except the `churn` target.
# It is used as the feature map's `feature_dimension` in a later cell.
num_qubits=len(cols)-1
num_qubits

8

In [18]:
from qiskit.circuit.library import PauliFeatureMap

# Single-repetition Pauli feature map restricted to ZZ terms, sized to the
# number of input features.
feature_map = PauliFeatureMap(
    feature_dimension=num_qubits,
    reps=1,
    paulis=['ZZ'],
)


In [None]:
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_algorithms.utils import algorithm_globals

try:
    # `BasicAer` is not used in this cell. It was removed in Qiskit 1.0, so the
    # import is guarded to keep the cell runnable on newer releases while the
    # name stays available on older ones.
    from qiskit import BasicAer  # noqa: F401
except ImportError:
    BasicAer = None

# Fix the seed of Qiskit's shared random number generator.
algorithm_globals.random_seed = 12345

# Quantum kernel built on the feature map defined in the earlier cell, and the
# support-vector classifier that uses it.
qkernel = FidelityQuantumKernel(feature_map=feature_map)
qsvc = QSVC(quantum_kernel=qkernel)

# Time only the fit. `perf_counter` is a monotonic clock intended for measuring
# intervals, so the result is not affected by system clock adjustments.
start = time.perf_counter()
qsvc.fit(x_train, y_train)
elapsed = time.perf_counter() - start  # seconds

# Mean accuracy on the training split and on the held-out validation split.
train_score = qsvc.score(x_train, y_train)
qsvc_score = qsvc.score(x_val, y_val)

# NOTE: the message says "test", but the score is computed on the validation split.
print(f"QSVC classification test score: {qsvc_score}")

In [None]:
# Predicted labels for both splits.
# Note the naming: `y_pred` holds the TRAINING-set predictions, while
# `val_pred` holds the validation-set predictions.
y_pred = qsvc.predict(x_train)
val_pred=qsvc.predict(x_val)



In [None]:

# F1 on the training split (`y_pred`) and on the validation split (`val_pred`).
f1_train, f1_test = f1_score(y_train, y_pred), f1_score(y_val, val_pred)

In [None]:
# Precision on the training split (`y_pred`) and on the validation split (`val_pred`).
prec_train, prec_test = (
    precision_score(y_train, y_pred),
    precision_score(y_val, val_pred),
)

In [None]:
# Recall on the training split (`y_pred`) and on the validation split (`val_pred`).
recall_train, recall_test = (
    recall_score(y_train, y_pred),
    recall_score(y_val, val_pred),
)


In [None]:
# Per-class report for the training split, then for the validation split.
print(
    classification_report(y_train, y_pred),
    classification_report(y_val, val_pred),
    sep="\n",
)

In [None]:
def plot_confusion_matrix(conf_matrix):
    """Render a confusion matrix as an annotated heatmap and display it.

    Parameters
    ----------
    conf_matrix : array-like
        Matrix handed straight to ``seaborn.heatmap``. Tick labels are
        hard-coded for two classes (0 and 1): rows are labelled "Actual",
        columns "Predicted".

    Notes
    -----
    ``sns.set(font_scale=1.2)`` changes seaborn's global settings, so the
    font scaling also applies to plots drawn after this call.
    """
    plt.figure(figsize=(8, 6))
    sns.set(font_scale=1.2)
    # heatmap draws on the current axes and returns them, so the labels and
    # title can be set on the returned Axes object.
    ax = sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='g',
        cmap='Blues',
        cbar=False,
        xticklabels=['Predicted 0', 'Predicted 1'],
        yticklabels=['Actual 0', 'Actual 1'],
    )
    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')
    ax.set_title('Confusion Matrix')
    plt.show()


In [None]:
# Confusion matrix for the training split (`y_pred` holds the train predictions).
conf = confusion_matrix(y_train, y_pred)
plot_confusion_matrix(conf)

In [None]:
# Confusion matrix for the validation split; this rebinds `conf`.
conf = confusion_matrix(y_val, val_pred)
plot_confusion_matrix(conf)

In [None]:
# Save the fitted classifier; the path is relative to the notebook's working directory.
qsvc_mod_num = "../model/mod_qsvc_pauli.model"
qsvc.save(qsvc_mod_num)

In [None]:
# One-row summary of this run. The frame is built directly from a single
# record, so no placeholder column has to be created and dropped afterwards;
# the key order below fixes the column order of the CSV.
df = pd.DataFrame([{
    "f1_test": f1_test,
    "f1_train": f1_train,
    "prec_train": prec_train,
    "prec_test": prec_test,
    "recall_train": recall_train,
    "recall_test": recall_test,
    "train time": elapsed,  # seconds spent in qsvc.fit
    "model": "QSVC",
    "feature_map_type": "Pauli",
}])

df.to_csv("../result/regular/res_qsvc_pauli.csv", index=False)

In [None]:
# Extra summary columns on the one-row results frame.
# NOTE(review): these are added after `df.to_csv(...)` ran in the previous
# cell, so they are not in the saved CSV unless it is written again.
df["feature_map_type"] = "Pauli"
#df["optimizer"] = ""
df["train_score"] = train_score
df["val_score"] = qsvc_score
# Validation metrics have to be scored against the validation predictions
# (`val_pred`). `y_pred` holds the training-set predictions, whose length does
# not match `y_val`, so scikit-learn would raise a ValueError.
df["recall_score"] = recall_score(y_val, val_pred, average='weighted')
df["f1_score"] = f1_score(y_val, val_pred, average='weighted')
df["precision_score"] = precision_score(y_val, val_pred, average='weighted')
df["model"] = "QSVC"