In [1]:
import numpy as np
import qiskit
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit import Aer
from qiskit_machine_learning.algorithms import QSVC
from sklearn.metrics import classification_report
from sklearn.datasets import fetch_openml
from qiskit.circuit.library import PauliFeatureMap
from qiskit.circuit.library import ZFeatureMap
from qiskit.circuit.library import ZZFeatureMap
from qiskit.visualization import plot_circuit_layout
from qiskit import QuantumCircuit
from qiskit.circuit.library import BlueprintCircuit
from qiskit.circuit import Parameter
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from qoop.compilation.qsp import QuantumStatePreparation
from qoop.core import state
from qoop.evolution.environment import EEnvironmentMetadata
from qoop.evolution.environment import EEnvironment

**DATA PREPROCESSING**

Load data

In [2]:
import pandas as pd

# Raw string literal: the Windows path contains backslashes followed by
# letters (\D, \G, \g). In a normal string these are invalid escape sequences
# that Python only tolerates with a warning; the raw form yields the exact
# same path text without relying on that leniency.
# NOTE(review): this is an absolute, machine-specific location — consider
# moving it to a configurable data directory.
CREDIT_CSV_PATH = r"C:\Desktop\GA+QSVM\german_credit_data.csv"

# The first CSV column is used as the row index rather than as a feature.
credit_df = pd.read_csv(CREDIT_CSV_PATH, index_col=0)
credit_df.head(10)

Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,67,male,2,own,,little,1169,6,radio/TV,good
1,22,female,2,own,little,moderate,5951,48,radio/TV,bad
2,49,male,1,own,little,,2096,12,education,good
3,45,male,2,free,little,little,7882,42,furniture/equipment,good
4,53,male,2,free,little,little,4870,24,car,bad
5,35,male,1,free,,,9055,36,education,good
6,53,male,2,own,quite rich,,2835,24,furniture/equipment,good
7,35,male,3,rent,little,moderate,6948,36,car,good
8,61,male,1,own,rich,,3059,12,radio/TV,good
9,28,male,3,own,little,moderate,5234,30,car,bad


In [3]:
# Print the column dtypes and non-null counts to spot columns with missing values.
credit_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Age               1000 non-null   int64 
 1   Sex               1000 non-null   object
 2   Job               1000 non-null   int64 
 3   Housing           1000 non-null   object
 4   Saving accounts   817 non-null    object
 5   Checking account  606 non-null    object
 6   Credit amount     1000 non-null   int64 
 7   Duration          1000 non-null   int64 
 8   Purpose           1000 non-null   object
 9   Risk              1000 non-null   object
dtypes: int64(4), object(6)
memory usage: 85.9+ KB


In [4]:
# Turn missing entries into an explicit category so they can be encoded
# like any other label later on, then re-check the non-null counts.
MISSING_LABEL = "not available"
credit_df = credit_df.fillna(MISSING_LABEL)
credit_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Age               1000 non-null   int64 
 1   Sex               1000 non-null   object
 2   Job               1000 non-null   int64 
 3   Housing           1000 non-null   object
 4   Saving accounts   1000 non-null   object
 5   Checking account  1000 non-null   object
 6   Credit amount     1000 non-null   int64 
 7   Duration          1000 non-null   int64 
 8   Purpose           1000 non-null   object
 9   Risk              1000 non-null   object
dtypes: int64(4), object(6)
memory usage: 85.9+ KB


In [5]:
# Descriptive statistics (count, mean, std, quartiles, min/max); with default
# arguments only the numeric columns are summarised.
credit_df.describe()

Unnamed: 0,Age,Job,Credit amount,Duration
count,1000.0,1000.0,1000.0,1000.0
mean,35.546,1.904,3271.258,20.903
std,11.375469,0.653614,2822.736876,12.058814
min,19.0,0.0,250.0,4.0
25%,27.0,2.0,1365.5,12.0
50%,33.0,2.0,2319.5,18.0
75%,42.0,2.0,3972.25,24.0
max,75.0,3.0,18424.0,72.0


In [6]:
# Number of distinct values in each column.
credit_df.nunique()

Age                  53
Sex                   2
Job                   4
Housing               3
Saving accounts       5
Checking account      4
Credit amount       921
Duration             33
Purpose               8
Risk                  2
dtype: int64

Note:
Sex: male = 1, female = 2

Housing: own = 1, rent = 2, free = 3

Saving accounts / Checking account: not available = 0, little = 1, moderate = 2, quite rich = 3, rich = 4

Purpose: car = 1, furniture/equipment = 2, radio/TV = 3, domestic appliances = 4, repairs = 5, education = 6, business = 7, vacation/others = 8

**Numerical Data**

In [7]:
# Ordinal scale shared by both account columns.
ACCOUNT_LEVEL_CODES = {'not available': 0, 'little': 1, 'moderate': 2, 'quite rich': 3, 'rich': 4}

# Integer code for every label of every categorical feature column.
CATEGORY_CODES = {
    'Sex': {'male': 1, 'female': 2},
    'Housing': {'own': 1, 'rent': 2, 'free': 3},
    'Saving accounts': ACCOUNT_LEVEL_CODES,
    'Checking account': ACCOUNT_LEVEL_CODES,
    'Purpose': {
        'car': 1,
        'furniture/equipment': 2,
        'radio/TV': 3,
        'domestic appliances': 4,
        'repairs': 5,
        'education': 6,
        'business': 7,
        'vacation/others': 8,
    },
}


def encode_categories(frame: pd.DataFrame, code_book: dict) -> pd.DataFrame:
    """Return a copy of ``frame`` with its categorical columns replaced by integer codes.

    Args:
        frame: Data frame holding the columns named in ``code_book``.
        code_book: Mapping of column name -> {label: integer code}.

    Returns:
        A new data frame; ``frame`` itself is not modified.

    Raises:
        ValueError: If a column still holding labels contains a value that has
            no entry in its code table (``Series.map`` would otherwise turn it
            into NaN without any warning).
    """
    encoded_frame = frame.copy()
    for column, codes in code_book.items():
        labels = encoded_frame[column]
        # A numeric column has already been encoded. Mapping it a second time
        # would look up the integer codes as if they were labels and wipe the
        # whole column to NaN, so skip it to keep the cell safe to re-run.
        if pd.api.types.is_numeric_dtype(labels):
            continue
        encoded = labels.map(codes)
        unmapped = labels[encoded.isna()].unique()
        if len(unmapped) > 0:
            raise ValueError(
                f"Column {column!r} has labels with no code: {sorted(map(str, unmapped))}"
            )
        encoded_frame[column] = encoded
    return encoded_frame


credit_df = encode_categories(credit_df, CATEGORY_CODES)

In [12]:
# Preview the first 10 rows of the frame.
credit_df.head(10)

Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,67,1,2,1,0,1,1169,6,3,good
1,22,2,2,1,1,2,5951,48,3,bad
2,49,1,1,1,1,0,2096,12,6,good
3,45,1,2,3,1,1,7882,42,2,good
4,53,1,2,3,1,1,4870,24,1,bad
5,35,1,1,3,0,0,9055,36,6,good
6,53,1,2,1,3,0,2835,24,2,good
7,35,1,3,2,1,2,6948,36,1,good
8,61,1,1,1,4,0,3059,12,3,good
9,28,1,3,1,1,2,5234,30,1,bad


In [13]:
# Feature matrix: every column except the outcome column 'Risk'.
X = credit_df.drop(columns=['Risk'])
X.head()

Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose
0,67,1,2,1,0,1,1169,6,3
1,22,2,2,1,1,2,5951,48,3
2,49,1,1,1,1,0,2096,12,6
3,45,1,2,3,1,1,7882,42,2
4,53,1,2,3,1,1,4870,24,1


In [14]:
# Outcome vector: the 'Risk' column on its own, as a Series.
y = credit_df.loc[:, 'Risk']
y.head()

0    good
1     bad
2    good
3    good
4     bad
Name: Risk, dtype: object

In [15]:
# Split X and y into training and test subsets with scikit-learn.
# train_test_split is already imported in the notebook's first cell, so it is
# not imported again here.
TEST_FRACTION = 0.3  # share of rows held out for testing
SPLIT_SEED = 5       # fixed seed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [16]:
# Show (rows, columns) for the training and test feature sets, in that order.
for feature_subset in (X_train, X_test):
    print(feature_subset.shape)

(700, 9)
(300, 9)


Generate X train, X test, y train, y test


In [21]:
# Report the shape of each of the four pieces produced by the split.
for label, subset in (
    ("X train: ", X_train),
    ("X test: ", X_test),
    ("y train: ", y_train),
    ("y test: ", y_test),
):
    print(label, subset.shape)

X train:  (700, 9)
X test:  (300, 9)
y train:  (700,)
y test:  (300,)
