In [1]:
import pandas as pd

from quoptuna import DataPreparation
from quoptuna.backend.utils.data_utils.data import mock_csv_data

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
dataset = fetch_ucirepo(id=143)

# data (as pandas dataframes)
X = dataset.data.features
y = dataset.data.targets

# metadata
print(dataset.metadata)

# variable information
print(dataset.variables)

{'uci_id': 143, 'name': 'Statlog (Australian Credit Approval)', 'repository_url': 'https://archive.ics.uci.edu/dataset/143/statlog+australian+credit+approval', 'data_url': 'https://archive.ics.uci.edu/static/public/143/data.csv', 'abstract': 'This file concerns credit card applications. This database exists elsewhere in the repository (Credit Screening Database) in a slightly different form', 'area': 'Business', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 690, 'num_features': 14, 'feature_types': ['Categorical', 'Integer', 'Real'], 'demographics': [], 'target_col': ['A15'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1987, 'last_updated': None, 'dataset_doi': '10.24432/C59012', 'creators': ['Ross Quinlan'], 'intro_paper': None, 'additional_info': {'summary': 'This file concerns credit card applications.  All attribute names and values have been changed to meaningless symbols to protect 

In [4]:
X

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14
0,1,22.08,11.460,2,4,4,1.585,0,0,0,1,2,100,1213
1,0,22.67,7.000,2,8,4,0.165,0,0,0,0,2,160,1
2,0,29.58,1.750,1,4,4,1.250,0,0,0,1,2,280,1
3,0,21.67,11.500,1,5,3,0.000,1,1,11,1,2,0,1
4,1,20.17,8.170,2,6,4,1.960,1,1,14,0,2,60,159
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,1,31.57,10.500,2,14,4,6.500,1,0,0,0,2,0,1
686,1,20.67,0.415,2,8,4,0.125,0,0,0,0,2,0,45
687,0,18.83,9.540,2,6,4,0.085,1,0,0,0,2,100,1
688,0,27.42,14.500,2,14,8,3.085,1,1,1,0,2,120,12


In [3]:
y

Unnamed: 0,A15
0,0
1,0
2,0
3,1
4,1
...,...
685,1
686,0
687,1
688,1


In [5]:
# X and Y conct to single dataframe

X = dataset.data.features
y = dataset.data.targets

df = pd.concat([X, y], axis=1)

# df replace Class with target
df["target"] = df["A15"]

# df drop Class
df = df.drop(columns=["A15"])

df

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,target
0,1,22.08,11.460,2,4,4,1.585,0,0,0,1,2,100,1213,0
1,0,22.67,7.000,2,8,4,0.165,0,0,0,0,2,160,1,0
2,0,29.58,1.750,1,4,4,1.250,0,0,0,1,2,280,1,0
3,0,21.67,11.500,1,5,3,0.000,1,1,11,1,2,0,1,1
4,1,20.17,8.170,2,6,4,1.960,1,1,14,0,2,60,159,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,1,31.57,10.500,2,14,4,6.500,1,0,0,0,2,0,1,1
686,1,20.67,0.415,2,8,4,0.125,0,0,0,0,2,0,45,0
687,0,18.83,9.540,2,6,4,0.085,1,0,0,0,2,100,1,1
688,0,27.42,14.500,2,14,8,3.085,1,1,1,0,2,120,12,1


In [6]:
# data["target"] = target
# replace False with -1 and True with 1
df["target"] = df["target"].replace({1: 1, 0: -1})

df.dropna(inplace=True)

In [7]:
# check if any missing values
df.isnull().sum()

A1        0
A2        0
A3        0
A4        0
A5        0
A6        0
A7        0
A8        0
A9        0
A10       0
A11       0
A12       0
A13       0
A14       0
target    0
dtype: int64

In [10]:
df

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,target
0,1,22.08,11.460,2,4,4,1.585,0,0,0,1,2,100,1213,-1
1,0,22.67,7.000,2,8,4,0.165,0,0,0,0,2,160,1,-1
2,0,29.58,1.750,1,4,4,1.250,0,0,0,1,2,280,1,-1
3,0,21.67,11.500,1,5,3,0.000,1,1,11,1,2,0,1,1
4,1,20.17,8.170,2,6,4,1.960,1,1,14,0,2,60,159,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,1,31.57,10.500,2,14,4,6.500,1,0,0,0,2,0,1,1
686,1,20.67,0.415,2,8,4,0.125,0,0,0,0,2,0,45,-1
687,0,18.83,9.540,2,6,4,0.085,1,0,0,0,2,100,1,1
688,0,27.42,14.500,2,14,8,3.085,1,1,1,0,2,120,12,1


In [11]:
# find missing values in each column
df.isnull().sum()

A1        0
A2        0
A3        0
A4        0
A5        0
A6        0
A7        0
A8        0
A9        0
A10       0
A11       0
A12       0
A13       0
A14       0
target    0
dtype: int64

In [12]:
import os

In [13]:
tmp_path = os.path.join(os.getcwd(), "data")  # noqa: PTH109, PTH118
# check if the fodler exisit if not create the folder
if not os.path.exists(tmp_path):  # noqa: PTH110
    os.makedirs(tmp_path)  # noqa: PTH103

In [14]:
os.listdir()

['.DS_Store',
 'data',
 'db',
 'test_143.ipynb',
 'test_225.ipynb',
 'test_45.ipynb',
 'test_iono.ipynb',
 'test_new_data_test.ipynb']

In [15]:
file_path = mock_csv_data(df, tmp_path="data", file_name="Statlog")

In [16]:
data_prep = DataPreparation(
    file_path=file_path, x_cols=list(df.columns.difference(["target"])), y_col="target"
)

In [17]:
data_dict = data_prep.get_data(output_type="2")

In [18]:
data_dict.keys()
data_dict["train_x"] = data_dict["train_x"].values
data_dict["test_x"] = data_dict["test_x"].values
data_dict["train_y"] = data_dict["train_y"].values
data_dict["test_y"] = data_dict["test_y"].values

In [19]:
data_dict

{'train_x': array([[ 0.68873723, -0.49388662, -0.91919518, ..., -0.31650063,
         -1.04750391, -0.86419641],
        [ 0.68873723,  0.94661602,  1.08790822, ...,  1.87692286,
          0.95465038,  1.15714435],
        [ 0.68873723,  0.53504384,  1.08790822, ..., -0.27912123,
          0.95465038,  1.15714435],
        ...,
        [-1.45193254, -0.49388662, -0.91919518, ..., -0.57815647,
         -1.04750391, -0.86419641],
        [ 0.68873723,  0.53504384,  1.08790822, ..., -0.14156502,
          0.95465038,  1.15714435],
        [ 0.68873723, -0.49388662,  1.08790822, ...,  1.05457594,
          0.95465038, -0.86419641]]),
 'train_y': array([[ 1],
        [-1],
        [-1],
        [-1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [-1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [-1],
        [-1],
        [-1],
        [ 1],
        [-1],
        [ 1],
 

In [20]:
# find the value with index 1724
data_dict["train_x"]

array([[ 0.68873723, -0.49388662, -0.91919518, ..., -0.31650063,
        -1.04750391, -0.86419641],
       [ 0.68873723,  0.94661602,  1.08790822, ...,  1.87692286,
         0.95465038,  1.15714435],
       [ 0.68873723,  0.53504384,  1.08790822, ..., -0.27912123,
         0.95465038,  1.15714435],
       ...,
       [-1.45193254, -0.49388662, -0.91919518, ..., -0.57815647,
        -1.04750391, -0.86419641],
       [ 0.68873723,  0.53504384,  1.08790822, ..., -0.14156502,
         0.95465038,  1.15714435],
       [ 0.68873723, -0.49388662,  1.08790822, ...,  1.05457594,
         0.95465038, -0.86419641]])

In [21]:
from quoptuna import Optimizer

In [22]:
file_name = "Statlog"

In [23]:
optimizer = Optimizer(db_name=file_name, study_name=file_name, data=data_dict)

In [24]:
study, best_trials = optimizer.optimize(n_trials=100)

[I 2025-09-08 02:17:44,357] A new study created in RDB with name: Statlog
INFO:root:Model DataReuploadingClassifier converged after 496 steps.
[I 2025-09-08 02:17:50,210] Trial 0 finished with value: 0.8849557522123894 and parameters: {'max_vmap': 1, 'batch_size': 32, 'learning_rate': 0.1, 'n_input_copies': 3, 'n_layers': 1, 'observable_type': 'full', 'repeats': 5, 'C': 0.1, 'gamma_factor': 10, 'trotter_steps': 1, 't': 0.01, 'n_qfeatures': 'half', 'n_episodes': 10, 'visible_qubits': 'half', 'temperature': 100, 'encoding_layers': 10, 'degree': 2, 'n_qchannels': 1, 'qkernel_shape': 3, 'kernel_shape': 3, 'filter_name': 'sharpen', 'gamma': 0.001, 'alpha': 0.01, 'hidden_layer_sizes': '(10, 10, 10, 10)', 'eta0': 0.1, 'model_type': 'DataReuploadingClassifier'}. Best is trial 0 with value: 0.8849557522123894.
INFO:root:Model DataReuploadingClassifierSeparable converged after 476 steps.
[I 2025-09-08 02:17:54,863] Trial 1 finished with value: 0.8981481481481481 and parameters: {'max_vmap': 1, '