In [1]:
import pandas as pd

from quoptuna import DataPreparation
from quoptuna.backend.utils.data_utils.data import mock_csv_data

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from ucimlrepo import fetch_ucirepo

# Download UCI dataset 176 (Blood Transfusion Service Center) through ucimlrepo.
dataset = fetch_ucirepo(id=176)

# Pull the feature frame and the target frame out of the fetched payload.
X, y = dataset.data.features, dataset.data.targets

# Print the dataset-level metadata first, then the per-variable description table.
print(dataset.metadata)
print(dataset.variables)

{'uci_id': 176, 'name': 'Blood Transfusion Service Center', 'repository_url': 'https://archive.ics.uci.edu/dataset/176/blood+transfusion+service+center', 'data_url': 'https://archive.ics.uci.edu/static/public/176/data.csv', 'abstract': 'Data taken from the Blood Transfusion Service Center in Hsin-Chu City in Taiwan -- this is a classification problem. ', 'area': 'Business', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 748, 'num_features': 4, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['Donated_Blood'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2008, 'last_updated': 'Sat Mar 16 2024', 'dataset_doi': '10.24432/C5GS39', 'creators': ['I-Cheng Yeh'], 'intro_paper': {'ID': 434, 'type': 'NATIVE', 'title': 'Knowledge discovery on RFM model using Bernoulli sequence', 'authors': 'I. Yeh, K. Yang, Tao-Ming Ting', 'venue': 'Expert systems with applications', 'year': 2009, 'journa

In [3]:
# Show the dataset-level metadata dict as this cell's output.
dataset.metadata

{'uci_id': 176,
 'name': 'Blood Transfusion Service Center',
 'repository_url': 'https://archive.ics.uci.edu/dataset/176/blood+transfusion+service+center',
 'data_url': 'https://archive.ics.uci.edu/static/public/176/data.csv',
 'abstract': 'Data taken from the Blood Transfusion Service Center in Hsin-Chu City in Taiwan -- this is a classification problem. ',
 'area': 'Business',
 'tasks': ['Classification'],
 'characteristics': ['Multivariate'],
 'num_instances': 748,
 'num_features': 4,
 'feature_types': ['Real'],
 'demographics': [],
 'target_col': ['Donated_Blood'],
 'index_col': None,
 'has_missing_values': 'no',
 'missing_values_symbol': None,
 'year_of_dataset_creation': 2008,
 'last_updated': 'Sat Mar 16 2024',
 'dataset_doi': '10.24432/C5GS39',
 'creators': ['I-Cheng Yeh'],
 'intro_paper': {'ID': 434,
  'type': 'NATIVE',
  'title': 'Knowledge discovery on RFM model using Bernoulli sequence',
  'authors': 'I. Yeh, K. Yang, Tao-Ming Ting',
  'venue': 'Expert systems with applicat

In [4]:
# Show the per-variable description table (name, role, type, units, missing values, ...).
dataset.variables

Unnamed: 0,name,role,type,demographic,description,units,missing_values
0,Recency,Feature,Integer,,months since last donation,,no
1,Frequency,Feature,Integer,,total number of donations,,no
2,Monetary,Feature,Integer,,total blood donated in c.c.,,no
3,Time,Feature,Integer,,months since first donation,,no
4,Donated_Blood,Target,Binary,,whether he/she donated blood in March 2007 (1 ...,,no


In [5]:
# Display the feature frame (columns: Recency, Frequency, Monetary, Time).
X

Unnamed: 0,Recency,Frequency,Monetary,Time
0,2,50,12500,98
1,0,13,3250,28
2,1,16,4000,35
3,2,20,5000,45
4,1,24,6000,77
...,...,...,...,...
743,23,2,500,38
744,21,2,500,52
745,23,3,750,62
746,39,1,250,39


In [6]:
# Display the target frame (single column: Donated_Blood).
y

Unnamed: 0,Donated_Blood
0,1
1,1
2,1
3,1
4,0
...,...
743,0
744,0
745,0
746,0


In [7]:
# Concatenate the feature and target frames column-wise into a single DataFrame
# and give the label column ("Donated_Blood") the generic name "target" that the
# rest of the notebook uses.
# rename() leaves the column order untouched, so "target" stays the last column.
df = pd.concat(
    [dataset.data.features, dataset.data.targets], axis=1
).rename(columns={"Donated_Blood": "target"})

df

Unnamed: 0,Recency,Frequency,Monetary,Time,target
0,2,50,12500,98,1
1,0,13,3250,28,1
2,1,16,4000,35,1
3,2,20,5000,45,1
4,1,24,6000,77,0
...,...,...,...,...,...
743,23,2,500,38,0
744,21,2,500,52,0
745,23,3,750,62,0
746,39,1,250,39,0


In [8]:
# Re-encode the binary label as {-1, +1}: 0 becomes -1, 1 is left unchanged.
# Rows containing missing values are then dropped. The result is rebound to `df`
# instead of using inplace=True, so the state change is explicit in the cell.
df = df.assign(target=df["target"].replace({0: -1})).dropna()

In [9]:
# Count the missing entries in every column (a non-zero count means NaNs remain).
df.isna().sum()

Recency      0
Frequency    0
Monetary     0
Time         0
target       0
dtype: int64

In [10]:
# df.head()
# #  apply min-max normalisation to all of Age, Gender, TB, DB, Alkphos, Sgpt, Sgot, TP, ALB, A/G Ratio, target -- except target, Gender and A/G Ratio

# df["Age"]=df["Age"].apply(lambda x: (x-df["Age"].min())/(df["Age"].max()-df["Age"].min()))
# df["TB"]=df["TB"].apply(lambda x: (x-df["TB"].min())/(df["TB"].max()-df["TB"].min()))
# df["DB"]=df["DB"].apply(lambda x: (x-df["DB"].min())/(df["DB"].max()-df["DB"].min()))
# df["Alkphos"]=df["Alkphos"].apply(lambda x: (x-df["Alkphos"].min())/(df["Alkphos"].max()-df["Alkphos"].min()))
# df["Sgpt"]=df["Sgpt"].apply(lambda x: (x-df["Sgpt"].min())/(df["Sgpt"].max()-df["Sgpt"].min()))
# df["Sgot"]=df["Sgot"].apply(lambda x: (x-df["Sgot"].min())/(df["Sgot"].max()-df["Sgot"].min()))
# df["TP"]=df["TP"].apply(lambda x: (x-df["TP"].min())/(df["TP"].max()-df["TP"].min()))
# df["ALB"]=df["ALB"].apply(lambda x: (x-df["ALB"].min())/(df["ALB"].max()-df["ALB"].min()))


In [11]:
# Re-inspect the frame: the target column should now hold -1/1 instead of 0/1.
df

Unnamed: 0,Recency,Frequency,Monetary,Time,target
0,2,50,12500,98,1
1,0,13,3250,28,1
2,1,16,4000,35,1
3,2,20,5000,45,1
4,1,24,6000,77,-1
...,...,...,...,...,...
743,23,2,500,38,-1
744,21,2,500,52,-1
745,23,3,750,62,-1
746,39,1,250,39,-1


In [12]:
# Per-column tally of missing values in df.
df.isna().sum()

Recency      0
Frequency    0
Monetary     0
Time         0
target       0
dtype: int64

In [13]:
import os

In [14]:
# Absolute path of the "data" folder inside the current working directory.
tmp_path = os.path.join(os.getcwd(), "data")  # noqa: PTH109, PTH118
# Create the folder if it is missing. exist_ok=True makes this one idempotent
# call and avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(tmp_path, exist_ok=True)  # noqa: PTH103

In [15]:
# List the entries of the current working directory (the "data" folder should be among them).
os.listdir()

['.DS_Store',
 'data',
 'db',
 'db copy',
 'test_143.ipynb',
 'test_176.ipynb',
 'test_225.ipynb',
 'test_267.ipynb',
 'test_45.ipynb',
 'test_iono.ipynb',
 'test_new_data_test.ipynb',
 'test_shap.ipynb']

In [17]:
# Hand df to the project's mock_csv_data helper and keep the value it returns as file_path.
# NOTE(review): presumably this writes df as a CSV named "Blood" under data/ and
# returns its path -- confirm against the helper.
# NOTE(review): the relative literal "data" is passed here rather than the absolute
# tmp_path computed earlier; confirm both refer to the same folder.
file_path = mock_csv_data(df, tmp_path="data", file_name="Blood")

In [18]:
# Build the DataPreparation object: every column except "target" is a feature,
# and "target" is the label.
# Index.difference() returns its result sorted, so x_cols is in alphabetical order
# (Frequency, Monetary, Recency, Time) rather than the DataFrame's column order.
data_prep = DataPreparation(
    file_path=file_path, x_cols=list(df.columns.difference(["target"])), y_col="target"
)

In [19]:
# Fetch the prepared train/test splits from data_prep.
# NOTE(review): the meaning of output_type="2" is not visible in this notebook --
# confirm against DataPreparation.get_data.
data_dict = data_prep.get_data(output_type="2")

In [20]:
# Replace each split in data_dict with its underlying array before it is handed
# to the Optimizer.
# getattr falls back to the entry itself when it has no `.values` attribute
# (i.e. it has already been converted), so re-running this cell is a no-op
# instead of raising AttributeError.
for split_name in ("train_x", "test_x", "train_y", "test_y"):
    split = data_dict[split_name]
    data_dict[split_name] = getattr(split, "values", split)

In [21]:
# Inspect the converted splits (this prints every array in the dict).
data_dict

{'train_x': array([[ 0.59726713,  0.59726713, -0.0626311 , -0.75048336],
        [-0.43093957, -0.43093957, -0.0626311 , -0.83258377],
        [-0.43093957, -0.43093957,  1.42068485, -0.33998131],
        ...,
        [ 0.9400027 ,  0.9400027 ,  0.80263654,  0.23472156],
        [ 0.25453156,  0.25453156,  0.80263654,  2.41038242],
        [ 0.59726713,  0.59726713, -0.68067941,  0.23472156]]),
 'train_y': array([[ 1],
        [ 1],
        [-1],
        [ 1],
        [-1],
        [ 1],
        [-1],
        [ 1],
        [-1],
        [ 1],
        [ 1],
        [-1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [-1],
        [-1],
        [ 1],
        [ 1],
        [-1],
        [ 1],
        [-1],
        [-1],
        [ 1],
        [ 1],
        [-1],
        [-1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [-1],
        [ 1],
        [ 1],
        [ 1],
   

In [24]:
# Display the training feature matrix (the whole array; no single row is selected).
data_dict["train_x"]

array([[ 0.59726713,  0.59726713, -0.0626311 , -0.75048336],
       [-0.43093957, -0.43093957, -0.0626311 , -0.83258377],
       [-0.43093957, -0.43093957,  1.42068485, -0.33998131],
       ...,
       [ 0.9400027 ,  0.9400027 ,  0.80263654,  0.23472156],
       [ 0.25453156,  0.25453156,  0.80263654,  2.41038242],
       [ 0.59726713,  0.59726713, -0.68067941,  0.23472156]])

In [25]:
from quoptuna import Optimizer

In [26]:
# Label reused below as both db_name and study_name of the Optimizer.
file_name = "Blood"

In [27]:
# Create the Optimizer over the prepared splits; the database and the study share the same name.
optimizer = Optimizer(db_name=file_name, study_name=file_name, data=data_dict)

In [28]:
# Run 100 optimisation trials and keep the resulting study and best trials.
# NOTE(review): the captured output shows a TypeError logged from inside model.fit
# during trial 0, after which the trial is still reported as finished -- check
# whether such trials should count towards the results.
study, best_trials = optimizer.optimize(n_trials=100)

[I 2025-09-29 01:58:31,913] A new study created in RDB with name: Blood
ERROR:root:An error occurred
Traceback (most recent call last):
  File "C:\Users\aneen\Documents\GitHub\quoptuna\src\quoptuna\backend\tuners\optimizer.py", line 136, in objective
    model.fit(self.train_x, self.train_y)
  File "C:\Users\aneen\Documents\GitHub\quoptuna\src\quoptuna\backend\base\pennylane_models\qml_benchmarks\models\separable.py", line 377, in fit
    self.svm.random_state = int(
                            ^^^^
  File "c:\Users\aneen\Documents\GitHub\quoptuna\.venv\Lib\site-packages\jax\_src\array.py", line 286, in __int__
    core.check_scalar_conversion(self)
  File "c:\Users\aneen\Documents\GitHub\quoptuna\.venv\Lib\site-packages\jax\_src\core.py", line 653, in check_scalar_conversion
    raise TypeError("Only scalar arrays can be converted to Python scalars; "
TypeError: Only scalar arrays can be converted to Python scalars; got arr.ndim=1
[I 2025-09-29 01:58:32,105] Trial 0 finished with valu