In [33]:
import pandas as pd

from quoptuna import DataPreparation
from quoptuna.backend.utils.data_utils.data import mock_csv_data

In [34]:
from ucimlrepo import fetch_ucirepo

# Fetch dataset id 225 from the UCI ML repository. The metadata printed below
# identifies it as ILPD (Indian Liver Patient Dataset).
dataset = fetch_ucirepo(id=225)

# Features and targets as exposed on dataset.data (pandas dataframes).
X = dataset.data.features
y = dataset.data.targets

# Dataset-level metadata (id, name, URLs, abstract, ...).
print(dataset.metadata)

# Per-variable information.
print(dataset.variables)

{'uci_id': 225, 'name': 'ILPD (Indian Liver Patient Dataset)', 'repository_url': 'https://archive.ics.uci.edu/dataset/225/ilpd+indian+liver+patient+dataset', 'data_url': 'https://archive.ics.uci.edu/static/public/225/data.csv', 'abstract': 'Death by liver cirrhosis continues to increase, given the increase in alcohol consumption rates, chronic hepatitis infections, and obesity-related liver disease. Notwithstanding the high mortality of this disease, liver diseases do not affect all sub-populations equally. The early detection of pathology is a determinant of patient outcomes, yet female patients appear to be marginalized when it comes to early diagnosis of liver pathology. \nThe dataset comprises 584 patient records collected from the NorthEast of Andhra Pradesh, India.\nThe prediction task is to determine whether a patient suffers from liver disease based on the information about several biochemical markers, including albumin and other enzymes required for metabolism.\n', 'area': 'He

In [35]:
# Show the target frame; its label column is named "Selector".
y

Unnamed: 0,Selector
0,1
1,1
2,1
3,1
4,1
...,...
578,2
579,1
580,1
581,1


In [36]:
# Concatenate features and targets into a single dataframe and rename the
# label column ("Selector" in the raw data) to "target".
# "Selector" is the last column after the concat, so renaming it yields the
# same column order as appending "target" and then dropping "Selector".
X = dataset.data.features
y = dataset.data.targets

# errors="raise" keeps the failure loud if the label column is ever missing.
df = pd.concat([X, y], axis=1).rename(columns={"Selector": "target"}, errors="raise")

df

Unnamed: 0,Age,Gender,TB,DB,Alkphos,Sgpt,Sgot,TP,ALB,A/G Ratio,target
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.90,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.00,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2
579,40,Male,0.6,0.1,98,35,31,6.0,3.2,1.10,1
580,52,Male,0.8,0.2,245,48,49,6.4,3.2,1.00,1
581,31,Male,1.3,0.5,184,29,32,6.8,3.4,1.00,1


In [39]:
# Re-encode the label: class 1 stays 1, class 2 becomes -1.
df["target"] = df["target"].replace({1: 1, 2: -1})

# Drop every row that has a missing value. Reassigning (rather than
# inplace=True) keeps the step an explicit "new frame from old frame".
df = df.dropna()

In [40]:
# Count missing values per column; every count should be 0 after the dropna.
df.isnull().sum()

Age          0
Gender       0
TB           0
DB           0
Alkphos      0
Sgpt         0
Sgot         0
TP           0
ALB          0
A/G Ratio    0
target       0
dtype: int64

In [41]:
# Encode Gender numerically: Male -> 1, Female -> -1.
# Series.map is used instead of Series.replace because replacing strings with
# integers in an object column triggers pandas' downcasting FutureWarning.
# NOTE: unlike replace, map turns any value other than "Male"/"Female" into NaN.
df["Gender"] = df["Gender"].map({"Male": 1, "Female": -1})

df

  df["Gender"] = df["Gender"].replace({"Male": 1, "Female": -1})


Unnamed: 0,Age,Gender,TB,DB,Alkphos,Sgpt,Sgot,TP,ALB,A/G Ratio,target
0,65,-1,0.7,0.1,187,16,18,6.8,3.3,0.90,1
1,62,1,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,1,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,1,1.0,0.4,182,14,20,6.8,3.4,1.00,1
4,72,1,3.9,2.0,195,27,59,7.3,2.4,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,60,1,0.5,0.1,500,20,34,5.9,1.6,0.37,-1
579,40,1,0.6,0.1,98,35,31,6.0,3.2,1.10,1
580,52,1,0.8,0.2,245,48,49,6.4,3.2,1.00,1
581,31,1,1.3,0.5,184,29,32,6.8,3.4,1.00,1


In [42]:
# Min-max scale the continuous features to the [0, 1] range.
# Gender, A/G Ratio and target are deliberately left unscaled.
MINMAX_COLS = ["Age", "TB", "DB", "Alkphos", "Sgpt", "Sgot", "TP", "ALB"]

for col in MINMAX_COLS:
    # Compute the bounds once per column; the vectorised expression performs the
    # same (x - min) / (max - min) arithmetic as a per-row lambda, without
    # rescanning the column for every row.
    col_min = df[col].min()
    col_max = df[col].max()
    df[col] = (df[col] - col_min) / (col_max - col_min)

In [43]:
# Inspect the frame after min-max scaling.
df

Unnamed: 0,Age,Gender,TB,DB,Alkphos,Sgpt,Sgot,TP,ALB,A/G Ratio,target
0,0.709302,-1,0.004021,0.000000,0.060576,0.003015,0.001626,0.594203,0.521739,0.90,1
1,0.674419,1,0.140751,0.275510,0.310699,0.027136,0.018296,0.695652,0.500000,0.74,1
2,0.674419,1,0.092493,0.204082,0.208598,0.025126,0.011791,0.623188,0.521739,0.89,1
3,0.627907,1,0.008043,0.015306,0.058134,0.002010,0.002033,0.594203,0.543478,1.00,1
4,0.790698,1,0.046917,0.096939,0.064485,0.008543,0.009961,0.666667,0.326087,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,0.651163,1,0.001340,0.000000,0.213483,0.005025,0.004879,0.463768,0.152174,0.37,-1
579,0.418605,1,0.002681,0.000000,0.017098,0.012563,0.004269,0.478261,0.500000,1.10,1
580,0.558140,1,0.005362,0.005102,0.088911,0.019095,0.007928,0.536232,0.500000,1.00,1
581,0.313953,1,0.012064,0.020408,0.059111,0.009548,0.004472,0.594203,0.543478,1.00,1


In [44]:
# Re-count missing values per column after scaling: a column whose max equals
# its min would have produced NaN (0/0) in the min-max step.
df.isnull().sum()

Age          0
Gender       0
TB           0
DB           0
Alkphos      0
Sgpt         0
Sgot         0
TP           0
ALB          0
A/G Ratio    0
target       0
dtype: int64

In [45]:
import os

In [46]:
# Absolute path of the "data" folder under the current working directory.
tmp_path = os.path.join(os.getcwd(), "data")  # noqa: PTH109, PTH118
# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that another process could race against.
os.makedirs(tmp_path, exist_ok=True)  # noqa: PTH103

In [47]:
# List the working directory to confirm that the "data" folder is present.
os.listdir()

['.DS_Store',
 'data',
 'db',
 'test_225.ipynb',
 'test_iono.ipynb',
 'test_new_data_test.ipynb']

In [48]:
# Write the prepared ILPD frame to a CSV under "data" and keep the returned path.
# The file is named after this dataset; the previous name "Ionosphere" was a
# leftover from another notebook and mislabelled the file.
# NOTE(review): how mock_csv_data builds the final file name is not visible
# here — confirm nothing else expects the old name.
file_path = mock_csv_data(df, tmp_path="data", file_name="ILPD")

In [49]:
# Build the data-preparation helper from the CSV written above.
# x_cols is every column except "target". Index.difference returns its result
# sorted, so the feature columns are passed in alphabetical order rather than
# in the dataframe's original column order.
data_prep = DataPreparation(
    file_path=file_path, x_cols=list(df.columns.difference(["target"])), y_col="target"
)

In [50]:
# Fetch the prepared splits; the result is indexed below by "train_x",
# "test_x", "train_y" and "test_y".
# NOTE(review): the meaning of output_type="2" is defined inside quoptuna — confirm.
data_dict = data_prep.get_data(output_type="2")

In [51]:
# Convert each split to a plain array via the pandas `.values` accessor.
# getattr(..., "values", split) leaves an entry untouched when it no longer has
# a `.values` attribute (i.e. it was already converted), so re-running this
# cell does not raise AttributeError.
for split_name in ("train_x", "test_x", "train_y", "test_y"):
    split = data_dict[split_name]
    data_dict[split_name] = getattr(split, "values", split)

In [52]:
# Show the converted splits (this prints the full array reprs).
data_dict

{'train_x': array([[-0.46055914,  0.20344649,  0.93890865, ...,  0.20693351,
          0.11002759,  0.57055009],
        [ 0.79211845,  1.46329159, -0.91205529, ..., -0.19192   ,
         -0.40424871,  1.21648291],
        [-0.77372854, -0.17450704, -0.78865769, ..., -0.37768739,
         -0.42031984,  0.47827397],
        ...,
        [ 0.79211845,  1.96722963, -0.91205529, ..., -0.36129615,
         -0.42031984,  2.87745304],
        [ 0.16577966, -0.04852253, -0.17166971, ..., -0.30665869,
         -0.40424871, -0.25993497],
        [ 1.88821134,  1.33730708, -1.71413966, ..., -0.24655747,
         -0.38817757,  0.38599785]]),
 'train_y': array([[-1],
        [ 1],
        [-1],
        [-1],
        [-1],
        [-1],
        [-1],
        [-1],
        [-1],
        [ 1],
        [-1],
        [ 1],
        [-1],
        [ 1],
        [-1],
        [-1],
        [-1],
        [-1],
        [-1],
        [ 1],
        [-1],
        [ 1],
        [ 1],
        [ 1],
        [-1],
 

In [53]:
# Inspect the training feature matrix (an array after the .values conversion).
data_dict["train_x"]

array([[-0.46055914,  0.20344649,  0.93890865, ...,  0.20693351,
         0.11002759,  0.57055009],
       [ 0.79211845,  1.46329159, -0.91205529, ..., -0.19192   ,
        -0.40424871,  1.21648291],
       [-0.77372854, -0.17450704, -0.78865769, ..., -0.37768739,
        -0.42031984,  0.47827397],
       ...,
       [ 0.79211845,  1.96722963, -0.91205529, ..., -0.36129615,
        -0.42031984,  2.87745304],
       [ 0.16577966, -0.04852253, -0.17166971, ..., -0.30665869,
        -0.40424871, -0.25993497],
       [ 1.88821134,  1.33730708, -1.71413966, ..., -0.24655747,
        -0.38817757,  0.38599785]])

In [54]:
from quoptuna import Optimizer

In [56]:
# Shared identifier, used below as both the database name and the study name.
file_name = "ILPD_v1"

In [57]:
# Create the optimizer over the prepared splits; db_name and study_name share
# the same identifier.
optimizer = Optimizer(db_name=file_name, study_name=file_name, data=data_dict)

In [58]:
# Run 100 optimisation trials and keep the study together with its best trials.
# NOTE(review): the saved output of this cell logs an error raised in
# create_model while parsing params["hidden_layer_sizes"] with
# ast.literal_eval — confirm whether such failed trials are expected/handled.
study, best_trials = optimizer.optimize(n_trials=100)

[I 2025-09-07 17:43:27,464] A new study created in RDB with name: ILPD_v1
ERROR:root:An error occurred
Traceback (most recent call last):
  File "C:\Users\aneen\Documents\GitHub\quoptuna\src\quoptuna\backend\tuners\optimizer.py", line 134, in objective
    model = create_model(model_type, **params)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\aneen\Documents\GitHub\quoptuna\src\quoptuna\backend\models.py", line 135, in create_model
    params["hidden_layer_sizes"] = ast.literal_eval(params["hidden_layer_sizes"])
                                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.12_3.12.2800.0_x64__qbz5n2kfra8p0\Lib\ast.py", line 66, in literal_eval
    node_or_string = parse(node_or_string.lstrip(" \t"), mode='eval')
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.12_3.12.2800.0_x64__qbz5n2k