In [1]:
# Imports
import sys
from pathlib import Path

# Resolve project root and ensure it's on sys.path.
#
# Search the working directory and up to MAX_PARENT_LEVELS of its ancestors
# for the first directory containing one of ROOT_MARKERS. If no candidate
# matches, fall back to the working directory rather than silently landing on
# an unverified ancestor (which would then be pushed onto sys.path).
ROOT_MARKERS = ("pyproject.toml", "raw_data")
MAX_PARENT_LEVELS = 5

START_DIR = Path.cwd().resolve()
ROOT = next(
    (
        candidate
        for candidate in (START_DIR, *START_DIR.parents)[: MAX_PARENT_LEVELS + 1]
        if any((candidate / marker).exists() for marker in ROOT_MARKERS)
    ),
    START_DIR,  # nothing matched: stay where the notebook was launched
)

# Put ROOT first on sys.path (once) so modules located under it are importable.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from katabatic.pipeline.train_test_split.pipeline import TrainTestSplitPipeline
from utils import discretize_preprocess
from katabatic.models.ganblr.models import GANBLR


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Preprocess data: run discretize_preprocess on the raw car dataset and write
# the result under <ROOT>/discretized_data/.
dataset_path = ROOT / "raw_data" / "car.csv"
output_path = ROOT / "discretized_data" / "car.csv"

# Fail early with a clear message if the input is missing (for example because
# ROOT was resolved to the wrong directory), before any output directory is
# created.
if not dataset_path.is_file():
    raise FileNotFoundError(
        f"Raw dataset not found: {dataset_path} (resolved ROOT: {ROOT})"
    )

output_path.parent.mkdir(parents=True, exist_ok=True)

# The helper is called with plain string paths, as in the original cell.
discretize_preprocess(str(dataset_path), str(output_path))



Preprocessing: /Users/vikumdabare/Documents/Work/Katabatic/katabatic/raw_data/car.csv
Saved preprocessed discrete dataset to: /Users/vikumdabare/Documents/Work/Katabatic/katabatic/discretized_data/car.csv


In [3]:
# Run pipeline: split the discretized dataset, train GANBLR on the training
# part, and print whatever pipeline.run() returns.
input_csv = str(output_path)

# Keep the directory as a Path so sub-directories can be derived from it
# directly; the pipeline itself is given plain strings.
output_dir_path = ROOT / "sample_data" / "car"
output_dir = str(output_dir_path)
real_test_dir = output_dir

# GANBLR actually saves under <output_dir>/ganblr (due to absolute path join in model)
synthetic_dir = str(output_dir_path / "ganblr")

pipeline = TrainTestSplitPipeline(model=GANBLR)
result = pipeline.run(
    input_csv=input_csv,
    output_dir=output_dir,
    synthetic_dir=synthetic_dir,
    real_test_dir=real_test_dir,
)
print(result)



Loaded data with shape: (1728, 7)
Saved train/test full data
Train size: (1382, 7), Test size: (346, 7)
Train label distribution:
 6
2    0.700434
0    0.222142
1    0.039797
3    0.037627
Name: proportion, dtype: float64
Test label distribution:
 6
2    0.699422
0    0.222543
1    0.040462
3    0.037572
Name: proportion, dtype: float64
Saved X/y split
Training shape: (1382, 6) (1382,)
Test shape: (346, 6) (346,)
Loaded X shape: (1382, 6), y shape: (1382,)
warmup run:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


22/22 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.3039 - loss: 1.3797


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100: G_loss = 3.626862, G_accuracy = 0.385673, D_loss = 1.283450, D_accuracy = 0.497060


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 2/100: G_loss = 3.749597, G_accuracy = 0.452243, D_loss = 1.330202, D_accuracy = 0.502488


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 3/100: G_loss = 2.450386, G_accuracy = 0.505065, D_loss = 0.885121, D_accuracy = 0.500678


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 4/100: G_loss = 4.418542, G_accuracy = 0.553546, D_loss = 1.672563, D_accuracy = 0.501583


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 5/100: G_loss = 1.368670, G_accuracy = 0.611433, D_loss = 1.306325, D_accuracy = 0.500226


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 6/100: G_loss = 1.345007, G_accuracy = 0.657019, D_loss = 1.408818, D_accuracy = 0.502035


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 7/100: G_loss = 2.071465, G_accuracy = 0.701158, D_loss = 0.857590, D_accuracy = 0.507010


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 8/100: G_loss = 1.912939, G_accuracy = 0.738784, D_loss = 0.787694, D_accuracy = 0.504297


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 9/100: G_loss = 3.266705, G_accuracy = 0.769175, D_loss = 1.242886, D_accuracy = 0.511533


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 10/100: G_loss = 1.985476, G_accuracy = 0.799566, D_loss = 0.873159, D_accuracy = 0.497060


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 11/100: G_loss = 2.893404, G_accuracy = 0.833575, D_loss = 1.145032, D_accuracy = 0.493894


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 12/100: G_loss = 1.598580, G_accuracy = 0.853111, D_loss = 0.792176, D_accuracy = 0.499322


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 13/100: G_loss = 2.005751, G_accuracy = 0.859624, D_loss = 0.889270, D_accuracy = 0.498869


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 14/100: G_loss = 1.462699, G_accuracy = 0.869030, D_loss = 1.065368, D_accuracy = 0.507915


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 15/100: G_loss = 1.333826, G_accuracy = 0.879884, D_loss = 1.047377, D_accuracy = 0.509724


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 16/100: G_loss = 1.405094, G_accuracy = 0.886397, D_loss = 1.030306, D_accuracy = 0.508820


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 17/100: G_loss = 1.386719, G_accuracy = 0.892909, D_loss = 0.966596, D_accuracy = 0.496156


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 18/100: G_loss = 1.017356, G_accuracy = 0.901592, D_loss = 1.485118, D_accuracy = 0.508820


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 19/100: G_loss = 1.537760, G_accuracy = 0.905933, D_loss = 0.768101, D_accuracy = 0.499322


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 20/100: G_loss = 1.405611, G_accuracy = 0.911722, D_loss = 0.876616, D_accuracy = 0.505201


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 21/100: G_loss = 1.769615, G_accuracy = 0.921129, D_loss = 0.878066, D_accuracy = 0.509724


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 22/100: G_loss = 1.606314, G_accuracy = 0.921852, D_loss = 0.775755, D_accuracy = 0.497512


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 23/100: G_loss = 1.147735, G_accuracy = 0.924023, D_loss = 0.893965, D_accuracy = 0.488014


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 24/100: G_loss = 2.489864, G_accuracy = 0.929812, D_loss = 1.058734, D_accuracy = 0.516508


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 25/100: G_loss = 1.875788, G_accuracy = 0.930535, D_loss = 0.867185, D_accuracy = 0.523293


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 26/100: G_loss = 1.006684, G_accuracy = 0.931259, D_loss = 1.044611, D_accuracy = 0.500678


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 27/100: G_loss = 0.906585, G_accuracy = 0.933430, D_loss = 1.170866, D_accuracy = 0.514247


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 28/100: G_loss = 1.000283, G_accuracy = 0.934153, D_loss = 0.964720, D_accuracy = 0.499774


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 29/100: G_loss = 0.994625, G_accuracy = 0.937048, D_loss = 0.978894, D_accuracy = 0.502035


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 30/100: G_loss = 1.437756, G_accuracy = 0.937771, D_loss = 0.837381, D_accuracy = 0.521483


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 31/100: G_loss = 1.282842, G_accuracy = 0.940666, D_loss = 0.895006, D_accuracy = 0.492537


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 32/100: G_loss = 1.042554, G_accuracy = 0.941389, D_loss = 1.091154, D_accuracy = 0.495251


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 33/100: G_loss = 0.939740, G_accuracy = 0.941389, D_loss = 0.954222, D_accuracy = 0.502488


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 34/100: G_loss = 0.714450, G_accuracy = 0.943560, D_loss = 1.201117, D_accuracy = 0.501583


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 35/100: G_loss = 0.662542, G_accuracy = 0.943560, D_loss = 1.471346, D_accuracy = 0.496608


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 36/100: G_loss = 0.967162, G_accuracy = 0.943560, D_loss = 1.022276, D_accuracy = 0.500226


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 37/100: G_loss = 1.189541, G_accuracy = 0.944284, D_loss = 0.939783, D_accuracy = 0.491180


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 38/100: G_loss = 2.126721, G_accuracy = 0.945731, D_loss = 1.016354, D_accuracy = 0.499322


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 39/100: G_loss = 2.372452, G_accuracy = 0.947178, D_loss = 1.147450, D_accuracy = 0.504297


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 40/100: G_loss = 1.494323, G_accuracy = 0.947178, D_loss = 0.838646, D_accuracy = 0.499322


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 41/100: G_loss = 0.764240, G_accuracy = 0.947902, D_loss = 1.084931, D_accuracy = 0.502940


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 42/100: G_loss = 0.756489, G_accuracy = 0.947178, D_loss = 1.142569, D_accuracy = 0.507915


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 43/100: G_loss = 0.718320, G_accuracy = 0.952243, D_loss = 1.064844, D_accuracy = 0.485753


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 44/100: G_loss = 2.382577, G_accuracy = 0.952243, D_loss = 1.138613, D_accuracy = 0.511533


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 45/100: G_loss = 0.787707, G_accuracy = 0.954414, D_loss = 1.120588, D_accuracy = 0.495251


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 46/100: G_loss = 0.702314, G_accuracy = 0.953690, D_loss = 1.119519, D_accuracy = 0.496608


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 47/100: G_loss = 1.174904, G_accuracy = 0.955137, D_loss = 0.961417, D_accuracy = 0.515604


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 48/100: G_loss = 0.892759, G_accuracy = 0.955861, D_loss = 0.973751, D_accuracy = 0.502035


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 49/100: G_loss = 1.055553, G_accuracy = 0.958032, D_loss = 0.831319, D_accuracy = 0.497512


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 50/100: G_loss = 2.556316, G_accuracy = 0.958755, D_loss = 1.341371, D_accuracy = 0.487110


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 51/100: G_loss = 0.471015, G_accuracy = 0.960926, D_loss = 1.425503, D_accuracy = 0.495251


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 52/100: G_loss = 0.765781, G_accuracy = 0.962373, D_loss = 0.873601, D_accuracy = 0.495251


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 53/100: G_loss = 0.983643, G_accuracy = 0.962373, D_loss = 0.972247, D_accuracy = 0.491180


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 54/100: G_loss = 1.188206, G_accuracy = 0.962373, D_loss = 0.877956, D_accuracy = 0.516056


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 55/100: G_loss = 0.957192, G_accuracy = 0.963821, D_loss = 0.767204, D_accuracy = 0.479873


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 56/100: G_loss = 0.840092, G_accuracy = 0.964544, D_loss = 0.967971, D_accuracy = 0.501131


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 57/100: G_loss = 2.776848, G_accuracy = 0.964544, D_loss = 1.503305, D_accuracy = 0.485301


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 58/100: G_loss = 0.708462, G_accuracy = 0.965991, D_loss = 1.100215, D_accuracy = 0.513795


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 59/100: G_loss = 1.590851, G_accuracy = 0.965991, D_loss = 0.948066, D_accuracy = 0.502035


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 60/100: G_loss = 0.468872, G_accuracy = 0.967439, D_loss = 1.288793, D_accuracy = 0.502035


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 61/100: G_loss = 0.772424, G_accuracy = 0.968886, D_loss = 0.802680, D_accuracy = 0.507010


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 62/100: G_loss = 1.876915, G_accuracy = 0.968886, D_loss = 1.072866, D_accuracy = 0.491180


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 63/100: G_loss = 0.905665, G_accuracy = 0.969609, D_loss = 0.863247, D_accuracy = 0.513342


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 64/100: G_loss = 0.475258, G_accuracy = 0.971056, D_loss = 1.333631, D_accuracy = 0.502488


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 65/100: G_loss = 0.420337, G_accuracy = 0.971056, D_loss = 1.275482, D_accuracy = 0.508367


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 66/100: G_loss = 0.624032, G_accuracy = 0.971056, D_loss = 1.040748, D_accuracy = 0.506106


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 67/100: G_loss = 0.927105, G_accuracy = 0.972504, D_loss = 0.732469, D_accuracy = 0.525554


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 68/100: G_loss = 2.454082, G_accuracy = 0.974674, D_loss = 1.201315, D_accuracy = 0.506558


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 69/100: G_loss = 1.261377, G_accuracy = 0.974674, D_loss = 0.766417, D_accuracy = 0.513342


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 70/100: G_loss = 1.549549, G_accuracy = 0.973951, D_loss = 0.919685, D_accuracy = 0.535957


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 71/100: G_loss = 0.291271, G_accuracy = 0.973951, D_loss = 1.758464, D_accuracy = 0.498869


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 72/100: G_loss = 0.562346, G_accuracy = 0.972504, D_loss = 1.004355, D_accuracy = 0.510176


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 73/100: G_loss = 0.854315, G_accuracy = 0.974674, D_loss = 0.952402, D_accuracy = 0.508820


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 74/100: G_loss = 1.090856, G_accuracy = 0.973951, D_loss = 0.893961, D_accuracy = 0.485301


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 75/100: G_loss = 1.101313, G_accuracy = 0.974674, D_loss = 0.862558, D_accuracy = 0.509724


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 76/100: G_loss = 0.834668, G_accuracy = 0.976122, D_loss = 0.797613, D_accuracy = 0.496608


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 77/100: G_loss = 0.635199, G_accuracy = 0.976122, D_loss = 0.866163, D_accuracy = 0.492990


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 78/100: G_loss = 1.503436, G_accuracy = 0.976845, D_loss = 0.983090, D_accuracy = 0.480326


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 79/100: G_loss = 0.654862, G_accuracy = 0.977569, D_loss = 1.072160, D_accuracy = 0.507463


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 80/100: G_loss = 1.607313, G_accuracy = 0.978292, D_loss = 0.942549, D_accuracy = 0.511533


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 81/100: G_loss = 0.943633, G_accuracy = 0.979739, D_loss = 0.765762, D_accuracy = 0.503844


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 82/100: G_loss = 2.757790, G_accuracy = 0.981187, D_loss = 1.416554, D_accuracy = 0.505654


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 83/100: G_loss = 0.526286, G_accuracy = 0.981187, D_loss = 1.096951, D_accuracy = 0.513342


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 84/100: G_loss = 1.801581, G_accuracy = 0.982634, D_loss = 1.071358, D_accuracy = 0.490276


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 85/100: G_loss = 1.863629, G_accuracy = 0.982634, D_loss = 1.059583, D_accuracy = 0.506106


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 86/100: G_loss = 0.905024, G_accuracy = 0.982634, D_loss = 0.825107, D_accuracy = 0.530077


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 87/100: G_loss = 0.217354, G_accuracy = 0.984081, D_loss = 2.024848, D_accuracy = 0.497060


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 88/100: G_loss = 1.350591, G_accuracy = 0.984081, D_loss = 0.857704, D_accuracy = 0.518770


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 89/100: G_loss = 0.985321, G_accuracy = 0.984081, D_loss = 0.833287, D_accuracy = 0.486205


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 90/100: G_loss = 0.611603, G_accuracy = 0.984081, D_loss = 0.960798, D_accuracy = 0.496608


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 91/100: G_loss = 1.633310, G_accuracy = 0.983357, D_loss = 1.019655, D_accuracy = 0.508820


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 92/100: G_loss = 0.457230, G_accuracy = 0.983357, D_loss = 1.089765, D_accuracy = 0.501583


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 93/100: G_loss = 0.578573, G_accuracy = 0.984081, D_loss = 0.966669, D_accuracy = 0.513795


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 94/100: G_loss = 0.399883, G_accuracy = 0.984081, D_loss = 1.104065, D_accuracy = 0.503844


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 95/100: G_loss = 0.929760, G_accuracy = 0.984805, D_loss = 0.795717, D_accuracy = 0.499322


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 96/100: G_loss = 0.808958, G_accuracy = 0.984805, D_loss = 0.899060, D_accuracy = 0.499774


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 97/100: G_loss = 0.280967, G_accuracy = 0.984805, D_loss = 1.466435, D_accuracy = 0.508820


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 98/100: G_loss = 1.609813, G_accuracy = 0.984805, D_loss = 0.959209, D_accuracy = 0.501131


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 99/100: G_loss = 1.143486, G_accuracy = 0.984081, D_loss = 0.957850, D_accuracy = 0.495251




Epoch 100/100: G_loss = 2.295796, G_accuracy = 0.983357, D_loss = 1.221713, D_accuracy = 0.497512


Generating for node: 2: 100%|██████████| 7/7 [00:00<00:00, 231.38it/s]



 Synthetic data saved to: /Users/vikumdabare/Documents/Work/Katabatic/katabatic/sample_data/car/ganblr


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Results saved to: Results/car/ganblr_tstr.csv

TSTR Evaluation Results:

LR:
Accuracy: 0.7052
F1 Score: 0.6427

MLP:
Accuracy: 0.8468
F1 Score: 0.8378

RF:
Accuracy: 0.8902
F1 Score: 0.8885

XGBoost:
Accuracy: 0.8960
F1 Score: 0.8970
Train test split pipeline executed successfully.
