In [None]:
import pickle
from pathlib import Path

import jsonlines
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import KBinsDiscretizer


def load_jsonl_data(file_path: str):
    """Read a JSON Lines file and return its entries as a list.

    Each entry yielded by the ``jsonlines`` reader becomes one element of the
    returned list, in file order.
    """
    with jsonlines.open(file_path, "r") as reader:
        records = [record for record in reader]
    return records

In [None]:
def extract_amounts(dataset_path: str):
    """Gather every amount from the train and valid splits into one column.

    Reads ``lm/train.jsonl`` and ``lm/valid.jsonl`` under ``dataset_path``,
    flattens each record's ``"amounts"`` sequence (train records first, then
    valid), and returns the values as a NumPy array reshaped to one column.
    """
    split_files = (
        f"{dataset_path}/lm/train.jsonl",
        f"{dataset_path}/lm/valid.jsonl",
    )
    # Load both splits up front, then flatten them in train -> valid order.
    splits = [load_jsonl_data(split_file) for split_file in split_files]

    flat_amounts = []
    for records in splits:
        for record in records:
            flat_amounts.extend(record["amounts"])

    column = np.array(flat_amounts).reshape(-1, 1)
    return column

In [1]:
def discretize_and_save(amount_data, num_bins: int, dataset_name: str):
    """
    Discretizes the amount values and saves the discretizer.

    A quantile-strategy ``KBinsDiscretizer`` with ordinal encoding is fitted
    on ``amount_data`` and pickled to
    ``presets/{dataset_name}/discretizers/{num_bins}_quantile`` (relative to
    the current working directory). Missing parent directories are created.

    Parameters:
    - amount_data: NumPy array of amounts (e.g. the single-column array
      returned by ``extract_amounts``)
    - num_bins: Number of bins for discretization
    - dataset_name: Name of the dataset (used for saving the model)

    Returns:
    - The fitted ``KBinsDiscretizer``.
    """
    discretizer = KBinsDiscretizer(n_bins=num_bins, encode="ordinal", strategy="quantile")
    discretizer.fit(amount_data)

    save_path = Path(f"presets/{dataset_name}/discretizers/{num_bins}_quantile")
    # Without this, open() raises FileNotFoundError the first time a dataset
    # is processed, because the presets directory tree does not exist yet.
    save_path.parent.mkdir(parents=True, exist_ok=True)
    with save_path.open("wb") as f:
        pickle.dump(discretizer, f)

    return discretizer

**Run the cell below to fit and save the discretizers.**

In [None]:
# Dataset selection: the name picks both the input folder and the presets folder.
dataset_name = "rosbank"  # Change this to match dataset name
dataset_path = f"../data/{dataset_name}"  # Input data location, relative to this notebook

# Pool the train and valid amounts into one column vector.
amount_data = extract_amounts(dataset_path)

# Fit and persist a 100-bin quantile discretizer.
discretizer_100 = discretize_and_save(
    amount_data, num_bins=100, dataset_name=dataset_name
)

# Fit and persist a 50-bin quantile discretizer on the same amounts.
discretizer_50 = discretize_and_save(
    amount_data, num_bins=50, dataset_name=dataset_name
)