In [None]:
from pinard.model_selection import train_test_split_idx
import numpy as np
from os.path import abspath
from os.path import dirname as d

ROOT_DIR = './..'


def path_to(str):
    """Return the location of a test data file below ``ROOT_DIR``.

    The result is the plain concatenation ``ROOT_DIR + "/tests/data/" + str``;
    the path is neither normalised nor checked for existence.

    Args:
        str: File name (or sub-path) inside the ``tests/data`` directory.
            The parameter shadows the ``str`` builtin inside this function;
            the name is kept so existing keyword callers keep working.

    Returns:
        The concatenated path string.
    """
    data_dir = ROOT_DIR + "/tests/data/"
    return data_dir + str


def split_data():
    """Load the split-test fixture and separate targets from features.

    Reads ``test_split.csv`` (``;``-delimited) from the location returned by
    ``path_to``. Column 0 becomes the target column vector, the remaining
    columns become the feature matrix.

    Returns:
        tuple: ``(x, y)`` where ``x`` holds every column but the first and
        ``y`` is the first column reshaped to ``(n_rows, 1)``.
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # squeezes the result to 1-D and the column slicing below raises
    # IndexError.
    table = np.loadtxt(path_to("test_split.csv"), delimiter=";", ndmin=2)
    y = np.reshape(table[:, 0], (-1, 1))
    x = table[:, 1:]
    return (x, y)


def split_validation_data():
    """Load the reference split indices stored with the test data.

    Reads ``test_split_validation.csv`` (``;``-delimited) from the location
    returned by ``path_to`` and casts every value to ``int``.

    Returns:
        numpy.ndarray: The file content as an integer array.
    """
    csv_path = path_to("test_split_validation.csv")
    expected = np.loadtxt(csv_path, delimiter=";")
    return expected.astype(int)


# Splitter configurations. Each entry is ``(options, position)``: ``options``
# are the keyword arguments forwarded to ``train_test_split_idx`` and
# ``position`` equals the entry's index in this list (presumably also the
# matching column of the validation file -- confirm).
split_list = [
    (dict(method="random", test_size=0.25, random_state=42), 0),
    (dict(method="k_mean", test_size=0.25, random_state=42, metric="canberra"), 1),
    (
        dict(
            pca_components=4,
            method="k_mean",
            test_size=0.25,
            random_state=42,
            metric="canberra",
        ),
        2,
    ),
    (dict(method="kennard_stone", test_size=0.25, random_state=42), 3),
    (
        dict(
            method="kennard_stone",
            test_size=0.25,
            random_state=42,
            metric="correlation",
            pca_components=8,
        ),
        4,
    ),
    (
        dict(
            method="kennard_stone",
            test_size=0.25,
            random_state=42,
            metric="correlation",
        ),
        5,
    ),
    (dict(method="spxy", test_size=0.25, random_state=42), 6),
    (dict(method="spxy", test_size=0.25, random_state=42, pca_components=2), 7),
    (dict(method="spxy", test_size=0.25, random_state=42, metric="canberra"), 8),
    (dict(method="stratified", test_size=0.25, random_state=42), 9),
    (dict(method="stratified", test_size=0.25, random_state=42, n_bins=4), 10),
    (dict(method="circular", test_size=0.25, random_state=42), 11),
]


# Recompute one split and compare it with the stored reference indices.
x, y = split_data()
index = 2  # entry of split_list to run and column of the reference data to compare
opt = split_list[index][0]
train_index, _ = train_test_split_idx(x, y=y, **opt)
# NOTE(review): "alidation.csv" looks like a truncated file name and is written
# to the current working directory -- confirm the intended destination.
np.savetxt(
    "alidation.csv", train_index, delimiter=";", fmt="%d"
)
# The reference array gets its own name: assigning it to
# ``split_validation_data`` would replace the loader function with its result,
# so these statements could not be executed a second time on their own.
expected_split_indices = split_validation_data()
# print(list(zip(train_index, expected_split_indices[:, index])))
np.testing.assert_array_equal(train_index, expected_split_indices[:, index])

In [215]:
%load_ext autoreload
%autoreload 2
import numpy as np
import operator
import pinard
import pinard.augmentation as aug
import matplotlib.pyplot as plt
import importlib
# ``aug`` is the module object ``pinard.augmentation`` (see the aliased import
# above), so one reload refreshes it for both names; a second call would only
# execute the module body again.
importlib.reload(pinard.augmentation)

# def augment(X, apply_on="samples"):
#     x = np.arange(0, len(X[0]), 1)
#     t, c, k = interpolate.splrep(x, X[0], s=0, k=3)
#     delta_x_size = int(np.around(len(t) / 20))
#     delta_x = np.linspace(np.min(x), np.max(x), delta_x_size)
#     delta_y = np.random.uniform(-10, 10, delta_x_size)
#     delta = np.interp(t, delta_x, delta_y)
#     t = t + delta
#     spline = interpolate.BSpline(t, c, k, extrapolate=True)

#     return spline(X)


def split_data():
    """Load the augmentation fixture and separate targets from features.

    Reads ``data/test_augmentation.csv`` (``;``-delimited, relative to the
    current working directory). Column 0 becomes the target column vector, the
    remaining columns become the feature matrix.

    NOTE(review): an earlier cell of this notebook defines another
    ``split_data``; executing this definition replaces it.

    Returns:
        tuple: ``(x, y)`` where ``x`` holds every column but the first and
        ``y`` is the first column reshaped to ``(n_rows, 1)``.
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # squeezes the result to 1-D and the column slicing below raises
    # IndexError.
    table = np.loadtxt("data/test_augmentation.csv", delimiter=";", ndmin=2)
    y = np.reshape(table[:, 0], (-1, 1))
    x = table[:, 1:]
    return (x, y)

# Seed handed to every augmenter through its ``random_state`` argument.
seed = 42

# One entry per configuration to export:
# (index, name, augmenter callable, keyword arguments for that callable).
# ``index`` and ``name`` are used to build the output file names.
augmenters = [
    (
        0,
        "Rotate_Translate",
        aug.Rotate_Translate,
        {"random_state": seed},
    ),
    (
        1,
        "Rotate_Translate_custom",
        aug.Rotate_Translate,
        {"random_state": seed, "p_range": 5, "y_factor": 5},
    ),
    (
        2,
        "Random_X_Operation",
        aug.Random_X_Operation,
        {"random_state": seed},
    ),
    (
        3,
        "Random_X_Operation_custom",
        aug.Random_X_Operation,
        {
            "random_state": seed,
            "operator_func": operator.add,
            "operator_range": (-0.002, 0.002),
        },
    ),
    (
        4,
        "Spline_Smoothing",
        aug.Spline_Smoothing,
        {"random_state": seed},
    ),
    (
        5,
        "Spline_X_Perturbation",
        aug.Spline_X_Perturbations,
        {"random_state": seed},
    ),
    (
        6,
        "Spline_X_Perturbation",
        aug.Spline_X_Perturbations,
        {
            "random_state": seed,
            "perturbation_density": 0.01,
            "perturbation_range": (-30, 30),
        },
    ),
    (
        7,
        "Spline_Y_Perturbation",
        aug.Spline_Y_Perturbations,
        {"random_state": seed},
    ),
    (
        8,
        "Spline_Y_Perturbation",
        aug.Spline_Y_Perturbations,
        {
            "random_state": seed,
            "spline_points": 5,
            "perturbation_intensity": 0.02,
        },
    ),
    (
        9,
        "Spline_X_Simplification",
        aug.Spline_X_Simplification,
        {"random_state": seed},
    ),
    (
        10,
        "Spline_X_Simplification",
        aug.Spline_X_Simplification,
        {"random_state": seed, "spline_points": 15},
    ),
    (
        11,
        "Spline_Curve_Simplification",
        aug.Spline_Curve_Simplification,
        {"random_state": seed},
    ),
    (
        12,
        "Spline_Curve_Simplification",
        aug.Spline_Curve_Simplification,
        {"random_state": seed, "spline_points": 5, "uniform": False},
    ),
    (
        13,
        "Spline_Curve_Simplification",
        aug.Spline_Curve_Simplification,
        {"random_state": seed, "spline_points": 5, "uniform": True},
    ),
]

X, Y = split_data()

# Write one CSV per augmenter configuration and per application mode.
for index, name, augmenter, params in augmenters:
    print(name)
    for apply_on in ("global", "samples"):
        # A new instance is built for each mode (presumably so both modes
        # start from the same seeded state -- confirm).
        aug_instance = augmenter(**params)
        augmented_X = aug_instance.augment(X, apply_on=apply_on)
        # np.savetxt returns None, so its result is not assigned; otherwise
        # ``augmented_X`` would be overwritten with None after each write.
        np.savetxt(
            f"data/test_augmentation_{index}_{name}_{apply_on}.csv",
            augmented_X,
            delimiter=";",
            fmt="%.3f",
        )


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Rotate_Translate
Rotate_Translate_custom
Random_X_Operation
Random_X_Operation_custom
Spline_Smoothing
(4, 234) (4, 234)
(4, 234) (4, 234)
Spline_X_Perturbation
Spline_X_Perturbation
Spline_Y_Perturbation
Spline_Y_Perturbation
Spline_X_Simplification
Spline_X_Simplification
Spline_Curve_Simplification
Spline_Curve_Simplification
Spline_Curve_Simplification


In [None]:
import json


def generate_json_combinations(json_data, depth=0, max_depth=7):
    """Collect the leaf values of a nested list/dict structure.

    Lists are walked item by item and dicts value by value (keys are ignored);
    anything else is treated as a leaf. Leaves are returned in traversal
    order as one flat list.

    NOTE(review): despite the name, no combinations are built and ``"*"``
    keys get no special treatment here -- confirm the intended behaviour.

    Args:
        json_data: The value to walk (list, dict or leaf).
        depth: Nesting level of ``json_data``; callers normally leave it at 0.
        max_depth: Deepest level still visited. Anything nested deeper than
            this contributes nothing to the result.

    Returns:
        list: The leaves found at depth ``<= max_depth``.
    """
    if depth > max_depth:
        return []

    if isinstance(json_data, list):
        children = json_data
    elif isinstance(json_data, dict):
        children = json_data.values()
    else:
        # Leaf value: wrap it so callers can always extend with the result.
        return [json_data]

    return [
        leaf
        for child in children
        for leaf in generate_json_combinations(child, depth + 1, max_depth)
    ]


# Example input: nested dicts and lists, several of them keyed with "*".
input_json = {
    "key1": "my_task",
    "key2": {"*": ["my_task2", "my_task2_alt"]},
    "key3": [
        "pipeline_task1",
        {
            "*": [
                "pipeline_task2",
                {"*": ["pipeline_task4", "pipeline_task4_alt"]},
            ]
        },
        "pipeline_task3",
    ],
    "*": [
        {"key5": "task5", "key6": "task6"},
        {"key5_alt": "task5_alt"},
    ],
}

# Run the function on the example and print each returned object as JSON.
generated_json_objects = generate_json_combinations(input_json)
for obj in generated_json_objects:
    print(json.dumps(obj, indent=4))

In [None]:
import numpy as np

# Small 3x3 demonstration array.
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Average along axis 0 (down the rows), which yields one mean per column.
column_means = arr.mean(axis=0)

print(column_means)