In [9]:
from master_thesis_experiments.simulator_toolbox.generator.synth_classification_generator import SynthClassificationGenerator
from master_thesis_experiments.active_learning.random_sampling_v2 import RandomSamplingStrategyV2
from master_thesis_experiments.simulator_toolbox.utils import get_root_level_dir
from master_thesis_experiments.adaptation.density_estimation import MultivariateNormalEstimator
from master_thesis_experiments.simulator_toolbox.data_provider.base import DataProvider
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import pairwise
from master_thesis_experiments.simulator_toolbox.utils import get_logger
from master_thesis_experiments.adaptation.density_estimation import DensityEstimator
from master_thesis_experiments.simulator_toolbox.simulation.base import Simulation
from tqdm import tqdm
import numpy as np
import pandas as pd
from copy import deepcopy
from master_thesis_experiments.active_learning.base import BaseStrategy
from scipy.stats import entropy
from sklearn.metrics import euclidean_distances
from sklearn import preprocessing
from scipy.stats import multivariate_normal
from pathlib import Path
import csv
import json

logger = get_logger(__name__)

In [10]:
# --- Experiment configuration ---------------------------------------------
# How many times the generate / run / store / reset cycle is repeated.
N_EXPERIMENTS = 5

# Passed to the simulation as ``n_samples``.
N_SAMPLES = 150

# Shape of the synthetic classification problem handed to the generator.
N_FEATURES, N_CLASSES = 2, 10

# Concept layout passed to ``generate_dataset``: every concept except the
# last one has CONCEPT_SIZE rows, the last one has LAST_CONCEPT_SIZE rows.
# NOTE: ``generate_dataset`` keeps resampling a concept until every class
# label appears in it, so a LAST_CONCEPT_SIZE close to N_CLASSES can require
# a very large number of retries.
N_CONCEPTS, CONCEPT_SIZE, LAST_CONCEPT_SIZE = 5, 500, 15

# Number of rows in the held-out test set.
TEST_SET_SIZE = 300

In [11]:
def euclidean_to_similarity(distance_matrix, sigma):
    """Map distances to similarities with an exponential decay.

    Each entry ``d`` of ``distance_matrix`` becomes ``exp(-d / sigma**2)``,
    so a distance of 0 maps to 1 and larger distances decay towards 0.

    Args:
        distance_matrix: NumPy array (or scalar) of distances.
        sigma: Bandwidth; it is squared before dividing the distances.

    Returns:
        Array of the same shape as ``distance_matrix``.
    """
    bandwidth = sigma ** 2
    return np.exp(-distance_matrix / bandwidth)

def generate_mean_values(
    min_distance, n_features, n_classes, low=0, high=15, max_attempts=None
):
    """Draw ``n_classes`` mean vectors that are pairwise at least ``min_distance`` apart.

    Candidates are sampled uniformly from ``[low, high)`` in every dimension
    (via ``np.random.uniform``) and rejected when they fall closer than
    ``min_distance`` (Euclidean) to any mean that was already accepted.

    Args:
        min_distance: Minimum Euclidean distance required between two means.
        n_features: Dimensionality of each mean vector.
        n_classes: Number of mean vectors to return.
        low: Lower bound of the sampling range (default 0, as before).
        high: Upper bound of the sampling range (default 15, as before).
        max_attempts: Optional cap on the number of candidates drawn. ``None``
            (default) keeps the original unbounded behaviour, which never
            returns when ``min_distance`` cannot be satisfied inside the
            sampling box.

    Returns:
        List of ``n_classes`` 1-D NumPy arrays of length ``n_features``.

    Raises:
        RuntimeError: If ``max_attempts`` is given and is exhausted before
            enough means have been accepted.
    """
    mean_values = []
    attempts = 0
    while len(mean_values) < n_classes:
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                f"Could not place {n_classes} means at distance >= {min_distance} "
                f"within {max_attempts} attempts"
            )
        attempts += 1

        candidate = np.random.uniform(low, high, n_features)
        # all() over an empty list is True, so the first candidate is always kept.
        far_enough = all(
            np.linalg.norm(candidate - kept) >= min_distance for kept in mean_values
        )
        if far_enough:
            mean_values.append(candidate)
    return mean_values

def check_means_distances(mean_values, min_distance):
    """Check that, inside every concept, all class means are far enough apart.

    Args:
        mean_values: Mapping ``concept -> {class -> mean vector}``.
        min_distance: Minimum Euclidean distance required between the means
            of two different classes of the same concept.

    Returns:
        ``False`` as soon as one pair of means within a concept is closer
        than ``min_distance``; ``True`` otherwise (including for empty input).
    """
    for class_means in mean_values.values():
        means = [np.asarray(mean, dtype=float) for mean in class_means.values()]
        # Distance is symmetric, so each unordered pair is inspected once.
        for position, mean in enumerate(means):
            for other_mean in means[position + 1:]:
                if np.linalg.norm(mean - other_mean) < min_distance:
                    return False
    return True

In [12]:

class WeightingHandler:
    """Maintains one weight per past sample, driven by RBF dissimilarity.

    The last element of ``concept_list`` is treated as the current concept,
    all previous ones as past concepts. Weights start at 1 and are multiplied
    by ``scaling_factor * sum(1 - rbf_kernel(past, reference))`` where the
    reference rows are the current-concept samples of the same class
    (``compute_pre_weights``) or a newly selected sample (``update_weights``).
    After every update the weights are rescaled so that they sum to the
    number of weighted rows.

    ``n_samples`` and ``similarity_measure`` are stored but not read by this
    class.
    """

    def __init__(
        self, concept_list, n_samples, scaling_factor=1, similarity_measure="euclidean"
    ):
        self.past_concepts = concept_list[:-1]
        self.current_concept = concept_list[-1]
        self.n_samples = n_samples
        self.scaling_factor = scaling_factor
        self.similarity_measure = similarity_measure
        # Weights of the samples that were moved out of the past dataset,
        # keyed by their original row label.
        self.selected_samples_weights = pd.DataFrame(columns=["weights"])
        self.past_dataset = None
        self.n_past_samples = None
        self.weights = None
        self.model = None
        self.classes = None
        self.similarities = None

    def initialize(self):
        """Merge the past concepts into one dataset and reset all weights to 1."""
        # Concatenate once instead of growing a frame inside the loop.
        frames = [concept.get_dataset() for concept in self.past_concepts]
        if frames:
            dataset = pd.concat(frames, axis=0, ignore_index=True)
        else:
            dataset = pd.DataFrame()

        self.past_dataset = DataProvider("past_dataset", dataset)
        # The label is taken to be the last column of the dataset.
        output_column = dataset.columns[-1]
        self.classes = np.unique(dataset[output_column])
        self.n_past_samples = self.past_dataset.n_samples

        self.weights = pd.DataFrame({"weights": np.ones(self.n_past_samples)})

        self.model = LogisticRegression(multi_class="multinomial", solver="sag")

    def _rescale_weights(self, past_rows, reference_rows):
        """Multiply the weights of ``past_rows`` by their RBF dissimilarity.

        Args:
            past_rows: DataFrame of past-sample features whose index labels
                identify rows of ``self.weights``.
            reference_rows: 2-D features the past rows are compared against.
        """
        # rbf_kernel rejects empty inputs; with nothing to compare there is
        # nothing to update, so the affected weights are left unchanged.
        if len(past_rows) == 0 or len(reference_rows) == 0:
            return

        dissimilarity = 1 - pairwise.rbf_kernel(past_rows, reference_rows, gamma=1)
        factors = np.sum(dissimilarity, axis=1)

        row_labels = past_rows.index.values.tolist()
        # Single .loc[rows, column] assignment: the chained form
        # frame["weights"].loc[rows] = ... writes to a temporary object under
        # pandas Copy-on-Write and would silently leave the weights unchanged.
        self.weights.loc[row_labels, "weights"] = (
            self.scaling_factor
            * self.weights.loc[row_labels, "weights"].to_numpy()
            * factors
        )

    def _normalize_weights(self):
        """Rescale the weights so that they sum to the number of weighted rows."""
        total = self.weights["weights"].sum()
        self.weights["weights"] = self.weights["weights"] * len(self.weights) / total

    def compute_pre_weights(self):
        """Compute the initial weights from the current concept, class by class.

        Returns:
            A deep copy of the normalised weights DataFrame (column ``weights``).
        """
        past_frame = self.past_dataset.get_dataset()
        current_frame = self.current_concept.get_dataset()
        output_column = past_frame.columns[-1]

        X_past, _ = self.past_dataset.get_split_dataset()
        X_current, _ = self.current_concept.get_split_dataset()
        X_past = pd.DataFrame(X_past)
        X_current = pd.DataFrame(X_current)

        for class_ in self.classes:
            past_rows = X_past.loc[past_frame[output_column] == class_]
            current_rows = X_current.loc[current_frame[output_column] == class_]
            self._rescale_weights(past_rows, current_rows)

        self._normalize_weights()

        return deepcopy(self.weights)

    def update_weights(self, selected_sample, selected_sample_index):
        """Move a selected sample to the current concept and refresh the weights.

        Args:
            selected_sample: 1-D array; the last entry is the label, the
                preceding entries are the features.
            selected_sample_index: Row label of the sample in the past
                dataset and in ``self.weights``.

        Returns:
            A deep copy of the normalised weights of the remaining past samples.
        """
        logger.info("Updating weights...")

        sample_label = selected_sample[-1]
        sample_features = selected_sample[:-1]

        # Keep datasets and weights in sync: the sample leaves the past
        # dataset and its weight is archived before being dropped.
        self.past_dataset.delete_sample(selected_sample_index)
        self.current_concept.add_samples([selected_sample])

        self.selected_samples_weights.loc[selected_sample_index] = self.weights.loc[
            selected_sample_index
        ]
        self.weights = self.weights.drop(index=selected_sample_index)

        past_frame = self.past_dataset.get_dataset()
        output_column = past_frame.columns[-1]
        X_past, _ = self.past_dataset.get_split_dataset()
        # Re-attach the dataset's own row labels so that the boolean mask and
        # the weights refer to the same rows after deletions.
        X_past = pd.DataFrame(X_past).set_index(pd.Index(past_frame.index.tolist()))

        same_label_rows = X_past.loc[past_frame[output_column] == sample_label]
        self._rescale_weights(same_label_rows, sample_features.reshape(1, -1))

        self._normalize_weights()

        return deepcopy(self.weights)

In [13]:
class WeightedSamplingStrategy(BaseStrategy):
    """Active-learning strategy mixing prediction entropy with diversity.

    A weighted multinomial logistic regression is trained on past + current
    samples (past samples weighted by a ``WeightingHandler``, current samples
    with weight 1). The past sample with the highest score is then moved to
    the current concept, where the score is the prediction entropy, blended
    with a diversity term once at least one sample has been selected.

    ``past_dataset``, ``current_concept``, ``iteration``,
    ``all_selected_samples`` and ``relabel_samples`` are not defined here;
    presumably they are provided by ``BaseStrategy`` - verify against the
    base class.
    """

    # Weight of the entropy term in the blended score; the diversity term
    # gets ``1 - ENTROPY_WEIGHT``.
    ENTROPY_WEIGHT = 0.5
    # Bandwidth of the RBF kernel used for the diversity term.
    DIVERSITY_GAMMA = 0.7

    def __init__(
        self,
        concept_mapping,
        concept_list,
        n_samples,
        prior_probs,
        # String annotation: the original ``DensityEstimator()`` annotation
        # instantiated the class every time this definition was evaluated.
        estimator_type: "type[DensityEstimator]",
    ):
        super().__init__(concept_mapping, concept_list, n_samples, prior_probs, estimator_type)
        self.name = "WeightedSampling"
        self.model = LogisticRegression(multi_class="multinomial", solver="lbfgs")
        # The handler works on its own deep copy of the concepts, so its
        # bookkeeping does not interfere with the datasets used for training.
        self.weighting_handler = WeightingHandler(
            deepcopy(concept_list),
            n_samples,
            scaling_factor=1,
            similarity_measure="euclidean",
        )
        self.weighting_handler.initialize()
        self.weights = None

    def initialize(self):
        """Run the base initialisation if still needed, then compute the initial weights."""
        if self.past_dataset is None:
            super().initialize()

        self.compute_pre_weights()

    def compute_pre_weights(self):
        """Fetch the initial per-sample weights from the weighting handler."""
        self.weights = self.weighting_handler.compute_pre_weights()

    def train_model(self):
        """Fit the classifier on past + current samples using the sample weights."""
        logger.debug("Training model...")

        X_past, y_past = self.past_dataset.get_split_dataset()
        X_current, y_current = self.current_concept.get_split_dataset()
        X = np.concatenate((X_past, X_current), axis=0)
        y = np.concatenate((y_past, y_current), axis=0)

        # Past samples carry their computed weights, current samples weight 1;
        # the order must match the row order of X.
        current_weights = pd.DataFrame({"weights": [1] * X_current.shape[0]})
        sample_weight = pd.concat(
            [self.weights, current_weights],
            axis=0,
            ignore_index=True,
        )

        self.model.fit(X, y, sample_weight=sample_weight.to_numpy().ravel())

    def select_samples(self):
        """Pick the highest-scoring past sample and move it to the current concept.

        Returns:
            The row label of the selected sample in the past dataset.
        """
        self.iteration += 1
        logger.debug(f"Selecting sample #{self.iteration}...")

        X, _ = self.past_dataset.get_split_dataset()

        probabilities = self.model.predict_proba(X)
        # entropy() reduces along axis 0, hence the transpose: one value per sample.
        scores = entropy(probabilities.T)

        row_labels = self.past_dataset.get_dataset().index.tolist()

        # Blend entropy with the distance from the already selected samples.
        if self.all_selected_samples:
            selected_features = pd.DataFrame(self.all_selected_samples).iloc[:, :-1]

            # The RBF kernel is close to 1 for nearby points, so 1 - kernel is
            # close to 0 for candidates near an already selected sample; the
            # product over all selected samples is small if any of them is near.
            dissimilarity = 1 - pairwise.rbf_kernel(
                X=X, Y=selected_features, gamma=self.DIVERSITY_GAMMA
            )
            diversity = dissimilarity.prod(axis=1)

            scores = (
                self.ENTROPY_WEIGHT * scores
                + (1 - self.ENTROPY_WEIGHT) * diversity
            )

        # Index the scores by the dataset's own row labels so that idxmax
        # returns a label usable with delete_sample / get_data_from_ids.
        score = pd.DataFrame({"score": scores}, index=pd.Index(row_labels))
        selected_sample_index = score.idxmax(axis=0).tolist()[0]

        sample = (
            self.past_dataset.get_data_from_ids(selected_sample_index)
            .to_numpy()
            .ravel()
        )
        self.selected_sample = sample

        # TODO: move this to the end, so that the relabelled samples are the
        # ones saved and their number can be checked during evaluation.
        self.all_selected_samples.append(self.selected_sample.tolist())
        self.relabel_samples()
        self.past_dataset.delete_sample(selected_sample_index)
        self.current_concept.add_samples([self.selected_sample.T])

        return selected_sample_index

    def run(self):
        """Train, select one sample, update the weights.

        Returns:
            A deep copy of ``self.current_concept.get_split_dataset()`` after
            the selected sample has been added.
        """
        self.train_model()
        selected_sample_index = self.select_samples()

        self.weights = self.weighting_handler.update_weights(
            self.selected_sample, selected_sample_index
        )

        return deepcopy(self.current_concept.get_split_dataset())

In [14]:
# Kinds of per-class drift drawn by the simulation: "mean" perturbs the mean
# and covariance values, "combination" additionally applies a rotation.
PERTURBATION_TYPE = [
    "mean",
    "combination",
]

# Magnitude of the drift drawn for a class.
PERTURBATION_INTENSITY = [
    "small",
    "large",
]

class SynthClassificationSimulationV2(Simulation):
    """Synthetic concept-drift simulation comparing active-learning strategies.

    ``generate_dataset`` builds a sequence of concepts by perturbing per-class
    Gaussian parameters, ``run`` evaluates every strategy (plus a clairvoyant
    baseline that receives extra samples of the last concept), and
    ``store_results`` writes everything below ``simulation_results_dir``.

    Attributes such as ``generator``, ``strategies``, ``n_samples``,
    ``estimator_type``, ``concepts``, ``concept_mapping``, ``test_set`` and
    ``simulation_results_dir`` are not defined here; presumably they are set
    by ``Simulation`` - verify against the base class.
    """

    # (perturbation type, intensity) ->
    #     (std of the Gaussian mean shift, half-width of the uniform cov noise)
    _PERTURBATION_SCALES = {
        ("mean", "small"): (1, 1),
        ("mean", "large"): (5, 1),
        ("combination", "small"): (1, 2),
        ("combination", "large"): (5, 5),
    }
    # Number of candidate perturbations drawn for every class.
    _PERTURBATIONS_PER_CLASS = 4
    # Rotation angles a "combination" perturbation can pick from.
    _ROTATION_ANGLES = [np.pi, np.pi / 2, np.pi / 4, 3 * np.pi / 4]

    def __init__(
            self,
            name,
            generator,
            strategies,
            results_dir,
            n_samples,
            estimator_type,
            test_set_size,
    ):
        super().__init__(
            name, generator, strategies, results_dir, n_samples, estimator_type
        )

        self.current_concept_extended = None
        self.test_set_size = test_set_size
        self.mean_values = []
        self.cov_values = []

        self.AL_accuracy = {}
        self.clairvoyant_accuracy = {}
        self.pre_AL_accuracy = None
        self.weights = None

    def _draw_perturbations(self, triangular_size):
        """Draw a fresh list of candidate perturbations for every class.

        Every class gets one perturbation type and one intensity, and
        ``_PERTURBATIONS_PER_CLASS`` perturbations drawn with those settings.

        Args:
            triangular_size: Length of the covariance-value vector of a class.

        Returns:
            ``perturbations[class_index]`` is a list of dicts with keys
            ``type``, ``mean``, ``cov`` and, for "combination", ``theta``.
        """
        perturbations = []
        for _class_index in range(self.generator.n_classes):
            perturbation_type = str(np.random.choice(PERTURBATION_TYPE, size=1)[0])
            intensity = str(np.random.choice(PERTURBATION_INTENSITY, size=1)[0])
            mean_scale, cov_halfwidth = self._PERTURBATION_SCALES[
                (perturbation_type, intensity)
            ]

            class_perturbations = []
            for _draw in range(self._PERTURBATIONS_PER_CLASS):
                # Draw order (mean, cov, theta) matches the original code.
                perturbation = {
                    "type": perturbation_type,
                    "mean": np.random.normal(
                        scale=mean_scale, size=self.generator.size
                    ),
                    "cov": np.random.uniform(
                        -cov_halfwidth, cov_halfwidth, triangular_size
                    ),
                }
                if perturbation_type == "combination":
                    perturbation["theta"] = np.random.choice(
                        self._ROTATION_ANGLES, size=1
                    )
                class_perturbations.append(perturbation)

            perturbations.append(class_perturbations)
        return perturbations

    def _generate_covering_all_classes(self, size):
        """Generate a dataset of ``size`` samples containing every class label.

        The generator is called repeatedly until all ``n_classes`` labels
        appear in the ``y_0`` column.

        Raises:
            ValueError: If the generated dataset has fewer rows than classes,
                in which case the loop below could never terminate (assuming
                the generator returns the same number of rows on every call).
        """
        n_classes = self.generator.n_classes
        dataset = self.generator.generate(size)
        if len(dataset) < n_classes:
            raise ValueError(
                f"A dataset of {len(dataset)} rows cannot contain all "
                f"{n_classes} classes"
            )

        while len(dataset["y_0"].unique()) != n_classes:
            logger.debug(
                "Regenerating dataset, some class did not generate any sample"
            )
            dataset = self.generator.generate(size)
        return dataset

    def generate_dataset(self, n_concepts, concept_size, last_concept_size):
        """
        This method generates the dataset

        Args:
            n_concepts: Number of concepts to generate.
            concept_size: Number of samples of every concept but the last.
            last_concept_size: Number of samples of the last concept.

        Side effects: fills ``self.concepts``, ``self.concept_mapping``,
        ``self.prior_probs_per_concept`` and ``self.metadata``; on the last
        concept, when ``self.test_set`` is ``None``, also creates the test
        set and ``self.current_concept_extended``.
        """
        logger.debug("Generating the dataset...")

        n = self.generator.size
        triangular_size = int(n * (n + 1) / 2)

        self.mean_values = generate_mean_values(
            min_distance=4,
            n_features=self.generator.size,
            n_classes=self.generator.n_classes,
        )

        self.cov_values = [
            np.random.uniform(5, 9, triangular_size)
            for _ in range(self.generator.n_classes)
        ]

        # Hand the generator its own lists: the per-class item assignments
        # below must not overwrite the base values kept on the simulation,
        # otherwise perturbations would be applied on top of each other.
        self.generator.mean_values = list(self.mean_values)
        self.generator.cov_values = list(self.cov_values)

        alpha = 2
        # the higher the alpha, the more balanced the prior probabilities
        self.generator.prior_probs = np.random.dirichlet(
            alpha * np.ones(self.generator.n_classes)
        )

        # For each class a set of candidate perturbations is drawn; some
        # classes get a small perturbation, others a large one. A perturbation
        # adds noise to the mean and covariance values of the class and, for
        # the "combination" type, also rotates the covariance matrix.

        valid_means = False

        mean_values = None
        covariance_matrices = None

        while valid_means is not True:

            # Redrawn on every retry, so rejected draws are not carried over.
            perturbations = self._draw_perturbations(triangular_size)

            mean_values = {}
            covariance_matrices = {}

            for i in range(n_concepts):

                mean_values[i] = {}
                covariance_matrices[i] = {}

                self.concept_mapping["concept_" + str(i)] = {}

                for class_ in range(self.generator.n_classes):

                    concept_perturbation = np.random.choice(perturbations[class_])

                    perturbed_mean = (
                        self.mean_values[class_] + concept_perturbation["mean"]
                    )

                    self.generator.mean_values[class_] = perturbed_mean
                    self.generator.cov_values[class_] = (
                        self.cov_values[class_] + concept_perturbation["cov"]
                    )

                    if concept_perturbation["type"] != "mean":
                        dims = np.random.choice(
                            list(range(self.generator.size)), size=2, replace=False
                        )
                        self.generator.rotate(
                            dims[0], dims[1], concept_perturbation["theta"], class_
                        )

                    mean_values[i][class_] = perturbed_mean
                    # NOTE(review): this assumes the generator refreshes
                    # covariance_matrices[class_] when cov_values[class_] is
                    # assigned / rotated - confirm in the generator.
                    covariance_matrices[i][class_] = (
                        self.generator.covariance_matrices[class_]
                    )

            valid_means = check_means_distances(mean_values, min_distance=3.5)

        for i in range(n_concepts):

            for class_ in range(self.generator.n_classes):

                self.generator.mean_values[class_] = mean_values[i][class_]
                self.generator.covariance_matrices[class_] = covariance_matrices[i][class_]

                self.concept_mapping["concept_" + str(i)][
                    "class_" + str(class_)
                ] = multivariate_normal(
                    self.generator.mean_values[class_],
                    self.generator.covariance_matrices[class_],
                )

            self.prior_probs_per_concept.append(self.generator.prior_probs.tolist())

            is_last_concept = i == n_concepts - 1
            dataset = self._generate_covering_all_classes(
                last_concept_size if is_last_concept else concept_size
            )

            if is_last_concept and self.test_set is None:
                self.test_set = DataProvider(
                    "test_set", self.generator.generate(self.test_set_size)
                )

                # One extra sample per active-learning step for the
                # clairvoyant baseline; sized from the instance so it always
                # matches the number of steps taken in run().
                self.current_concept_extended = DataProvider(
                    "current_concept_extended",
                    self.generator.generate(self.n_samples),
                )

            # NOTE(review): the original code called
            # StandardScaler().fit_transform(dataset) here and discarded the
            # result, so the concepts have always been stored unscaled. The
            # no-op call was dropped; add real scaling here if it is wanted.
            self.concepts.append(DataProvider("concept_" + str(i), dataset))

        self.metadata = {
            "dataset_name": self.generator.name,
            "past_dataset_size": (n_concepts - 1) * concept_size,
            "task": "classification",
            "type": "synth",
            "n_concepts": n_concepts,
            "concept_size": concept_size,
            "last_concept_size": last_concept_size,
            "prior_probs_per_concept": self.prior_probs_per_concept,
            "n_samples": self.n_samples,
            "means": [means.tolist() for means in self.generator.mean_values],
            "covs": [covs.tolist() for covs in self.generator.covariance_matrices],
            "n_classes": self.generator.n_classes,
        }

    def _evaluate_clairvoyant(self, X_test, y_test):
        """Fill ``self.clairvoyant_accuracy`` for 1..n_samples extra samples.

        The clairvoyant classifier is trained on the last concept plus the
        first ``k`` extra samples (taken from ``current_concept_extended`` in
        reverse row order, as the original loop did) and scored on the test
        set. The curve does not depend on any strategy, so it is computed
        once, always starting from the unmodified last concept.
        """
        clairvoyant_classifier = LogisticRegression(
            multi_class="multinomial", solver="sag", max_iter=1000
        )

        base_concept = self.concepts[-1].generated_dataset
        extended = self.current_concept_extended.generated_dataset
        extra_rows = extended.iloc[: self.n_samples].iloc[::-1]

        for n_selected_samples in range(1, self.n_samples + 1):
            current_concept = pd.concat(
                (base_concept, extra_rows.iloc[:n_selected_samples]),
                ignore_index=True,
            )

            X_clrv, y_clrv = (
                current_concept[current_concept.columns[:-1]],
                current_concept[current_concept.columns[-1]],
            )

            clairvoyant_classifier.fit(X=X_clrv, y=y_clrv)
            self.clairvoyant_accuracy[
                n_selected_samples
            ] = clairvoyant_classifier.score(X_test, y_test)

    def run(self):
        """Evaluate the baseline, the clairvoyant curve and every strategy.

        Fills ``pre_AL_accuracy`` (classifier trained on the last concept
        only), ``clairvoyant_accuracy``, ``AL_accuracy`` keyed by
        ``(strategy name, number of selected samples)``,
        ``selected_samples_per_strategy`` and, for the weighted strategy,
        ``weights``.
        """
        classifier = LogisticRegression(
            multi_class="multinomial", solver="sag", max_iter=1000
        )

        X, y = self.concepts[-1].get_split_dataset()

        classifier.fit(X=X, y=y)
        X_test, y_test = self.test_set.get_split_dataset()
        self.pre_AL_accuracy = classifier.score(X_test, y_test)

        self._evaluate_clairvoyant(X_test, y_test)

        for strategy in self.strategies:
            strategy_instance: BaseStrategy = strategy(
                concept_mapping=deepcopy(self.concept_mapping),
                concept_list=deepcopy(self.concepts),
                n_samples=self.n_samples,
                estimator_type=self.estimator_type,
                prior_probs=deepcopy(self.prior_probs_per_concept[0]),
            )
            self.strategy_instances.append(strategy_instance)

            strategy_instance.initialize()

            for n_selected_samples in range(1, self.n_samples + 1):
                X_new, y_new = strategy_instance.run()
                classifier.fit(X=X_new, y=y_new)

                self.AL_accuracy[
                    (strategy_instance.name, n_selected_samples)
                ] = classifier.score(X_test, y_test)

            self.selected_samples_per_strategy[
                strategy_instance.name
            ] = strategy_instance.all_selected_samples

            if isinstance(strategy_instance, WeightedSamplingStrategy):
                # Remaining past-sample weights followed by the weights the
                # selected samples had when they were picked.
                self.weights = pd.concat(
                    (
                        strategy_instance.weights,
                        strategy_instance.weighting_handler.selected_samples_weights,
                    ),
                    ignore_index=False,
                )

    @staticmethod
    def _write_scalar_csv(path, value):
        """Write ``value`` as the only cell of a CSV file, creating parent dirs."""
        path.parent.mkdir(parents=True, exist_ok=True)
        # newline="" is what the csv module expects, otherwise extra blank
        # lines appear on platforms that translate line endings.
        with open(path, "w", newline="") as f:
            csv.writer(f).writerow([value])

    def store_results(self, experiment_index):
        """Write datasets, accuracies, weights and metadata of one experiment.

        Everything is stored under
        ``<simulation_results_dir>/<experiment_index>/``.
        """
        experiment_dir = Path(self.simulation_results_dir + "/" + str(experiment_index))
        experiment_dir.mkdir(parents=True, exist_ok=True)

        # Save concepts
        for concept in self.concepts:
            concept_path = experiment_dir / str(concept.name + ".csv")
            concept.generated_dataset.to_csv(concept_path, index=False)

        # Save test set
        self.test_set.generated_dataset.to_csv(
            experiment_dir / "test_set.csv", index=False
        )

        # Save pre-AL accuracy
        self._write_scalar_csv(
            experiment_dir / "pre_AL_accuracy.csv", self.pre_AL_accuracy
        )

        # Save AL accuracy: one file per (strategy, number of selected samples)
        for (strategy_name, n_selected), accuracy in self.AL_accuracy.items():
            self._write_scalar_csv(
                experiment_dir / str(strategy_name) / (str(n_selected) + "_samples.csv"),
                accuracy,
            )

        # Save the samples selected by each strategy
        columns = self.concepts[0].generated_dataset.columns
        for strategy_name, samples in self.selected_samples_per_strategy.items():
            selected_samples_path = (
                experiment_dir / str(strategy_name) / "selected_samples.csv"
            )
            # Do not rely on the accuracy loop having created the directory.
            selected_samples_path.parent.mkdir(parents=True, exist_ok=True)
            pd.DataFrame(samples, columns=columns).to_csv(
                selected_samples_path, index=False
            )

        # Save clairvoyant accuracy
        for n_selected, accuracy in self.clairvoyant_accuracy.items():
            self._write_scalar_csv(
                experiment_dir / "clairvoyant" / (str(n_selected) + "_samples.csv"),
                accuracy,
            )

        # save weights
        # NOTE(review): index=False drops the row labels that tie each weight
        # to its sample; kept as-is to preserve the output format.
        if self.weights is not None:
            self.weights.to_csv(experiment_dir / "weights.csv", index=False)

        # save generation metadata
        with open(experiment_dir / "metadata.json", "w") as metadata_file:
            json.dump(self.metadata, metadata_file)

    def soft_reset(self):
        """Reset the generator and all per-experiment state for the next run."""
        self.generator.reset()

        self.metadata = None
        self.concept_mapping = {}
        self.strategy_instances = []
        self.concepts = []
        self.prior_probs_per_concept = []
        self.selected_samples_per_strategy = {}

        self.test_set = None
        self.current_concept_extended = None

        self.AL_accuracy = {}
        self.clairvoyant_accuracy = {}
        self.pre_AL_accuracy = 0.0
        self.weights = None

In [15]:
# Wire the generator, the strategies to compare and the output location into
# one simulation object. Strategies and the estimator are passed as classes,
# not instances.
simulation = SynthClassificationSimulationV2(
    name="synth_classification_fixed_dataset_and_samples_v2",
    generator=SynthClassificationGenerator(
        n_features=N_FEATURES,
        n_outputs=1,
        n_classes=N_CLASSES,
    ),
    strategies=[WeightedSamplingStrategy, RandomSamplingStrategyV2],
    results_dir=get_root_level_dir("results"),
    n_samples=N_SAMPLES,
    estimator_type=MultivariateNormalEstimator,
    test_set_size=TEST_SET_SIZE,
)

In [16]:
# One full cycle per experiment: generate fresh concepts, evaluate every
# strategy, persist the results under the experiment's own folder, then clear
# the per-experiment state before the next repetition.
for experiment in tqdm(range(N_EXPERIMENTS)):
    simulation.generate_dataset(
        n_concepts=N_CONCEPTS,
        concept_size=CONCEPT_SIZE,
        last_concept_size=LAST_CONCEPT_SIZE,
    )
    simulation.run()
    simulation.store_results(experiment)
    simulation.soft_reset()

  0%|          | 0/5 [00:00<?, ?it/s][DEBUG] [__main__] [2023-06-26 15:56:53] Generating the dataset...
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did not generate any sample
[DEBUG] [__main__] [2023-06-26 15:56:54] Regenerating dataset, some class did no

KeyboardInterrupt: 