In [1]:
config = {
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.001,
        "noise_level": [0.0, 0.2, 0.4],
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False
    },
    "sim_init_options": {
        "K_init": 10,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": ["unif", "freq", "uncert", "maxmin", "maxexp", "QECC", "COBRAS", "nCOBRAS"],
        "local_regions": ["none", "pairs", "triangles"],
        "eps": [0.3],
        "beta": 1,
        "tau": [7],
        "alpha": 1
    },
    "dataset_options": {
        "dataset": ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"],
    }
}

In [2]:
from rac.experiment_data import ExperimentReader
metrics = ["rand", "ami"]
ac = ExperimentReader(metrics=metrics)
data = ac.read_all_data(folder="../experiment_results/real_world_experiments")
ac.generate_AL_curves(
    data,
    save_location="../plots",
    categorize=["dataset", "noise_level", "eps"],
    compare=["acq_fn", "local_regions"], 
    vary=["x"],
    auc=True,
    summary_method="auc_max_ind",
    indices=[], 
    threshold=1,
    err_style="band",
    marker="o",
    markersize=6,
    capsize=6,
    linestyle="solid",
    **config
)

<Figure size 640x480 with 0 Axes>

In [54]:
#datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
datasets = ["yeast"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.0, 0.2, 0.4],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["unif", "freq", "uncert", "maxmin", "maxexp", "QECC", "COBRAS", "nCOBRAS"],
            "local_regions": ["none", "pairs", "triangles"],
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset + "_small"
        }
    }
    from rac.experiment_data import ExperimentReader
    metrics = ["rand", "ami"]
    ac = ExperimentReader(metrics=metrics)
    data = ac.read_all_data(folder="../experiment_results/real_world_experiments_small/" + dataset + "_small")
    ac.generate_AL_curves(
        data,
        save_location="../plots_small/" + dataset + "/",
        categorize=["noise_level", "eps"],
        compare=["acq_fn", "local_regions"], 
        vary=["x"],
        auc=True,
        summary_method="auc_max_ind",
        indices=[], 
        threshold=1,
        err_style="band",
        marker="o",
        markersize=6,
        capsize=6,
        linestyle="solid",
        **config
    )

  plt.legend(loc='bottom right')
  plt.legend(loc='bottom right')
  plt.legend(loc='bottom right')
  plt.legend(loc='bottom right')
  plt.legend(loc='bottom right')
  plt.legend(loc='bottom right')


<Figure size 1200x1200 with 0 Axes>

In [4]:
#datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
datasets = ["mnist"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.2],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["unif", "freq", "uncert", "maxmin", "maxexp", "QECC", "COBRAS", "nCOBRAS"],
            "local_regions": ["none", "pairs", "triangles"],
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset
        }
    }
    from rac.experiment_data import ExperimentReader
    metrics = ["rand", "ami"]
    ac = ExperimentReader(metrics=metrics)
    data = ac.read_all_data(folder="../experiment_results/real_world_experiments/" + dataset)
    ac.generate_AL_curves(
        data,
        save_location="../plots/" + dataset + "/",
        categorize=["noise_level", "eps"],
        compare=["acq_fn", "local_regions"], 
        vary=["x"],
        auc=True,
        summary_method="auc_max_ind",
        indices=[], 
        threshold=1,
        err_style="band",
        marker="o",
        markersize=6,
        capsize=6,
        linestyle="solid",
        **config
    )

  ax.set_xticklabels(labels)
  ax.set_xticklabels(labels)


<Figure size 1200x1200 with 0 Axes>

In [8]:
config = {
    "general_options": {
        "experiment_name": "real_world_experiments_small",
        "num_repeats": 5,
        "n_workers": 15,
        "local": False,
        "verbose": False
    },
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.001,
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False,
        "noise_level": [
            0.0,
            0.2,
            0.4
        ]
    },
    "sim_init_options": {
        "K_init": 20,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": "unif",
        "local_regions": "pairs",
        "beta": 1,
        "tau": 7,
        "alpha": 1,
        "eps": [
            0.3
        ]
    },
    "dataset_options": {
        "dataset": "20newsgroups_small"
    }
}

In [9]:
from rac.experiment_data import ExperimentReader
metrics = ["rand", "ami"]
ac = ExperimentReader(metrics=metrics)
data = ac.read_all_data(folder="../experiment_results/real_world_experiments_small")
ac.generate_AL_curves(
    data,
    save_location="../test_plot",
    categorize=["dataset", "noise_level", "eps"],
    compare=["acq_fn", "local_regions"], 
    vary=["x"],
    auc=True,
    summary_method="auc_max_ind",
    indices=[], 
    threshold=1,
    err_style="band",
    marker="o",
    markersize=6,
    capsize=6,
    linestyle="solid",
    **config
)

<Figure size 640x480 with 0 Axes>

In [47]:
config = {"general_options": {
        "experiment_name": "synthetic_experiment",
        "num_repeats": 5,
        "n_workers": 15,
        "local": False,
        "verbose": False
    },
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.0005,
        "noise_level": [0.0, 0.2, 0.4],
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False
    },
    "sim_init_options": {
        "K_init": 10,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": ["unif", "freq", "uncert", "maxmin", "maxexp", "QECC", "COBRAS", "nCOBRAS"],
        "local_regions": ["none", "pairs", "triangles"],
        "eps": [0.3],
        "beta": 1,
        "tau": [7],
        "alpha": 1
    },
    "dataset_options": {
        "dataset": "synthetic",
        "n_samples": [250, 500],
        "n_clusters": 10,
        "class_balance": None
    }
}

In [48]:
from rac.experiment_data import ExperimentReader
metrics = ["rand", "ami"]
ac = ExperimentReader(metrics=metrics)
data = ac.read_all_data(folder="../experiment_results/synthetic_experiment")
ac.generate_AL_curves(
    data,
    save_location="../plots_synthetic",
    categorize=["n_samples", "dataset", "noise_level", "eps"],
    compare=["acq_fn", "local_regions"], 
    vary=["x"],
    auc=True,
    summary_method="auc_max_ind",
    indices=[], 
    threshold=1,
    err_style="band",
    marker="o",
    markersize=6,
    capsize=6,
    linestyle="solid",
    **config
)

  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)
  ax2.set_xticklabels(labels)


<Figure size 1200x1200 with 0 Axes>

In [39]:
start_index = 1

In [40]:
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
config = {
    "general_options": {
        "experiment_name": "test_exp2",
        "num_repeats": 10,
        "n_workers": 60,
        "local": False,
        "verbose": False
    },
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.001,
        "noise_level": [0.0, 0.2],
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False
    },
    "sim_init_options": {
        "K_init": 10,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": ["maxmin", "maxexp"],
        "local_regions": "triangles",
        "eps": [0.3],
        "beta": 1,
        "tau": [7],
        "alpha": 1
    },
    "dataset_options": {
        "dataset": "synthetic",
        "n_samples": 1000,
        "n_clusters": 10,
        "class_balance": None
    }
}

start_index = exp_reader.generate_experiments(
    folder="../configs/test_exp/", 
    options_to_keep=["eps", "noise_level"],
    start_index=start_index,
    **config
)

In [41]:
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
config = {
    "general_options": {
        "experiment_name": "test_exp2",
        "num_repeats": 10,
        "n_workers": 60,
        "local": False,
        "verbose": False
    },
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.001,
        "noise_level": [0.0, 0.2],
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False
    },
    "sim_init_options": {
        "K_init": 10,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": ["unif", "freq", "uncert"],
        "local_regions": "pairs",
        "eps": [0.3],
        "beta": 1,
        "tau": [7],
        "alpha": 1
    },
    "dataset_options": {
        "dataset": "synthetic",
        "n_samples": 1000,
        "n_clusters": 10,
        "class_balance": None
    }
}

start_index = exp_reader.generate_experiments(
    folder="../configs/test_exp/", 
    options_to_keep=["eps", "noise_level"],
    start_index=start_index,
    **config
)

In [54]:
start_index = 1

In [55]:
datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "general_options": {
            "experiment_name": "real_world_experiments",
            "num_repeats": 15,
            "n_workers": 15,
            "local": False,
            "verbose": False
        },
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.2],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["unif", "freq", "uncert"],
            "local_regions": "pairs",
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset,
        }
    }

    start_index = exp_reader.generate_experiments(
        folder="../configs/real_world_experiments/", 
        options_to_keep=["eps", "noise_level"],
        start_index=start_index,
        **config
    )

In [56]:
datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "general_options": {
            "experiment_name": "real_world_experiments",
            "num_repeats": 15,
            "n_workers": 15,
            "local": False,
            "verbose": False
        },
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.2],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["maxmin", "maxexp"],
            "local_regions": "triangles",
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset,
        }
    }

    start_index = exp_reader.generate_experiments(
        folder="../configs/real_world_experiments/", 
        options_to_keep=["eps", "noise_level"],
        start_index=start_index,
        **config
    )

In [57]:
datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "general_options": {
            "experiment_name": "real_world_experiments",
            "num_repeats": 15,
            "n_workers": 15,
            "local": False,
            "verbose": False
        },
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.2],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["QECC", "nCOBRAS", "COBRAS"],
            "local_regions": "none",
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset,
        }
    }

    start_index = exp_reader.generate_experiments(
        folder="../configs/real_world_experiments/", 
        options_to_keep=["eps", "noise_level"],    
        start_index=start_index,
        **config
    )

In [26]:
start_index = 1

In [27]:
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
config = {"general_options": {
        "experiment_name": "synthetic_experiment",
        "num_repeats": 5,
        "n_workers": 15,
        "local": False,
        "verbose": False
    },
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.0005,
        "noise_level": [0.0, 0.2, 0.4],
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False
    },
    "sim_init_options": {
        "K_init": 10,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": ["unif", "freq", "uncert"],
        "local_regions": ["pairs"],
        "eps": [0.3],
        "beta": 1,
        "tau": [7],
        "alpha": 1
    },
    "dataset_options": {
        "dataset": "synthetic",
        "n_samples": [250, 500],
        "n_clusters": 10,
        "class_balance": None
    }
}

start_index = exp_reader.generate_experiments(
    folder="../configs/synthetic_experiment/", 
    options_to_keep=["noise_level"],
    start_index=start_index,
    **config
)

In [28]:
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
config = {"general_options": {
        "experiment_name": "synthetic_experiment",
        "num_repeats": 5,
        "n_workers": 15,
        "local": False,
        "verbose": False
    },
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.0005,
        "noise_level": [0.0, 0.2, 0.4],
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False
    },
    "sim_init_options": {
        "K_init": 10,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": ["maxmin", "maxexp"],
        "local_regions": ["triangles"],
        "eps": [0.3],
        "beta": 1,
        "tau": [7],
        "alpha": 1
    },
    "dataset_options": {
        "dataset": "synthetic",
        "n_samples": [250, 500],
        "n_clusters": 10,
        "class_balance": None
    }
}

start_index = exp_reader.generate_experiments(
    folder="../configs/synthetic_experiment/", 
    options_to_keep=["noise_level"],
    start_index=start_index,
    **config
)

In [29]:
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
config = {"general_options": {
        "experiment_name": "synthetic_experiment",
        "num_repeats": 5,
        "n_workers": 15,
        "local": False,
        "verbose": False
    },
    "experiment_options": {
        "seed": 33,
        "num_feedback": 0.0005,
        "noise_level": [0.0, 0.2, 0.4],
        "persistent_noise_level": 0.0,
        "force_global_update": True,
        "save_matrix_data": False,
        "infer_sims": False,
        "infer_sims2": False
    },
    "sim_init_options": {
        "K_init": 10,
        "sim_init": 0.1,
        "sim_init_type": "custom"
    },
    "query_strategy_options": {
        "acq_fn": ["QECC", "nCOBRAS", "COBRAS"],
        "local_regions": ["none"],
        "eps": [0.3],
        "beta": 1,
        "tau": [7],
        "alpha": 1
    },
    "dataset_options": {
        "dataset": "synthetic",
        "n_samples": [250, 500],
        "n_clusters": 10,
        "class_balance": None
    }
}

start_index = exp_reader.generate_experiments(
    folder="../configs/synthetic_experiment/", 
    options_to_keep=["noise_level"],
    start_index=start_index,
    **config
)

In [21]:
start_index = 1

In [22]:
datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "general_options": {
            "experiment_name": "real_world_experiments_small",
            "num_repeats": 15,
            "n_workers": 15,
            "local": False,
            "verbose": False
        },
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.2],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["unif", "freq", "uncert"],
            "local_regions": "pairs",
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset + "_small",
        }
    }

    start_index = exp_reader.generate_experiments(
        folder="../configs/real_world_experiments_small/", 
        options_to_keep=["eps", "noise_level"],
        start_index=start_index,
        **config
    )

In [23]:
datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "general_options": {
            "experiment_name": "real_world_experiments_small",
            "num_repeats": 15,
            "n_workers": 15,
            "local": False,
            "verbose": False
        },
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.2],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["maxmin", "maxexp"],
            "local_regions": "triangles",
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset + "_small",
        }
    }

    start_index = exp_reader.generate_experiments(
        folder="../configs/real_world_experiments_small/", 
        options_to_keep=["eps", "noise_level"],
        start_index=start_index,
        **config
    )

In [24]:
datasets = ["breast_cancer", "ecoli", "forest_type_mapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
from genericAL.ac_experiment_data import ACExperimentReader
exp_reader = ACExperimentReader()
K_inits_map = {"20newsgroups": 20, "cifar10": 20, "mnist": 4, "breast_cancer": 2, "cardiotocography": 10, "ecoli": 10, "forest_type_mapping": 10, "mushrooms": 4, "user_knowledge": 2, "yeast": 10}
for dataset in datasets:
    K_init = K_inits_map[dataset]
    config = {
        "general_options": {
            "experiment_name": "real_world_experiments_small",
            "num_repeats": 15,
            "n_workers": 15,
            "local": False,
            "verbose": False
        },
        "experiment_options": {
            "seed": 33,
            "num_feedback": 0.001,
            "noise_level": [0.2],
            "persistent_noise_level": 0.0,
            "force_global_update": True,
            "save_matrix_data": False,
            "infer_sims": False,
            "infer_sims2": False
        },
        "sim_init_options": {
            "K_init": K_init,
            "sim_init": 0.1,
            "sim_init_type": "custom"
        },
        "query_strategy_options": {
            "acq_fn": ["QECC", "nCOBRAS", "COBRAS"],
            "local_regions": "none",
            "eps": [0.3],
            "beta": 1,
            "tau": [7],
            "alpha": 1
        },
        "dataset_options": {
            "dataset": dataset + "_small",
        }
    }

    start_index = exp_reader.generate_experiments(
        folder="../configs/real_world_experiments_small/", 
        options_to_keep=["eps", "noise_level"],    
        start_index=start_index,
        **config
    )

In [81]:
import numpy as np
datasets = ["breast_cancer", "ecoli", "ForestTypeMapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
folder = "datasets"
for ds in datasets:
    x = np.load("../" + folder + "/" + ds + "_data" + "/X.npy")
    y = np.load("../" + folder + "/" + ds + "_data" + "/y.npy")
    print(ds, x.shape)
    print(np.unique(y, return_counts=True))
    print("--------------------------")



breast_cancer (569, 30)
(array([0, 1], dtype=int64), array([356, 213], dtype=int64))
--------------------------
ecoli (336, 7)
(array([0, 1, 2, 3, 4, 5, 6, 7]), array([137,  76,   1,   2,  37,  26,   5,  52], dtype=int64))
--------------------------
ForestTypeMapping (523, 27)
(array([0, 1, 2, 3]), array([168,  84,  86, 185], dtype=int64))
--------------------------
mushrooms (2437, 22)
(array([0, 1]), array([1238, 1199], dtype=int64))
--------------------------
user_knowledge (403, 5)
(array([0, 1, 2, 3, 4]), array([111, 129, 116,  28,  19], dtype=int64))
--------------------------
cardiotocography (1057, 22)
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([189, 294,  29,  36,  35, 150, 120,  63,  30, 111], dtype=int64))
--------------------------
yeast (1187, 8)
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([375,   5,  37,  23,  36, 151, 209, 314,  14,  23], dtype=int64))
--------------------------
20newsgroups (1884, 768)
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,

In [82]:
import numpy as np
datasets = ["breast_cancer", "ecoli", "ForestTypeMapping", "mushrooms", "user_knowledge", "cardiotocography", "yeast", "20newsgroups", "cifar10", "mnist"]
folder = "datasets_small"
for ds in datasets:
    x = np.load("../" + folder + "/" + ds + "_data" + "/X.npy")
    y = np.load("../" + folder + "/" + ds + "_data" + "/y.npy")
    print(ds, x.shape)
    print(np.unique(y, return_counts=True))
    print("--------------------------")

breast_cancer (569, 30)
(array([0, 1], dtype=int64), array([356, 213], dtype=int64))
--------------------------
ecoli (336, 7)
(array([0, 1, 2, 3, 4, 5, 6, 7]), array([137,  76,   1,   2,  37,  26,   5,  52], dtype=int64))
--------------------------
ForestTypeMapping (470, 27)
(array([0, 1, 2, 3]), array([152,  74,  78, 166], dtype=int64))
--------------------------
mushrooms (406, 22)
(array([0, 1]), array([195, 211], dtype=int64))
--------------------------
user_knowledge (403, 5)
(array([0, 1, 2, 3, 4]), array([111, 129, 116,  28,  19], dtype=int64))
--------------------------
cardiotocography (423, 22)
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([ 75, 118,  10,  17,  12,  63,  49,  22,  10,  47], dtype=int64))
--------------------------
yeast (445, 8)
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([140,   1,  14,   8,  13,  67,  75, 114,   4,   9], dtype=int64))
--------------------------
20newsgroups (471, 768)
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 1

In [10]:
import numpy as np
import itertools
def num_edges(cluster_sizes):
    num_pos = 0
    num_neg = 0
    k = len(cluster_sizes)
    clusts = np.arange(k)
    for k1, k2 in itertools.combinations(clusts, 2):
        num_neg = cluster_sizes[k1] * cluster_sizes[k2]

    for k1 in clusts:
        num_pos += cluster_sizes[k1] * (cluster_sizes[k1] - 1) / 2
    return num_neg, num_pos

In [19]:
num_edges([50, 50, 50, 50, 50, 50])

(2500, 7350.0)