Create several datasets with each builder, visualize them, and print their summary metrics.

In [5]:
from datagen.covshift.selector import  FeatureSelector
from datagen.covshift.builder import  CovShiftBuilder
from datagen.visualize import visualize_shift2d_px, visualize_shift3d_px
from evaluate.evaluate import analyze_data

# --- Covariate-shift demo -------------------------------------------------
# Keyword arguments for the initial classification problem: two features,
# both informative, none repeated or redundant.
# NOTE(review): these look like sklearn `make_classification` kwargs --
# confirm against CovShiftBuilder.
init_classification = {
    "n_samples": 5000,
    "n_features": 2,
    "n_informative": 2,
    "n_repeated": 0,
    "n_redundant": 0,
}

# Selector configured with equal global/source/target sizes and unit scales.
selector = FeatureSelector(
    n_global=1000,
    n_source=1000,
    n_target=1000,
    source_scale=1,
    target_scale=1,
    bias_dist=2,
)
builder = CovShiftBuilder(init_classification, selector)

# Each run generates a fresh dataset from the same builder, plots it in 2D
# and prints every metric reported by analyze_data as "name: value".
runs = 2
for _ in range(runs):
    data = builder.generate()
    visualize_shift2d_px(*data)

    metrics = analyze_data(data)
    for key in metrics:
        print(f"{key}: {metrics[key]}")

num-global: 2000
uniqueness-global: 1.0
class-marginal-global: 0.4975
num-source: 1000
uniqueness-source: 0.532
class-marginal-source: 0.514
num-target: 1000
uniqueness-target: 0.711
class-marginal-target: 0.481


num-global: 2000
uniqueness-global: 1.0
class-marginal-global: 0.513
num-source: 1000
uniqueness-source: 0.552
class-marginal-source: 0.509
num-target: 1000
uniqueness-target: 0.701
class-marginal-target: 0.517


In [4]:
from datagen.conceptshift.selector import DomainSelector
from datagen.conceptshift.shifter import Shifter
from datagen.conceptshift.builder import ConceptShiftDataBuilder
from datagen.visualize import visualize_shift2d_px, visualize_shift3d_px
from evaluate.evaluate import analyze_data

# --- Concept-shift demo ---------------------------------------------------
# Keyword arguments for the initial classification problem: two features,
# both informative, none repeated or redundant.
# NOTE(review): these look like sklearn `make_classification` kwargs --
# confirm against ConceptShiftDataBuilder.
init_classification = {
    "n_samples": 5000,
    "n_features": 2,
    "n_informative": 2,
    "n_repeated": 0,
    "n_redundant": 0,
}

# Shifter over two domains with rotation, translation and scale settings.
# NOTE(review): units of `rot` (radians, turns, ...) are not visible here.
shifter = Shifter(
    n_domains=2,
    rot=0.25,
    trans=1,
    scale=2,
)

# Selector with equal global/source/target sizes, one domain each for
# source and target.
selector = DomainSelector(
    n_global=1000,
    n_source=1000,
    n_target=1000,
    n_domains_source=1,
    n_domains_target=1,
)

builder = ConceptShiftDataBuilder(init_classification, shifter, selector)

# Each run generates a fresh dataset from the same builder, plots it in 2D
# and prints every metric reported by analyze_data as "name: value".
runs = 2
for _ in range(runs):
    data = builder.generate()
    visualize_shift2d_px(*data)

    metrics = analyze_data(data)
    for key in metrics:
        print(f"{key}: {metrics[key]}")

num-global: 1000
uniqueness-global: 1.0
class-marginal-global: 0.517
num-source: 1000
uniqueness-source: 1.0
class-marginal-source: 0.485
num-target: 1000
uniqueness-target: 1.0
class-marginal-target: 0.501


num-global: 1000
uniqueness-global: 1.0
class-marginal-global: 0.513
num-source: 1000
uniqueness-source: 1.0
class-marginal-source: 0.499
num-target: 1000
uniqueness-target: 1.0
class-marginal-target: 0.508
