In [2]:
import polars as pl
import opendp.prelude as dp
dp.enable_features("contrib")

In [None]:
data_path = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
metadata_path = "penguin_metadata.yaml"

In [3]:
lf = pl.scan_csv(
    data_path,
    schema_overrides={
        'species': pl.Enum(['Adelie']), 
        'island': pl.Enum(['Dream', 'Torgersen', 'Biscoe']), 
        'sex': pl.Enum(["MALE", "FEMALE"])}
).drop_nulls().collect()

In [4]:
lf.head(2)

species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
enum,enum,f64,f64,i64,i64,enum
"""Adelie""","""Torgersen""",39.1,18.7,181,3750,"""MALE"""
"""Adelie""","""Torgersen""",39.5,17.4,186,3800,"""FEMALE"""


## Context API

In [5]:
context = dp.Context.compositor(
    data=lf.lazy(),
    privacy_unit=dp.unit_of(contributions=5),
    privacy_loss=dp.loss_of(rho=0.19, delta=1e-7),

)

In [6]:
table_aim = (
    context.query(rho=0.1, delta=0.0)
    # transformations/truncation may be applied here
    .select(
        "sex", "species", "island", 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm'
    ).contingency_table(
        keys={"sex": ["MALE", "FEMALE"],
              "species": ['Adelie'],
              "island": ['Dream', 'Torgersen', 'Biscoe']
             },
        cuts={"bill_length_mm": [30,42,54,65],'bill_depth_mm':[13,18,23], 'flipper_length_mm': [150,200,250]},
        algorithm=dp.mbi.AIM(),
    )
    .release()
)

In [7]:
table_aim.synthesize(rows=1000)

sex,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm
enum,enum,enum,f64,f64,i64
"""MALE""","""Adelie""","""Biscoe""",65.224869,16.186863,158
"""MALE""","""Adelie""","""Torgersen""",65.104732,14.468942,187
"""MALE""","""Adelie""","""Dream""",33.261781,14.656081,173
"""MALE""",,,65.403681,17.740775,150
,"""Adelie""",,36.556358,12.194968,176
…,…,…,…,…,…
"""MALE""","""Adelie""","""Torgersen""",65.38212,14.870705,195
"""MALE""","""Adelie""","""Torgersen""",65.858644,17.358386,177
"""MALE""","""Adelie""",,65.322856,13.052434,195
"""MALE""","""Adelie""","""Torgersen""",65.077998,17.244135,150


## Framework API

In [None]:
with open(metadata_path, "r") as f:
    metadata = yaml.safe_load(f)
dtypes, datetime_columns = get_df_types_from_metadata(metadata)
df_dummy = pd.read_csv(dummy_path, dtype=dtypes, parse_dates=datetime_columns)
df_dummy.head()

In [None]:
d_in = 1
input_metric = dp.symmetric_distance()
input_domain = dp.vector_domain(dp.atom_domain(T=float))

d_out = 1.0
privacy_measure = dp.max_divergence()

bounds = (0.0, 100.0)
imputed_value = 50.0

In [None]:
m_sc = dp.c.make_adaptive_composition(
    input_domain=input_domain,
    input_metric=input_metric,
    output_measure=privacy_measure,
    d_in=d_in,
    d_mids=[d_out / 3] * 3,
)

In [9]:
table_mst = (
    context.query(rho=0.1, delta=0.0)
    # transformations/truncation may be applied here
    .select(
        "sex", "species", "island", 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm'
    ).contingency_table(
        keys={"sex": ["MALE", "FEMALE"],
              "species": ['Adelie'],
              "island": ['Dream', 'Torgersen', 'Biscoe']
             },
        cuts={"bill_length_mm": [30, 42, 54, 65], 'bill_depth_mm':[13, 18, 23], 'flipper_length_mm': [150, 200, 250]},
        algorithm=dp.mbi.MST(),
    )
    .release()
)

OpenDPException: 
  FailedFunction("unknown ordering between (0.19999999999999998, 0.0) and (0.19, 1e-7)")

In [None]:
table_mst.synthesize(rows=1000)