In [1]:
import polars as pl
import opendp.prelude as dp
# Opt in to OpenDP's "contrib" feature flag up front, before any query is built.
dp.enable_features("contrib")

In [2]:
# Penguins CSV served from the seaborn-data GitHub repository.
data_path = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)

# Scan the CSV and collect it straight away.
# NOTE(review): despite the `lf` name this is the collected result, not a lazy
# frame -- the context cell below converts it back with `lf.lazy()`.
lf = pl.scan_csv(data_path).collect()

# Preview the first two rows.
lf.head(2)

species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
str,str,f64,f64,i64,i64,str
"""Adelie""","""Torgersen""",39.1,18.7,181,3750,"""MALE"""
"""Adelie""","""Torgersen""",39.5,17.4,186,3800,"""FEMALE"""


In [3]:
# Build the OpenDP context that every later query draws its privacy budget from.
# Overall privacy loss is specified as rho=0.19 with delta=1e-7.
# NOTE(review): contributions=36 presumably bounds how many rows one individual
# may contribute; the preview above looks like one row per penguin -- confirm
# that 36 (rather than 1) is really the intended privacy unit.
context = dp.Context.compositor(
    privacy_loss=dp.loss_of(rho=0.19, delta=1e-7),
    privacy_unit=dp.unit_of(contributions=36),
    data=lf.lazy(),
)

In [4]:
# Release a contingency table over three string columns and three numeric
# columns using the AIM algorithm, with rho=0.1 and delta=0.0 requested for
# this query.
table_aim = (
    context
    .query(rho=0.1, delta=0.0)
    .select(
        "sex",
        "species",
        "island",
        "bill_length_mm",
        "bill_depth_mm",
        "flipper_length_mm",
    )
    .contingency_table(
        # Explicit key sets for the string-valued columns.
        keys={
            "sex": ["MALE", "FEMALE"],
            "species": ["Adelie", "Chinstrap", "Gentoo"],
            "island": ["Dream", "Torgersen", "Biscoe"],
        },
        # Cut points for the numeric columns.
        cuts={
            "bill_length_mm": [30, 42, 54, 65],
            "bill_depth_mm": [13, 18, 23],
            "flipper_length_mm": [150, 200, 250],
        },
        algorithm=dp.mbi.AIM(),
    )
    .release()
)

In [5]:
# Request 1000 synthetic rows from the released table; the resulting frame is
# shown as the cell output.
# NOTE(review): the saved output shows null `sex`/`island` entries and
# bill_depth_mm values that all sit above the top cut (23) -- check whether the
# privacy unit, budget and cuts are giving a usable release.
table_aim.synthesize(rows=1000)

sex,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm
str,str,str,f64,f64,i64
,"""Adelie""",,62.563982,23.569734,222
"""MALE""","""Chinstrap""",,54.599675,23.195207,251
"""MALE""","""Adelie""",,55.969097,23.025826,164
,"""Adelie""",,57.980746,23.705597,150
,"""Adelie""",,65.971715,23.329593,251
…,…,…,…,…,…
,"""Adelie""",,56.97835,23.450481,153
,"""Adelie""",,58.445336,23.261066,251
"""MALE""","""Chinstrap""",,65.104864,23.027569,150
"""MALE""","""Adelie""",,65.78653,23.001985,251


In [None]:
# NOTE(review): this cell has no execution count, and `lazy_lf`, `bl_lb`,
# `bl_ub`, `lf_domain`, `ms` and `metadata` are not defined in any cell shown
# above -- it will presumably fail with NameError on Restart & Run All.
# Define these names earlier in the notebook or remove the cell.

# Query plan: a DP mean of bill_length_mm with bounds (bl_lb, bl_ub) and
# scale=100, plus a DP row count with scale=1.
plan = lazy_lf.select(
    pl.col("bill_length_mm").dp.mean(bounds=(bl_lb, bl_ub), scale=100), 
    dp.len(scale=1)
)
# Turn the plan into an OpenDP measurement; presumably `lf_domain` is the input
# domain, symmetric distance the input metric and max-divergence the privacy
# measure -- TODO confirm against the make_private_lazyframe signature.
opendp_pipe = dp.m.make_private_lazyframe(
    lf_domain, dp.symmetric_distance(), ms.max_divergence(), plan
)
# Privacy cost of the measurement for an input distance of metadata["max_ids"].
cost = opendp_pipe.map(d_in=int(metadata["max_ids"]))
print(f"Cost: {cost}")

# Invoke the measurement on the data, then collect and display the release.
release_data = opendp_pipe(lazy_lf)
release_data = release_data.collect()
release_data