In [9]:
%pip install lomas-client

Note: you may need to restart the kernel to use updated packages.


In [10]:
# Step 1
from lomas_client import Client
import numpy as np

# Step 2
APP_URL = "https://user-stuartbenoliel-490979-lomas-server-user.lab.sspcloud.fr"
USER_NAME = "Dr. Antartica"
DATASET_NAME = "PENGUIN"
client = Client(url=APP_URL, user_name = USER_NAME, dataset_name = DATASET_NAME)

# Step 3
#es = client.any_query(parameters)

In [11]:
penguin_metadata = client.get_dataset_metadata()
penguin_metadata

{'max_ids': 1,
 'rows': 344,
 'row_privacy': True,
 'censor_dims': False,
 'columns': {'species': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'max_partition_contributions': None,
   'type': 'string',
   'cardinality': 3,
   'categories': ['Adelie', 'Chinstrap', 'Gentoo']},
  'island': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'max_partition_contributions': None,
   'type': 'string',
   'cardinality': 3,
   'categories': ['Torgersen', 'Biscoe', 'Dream']},
  'bill_length_mm': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'max_partition_contributions': None,
   'type': 'float',
   'precision': 64,
   'lower': 30.0,
   'upper': 65.0},
  'bill_depth_mm': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'm

In [12]:
NB_PENGUINS = penguin_metadata["rows"]

NB_ROWS = 100
SEED = 0

df_dummy = client.get_dummy_dataset(
    nb_rows = NB_ROWS,
    seed = SEED
)

print(df_dummy.shape)
df_dummy.head()

(100, 7)


Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Gentoo,Biscoe,46.799577,16.196816,239.680123,3010.84047,FEMALE
1,Chinstrap,Dream,38.133052,14.875077,208.332005,6689.525543,MALE
2,Chinstrap,Torgersen,58.06582,19.725266,154.021822,2473.883392,MALE
3,Adelie,Torgersen,62.323556,14.951074,221.148682,2024.497075,FEMALE
4,Adelie,Dream,39.31456,18.776879,206.902585,3614.604018,MALE


In [13]:
print(client.get_initial_budget())
print(client.get_total_spent_budget())
print(client.get_remaining_budget())

initial_epsilon=10.0 initial_delta=0.005
total_spent_epsilon=1.4244186046530456 total_spent_delta=0.0
remaining_epsilon=8.575581395346955 remaining_delta=0.005


In [14]:
import opendp.prelude as dp

columns = list(penguin_metadata["columns"].keys())
bill_length_min = penguin_metadata['columns']['bill_length_mm']['lower']
bill_length_max = penguin_metadata['columns']['bill_length_mm']['upper']
bill_length_min, bill_length_max

bill_length_pipeline = (
    dp.t.make_split_dataframe(separator=",", col_names=columns) >>
    dp.t.make_select_column(key="bill_length_mm", TOA=str) >>
    dp.t.then_cast_default(TOA=float) >>
    dp.t.then_clamp(bounds=(bill_length_min, bill_length_max)) >>
    dp.t.then_resize(size=NB_PENGUINS, constant=40.0) >>
    dp.t.then_mean() >>
    dp.m.then_gaussian(scale=5.0) # Noise addition mechanism instructions
)

In [15]:
cost_res = client.opendp.cost(
    opendp_pipeline = bill_length_pipeline,
    fixed_delta=1e-5
)
cost_res

CostResponse(epsilon=0.06698311627234338, delta=1e-05)

In [17]:
dummy_var_res = client.opendp.query(
    opendp_pipeline = bill_length_pipeline,
    dummy=True,
    fixed_delta=1e-5
)
print(f"Dummy result for variance: {np.round(dummy_var_res.result.value, 2)}")

Dummy result for variance: 45.93


In [19]:
var_res = client.opendp.query(
    opendp_pipeline = bill_length_pipeline,
    fixed_delta=1e-5
)

var_bill_length = np.round(var_res.result.value, 2)
print(f"Variance of bill length: {var_bill_length} (from opendp query).")

Variance of bill length: 44.64 (from opendp query).


In [20]:
# Get standard error
standard_error = np.sqrt(var_bill_length / NB_PENGUINS)
print(f"Standard error of bill length: {np.round(standard_error, 2)}.")

# Compute the 95% confidence interval
ZSCORE = 1.96
lower_bound = np.round(40.0 - ZSCORE * standard_error, 2)
upper_bound = np.round(40.0 + ZSCORE * standard_error, 2)
print(f"The 95% confidence interval of the bill length of all penguins is [{lower_bound}, {upper_bound}].")

Standard error of bill length: 0.36.
The 95% confidence interval of the bill length of all penguins is [39.29, 40.71].


In [21]:
previous_queries = client.get_previous_queries()
len(previous_queries)

3

In [22]:
# OpenDP
var_bill_length_query = previous_queries[0]
var_bill_length_query

{'user_name': 'Dr. Antartica',
 'dataset_name': 'PENGUIN',
 'dp_librairy': 'opendp',
 'client_input': {'dataset_name': 'PENGUIN',
  'opendp_json': Measurement(
      input_domain   = AtomDomain(T=String),
      input_metric   = SymmetricDistance(),
      output_measure = MaxDivergence(f64)),
  'fixed_delta': None},
 'response': {'epsilon': 0.7122093023265228,
  'delta': 0.0,
  'requested_by': 'Dr. Antartica',
  'result': {'res_type': 'opendp', 'value': 21.587582284159083}},
 'timestamp': 1744791577.182293}