# Minimal Demo

In [1]:
%%capture
# PostgreSQL setup
!sudo apt-get -y -qq update
!sudo apt-get -y -qq install postgresql
!sudo service postgresql start
!sudo -u postgres psql -U postgres -c "ALTER USER postgres PASSWORD 'password'"
!pip install psycopg2

In [2]:
%%capture
# Pyqrlew installation
!pip install pyqrlew

In [3]:
from pyqrlew.io import PostgreSQL

# Positional arguments handed to the PostgreSQL helper. 'password' is the value
# set by the ALTER USER statement in the setup cell.
# NOTE(review): the meaning/order of the two 'postgres' values (presumably
# database name and user) and of 5432 (presumably the port) is not visible
# here; confirm against the pyqrlew.io.PostgreSQL signature.
connection_args = ('postgres', 'postgres', 'password', 5432)

database = PostgreSQL(*connection_args)
# Make the demo IMDB dataset available in the database
database.load_imdb()
# Engine reused by the following cells to read the schema and run queries
engine = database.engine()

In [4]:
from pyqrlew import Dataset

# Build the dataset from the `imdb_ijs` schema, then attach column constraints
# one step at a time: the values `gender` may take, and uniqueness of `actors.id`.
dataset = Dataset.from_database('imdb', engine, 'imdb_ijs')
dataset = dataset.imdb_ijs.actors.gender.with_possible_values(['M', 'F'])
dataset = dataset.imdb_ijs.actors.id.with_unique_constraint()

# Privacy unit definition (here we want to protect actors' privacy).
# In `actors`, the column `id` directly defines the privacy unit.
actors_unit = ("actors", [], "id")
# In `roles`, the column `actor_id` refers to the column `id` of table `actors`,
# the `id` of which defines the privacy unit.
roles_unit = ("roles", [("actor_id", "actors", "id")], "id")
privacy_unit = [actors_unit, roles_unit]

# Privacy budget (see https://en.wikipedia.org/wiki/Differential_privacy)
budget = dict(epsilon=1.0, delta=1e-3)

# A basic aggregation: number of roles per gender.
# Adjacent literals are concatenated into a single-line SQL string.
query = (
    "SELECT gender, COUNT(movie_id) as role_count "
    "FROM imdb_ijs.actors "
    "JOIN imdb_ijs.roles ON roles.actor_id = actors.id "
    "GROUP BY gender"
)
relation = dataset.relation(query)

# Turn the relation into its differentially private equivalent
relation_with_dp_event = relation.rewrite_with_differential_privacy(dataset, privacy_unit, budget)

# The rewritten relation and the SQL query it corresponds to
dp_relation = relation_with_dp_event.relation()
dp_query = dp_relation.to_query()

# Privacy loss incurred by the rewritten query
# (see https://github.com/google/differential-privacy/tree/main/python/dp_accounting)
dpe = relation_with_dp_event.dp_event()
print(dpe)

DpEvent(Gaussian { noise_multiplier: 377.6479532659047 })


In [5]:
from IPython.display import display, Markdown
import pandas as pd

# Run the original query and its differentially private rewrite against the
# database. The `engine` created in the connection cell is reused instead of
# building a new one for each read.
true_res = pd.read_sql(query, engine)
dp_res = pd.read_sql(dp_query, engine)

# Plain string literals: the headings contain no placeholders, so no f-string
# prefix is needed.
display(Markdown('## Number of roles per gender in the DB'))
display(true_res)

# The DP counts are noisy, so they are expected to differ from the true counts.
display(Markdown('## Differentially Private number of roles per gender in the DB'))
display(dp_res)

## Number of roles per gender in the DB

Unnamed: 0,gender,role_count
0,F,1135174
1,M,2296792


## Differentially Private number of roles per gender in the DB

Unnamed: 0,gender,role_count
0,M,2186204
1,F,1119683
