In [None]:
import os
import csv
import math
import itertools
import pickle

import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import preprocessing
from scipy import sparse
import networkx as nx

from mbi import (
    Dataset,
    FactoredInference,
    Domain,
    LocalInference,
    MixtureInference,
    PublicInference,
)

In [None]:
# Load the processed dataset; the second path is presumably the matching
# domain specification expected by Dataset.load -- confirm against mbi docs.
data = Dataset.load(
    "./data/german_processed.csv",
    "./data/german_processed.json",
)

# Keep the domain and the record count handy for the inference step.
domain = data.domain
total = len(data.df)

In [None]:
# adapted from https://github.com/ryan112358/private-pgm/blob/master/examples/adult_example.py

# Pairwise (2-way) marginals that are measured in addition to every 1-way marginal.
cliques = [
    ('Credit amount', 'Duration'),
    ('Checking account', 'Credit amount'),
    ('Job', 'Credit amount'),
    ('Saving accounts', 'Credit amount'),
    ('Age', 'Housing'),
    ('Age', 'Duration'),
    ('Credit amount', 'Purpose'),
    ('Age', 'Credit amount'),
    ('Credit amount', 'Risk'),
    ('Sex', 'Credit amount'),
    ('Housing', 'Credit amount'),
]

# Seed NumPy's global RNG so the Laplace noise drawn below is reproducible.
np.random.seed(0)

# The total budget `epsilon` is divided evenly over ALL measurements: one
# 1-way marginal per attribute in the domain plus one marginal per clique.
# (It is not a fixed half/half split between the two groups.)
epsilon = 1.0
epsilon_split = epsilon / (len(data.domain) + len(cliques))
# Laplace scale used for every measurement. The factor 2.0 is presumably the
# L1 sensitivity of a marginal when one record is replaced -- TODO confirm.
sigma = 2.0 / epsilon_split

# (projection key, clique label) pairs: first every single attribute, then
# every clique. The order matters because it fixes the order of the noise draws.
targets = [(col, (col,)) for col in data.domain] + [(cl, cl) for cl in cliques]

# Each measurement is (query matrix, noisy answer, noise level, clique).
# NOTE(review): `sigma` is the Laplace *scale*; if the inference engine treats
# the third element as a standard deviation that would be sqrt(2) * sigma.
# All measurements share the same value here -- confirm whether it matters.
measurements = []
for key, clique in targets:
    x = data.project(key).datavector()
    y = x + np.random.laplace(loc=0, scale=sigma, size=x.size)
    # Identity query: the marginal's cells are measured directly.
    measurements.append((sparse.eye(x.size), y, sigma, clique))

In [None]:
# WARNING: this cell is very slow -- it takes ~400 minutes to run!
# Options forwarded to the inference engine (logging enabled, iters=500).
inference_options = dict(log=True, iters=500)
engine = FactoredInference(domain, **inference_options)

# Fit a model to the noisy measurements, passing the record count as `total`.
model = engine.estimate(measurements, total=total)

In [None]:
# Save the fitted model to a file so the estimation does not have to be repeated.
# The resulting file is about 1.6GB.
model_path = './model/german_synth.pkl'

# Create the target directory first: open(..., 'wb') raises FileNotFoundError
# when the directory is missing, which would throw away the estimation result.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as f:
    pickle.dump(model, f)

In [None]:
# Draw 1000 synthetic rows from the fitted model (takes around one second).
synth = model.synthetic_data(rows=1000)

# Expose the underlying frame and show it as the cell's output.
sdf = synth.df
sdf