The main process consists of:
- Start with data that contains personalized information.
- Find ML models that can generate synthetic data that accurately represents the original data.
- Transform the data into a format each ML model can read (one transformation per model).
- Start the energy measuring magic machine.
- Run the ML model on the transformed data, resulting in synthetic data.
- Stop the energy measuring.
- From the data, measure the privacy metrics (and the accuracy relative to the original data?).
- From the measurement, read off the energy usage.
- To get more results, run again with different subsets of the data.
- Compare and contrast.

In [None]:
import pandas as pd
import numpy as np

## The data:

In [None]:
# Load the Census data set. The file has no header row, and the fields are
# separated by a comma followed by a space; a multi-character separator is
# handled by the "python" parser engine.
census = pd.read_csv(
    "data/census/adult.data",
    header=None,
    delimiter=", ",
    engine="python",
)

# Column names, in file order.
columns = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "income",
]
census.columns = columns

# Unknown values are written as "?" in the file: turn them into NaN and
# drop every row that contains at least one.
census = census.replace("?", np.nan).dropna()

# Encode the categorical columns as integers. Each label's code is its
# position in the list below, so the ORDER of the labels is significant.
# NOTE: Series.map yields NaN for any label that is not listed here.
_ordinal_labels = {
    "workclass": [
        "Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
        "Local-gov", "State-gov", "Without-pay", "Never-worked",
    ],
    "education": [
        "Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
        "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters",
        "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool",
    ],
    "marital-status": [
        "Married-civ-spouse", "Divorced", "Never-married", "Separated",
        "Widowed", "Married-spouse-absent", "Married-AF-spouse",
    ],
    "occupation": [
        "Tech-support", "Craft-repair", "Other-service", "Sales",
        "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
        "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
        "Transport-moving", "Priv-house-serv", "Protective-serv",
        "Armed-Forces",
    ],
    "relationship": [
        "Wife", "Own-child", "Husband", "Not-in-family", "Other-relative",
        "Unmarried",
    ],
    "race": [
        "White", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other", "Black",
    ],
    "native-country": [
        "United-States", "Cambodia", "England", "Puerto-Rico", "Canada",
        "Germany", "Outlying-US(Guam-USVI-etc)", "India", "Japan", "Greece",
        "South", "China", "Cuba", "Iran", "Honduras", "Philippines", "Italy",
        "Poland", "Jamaica", "Vietnam", "Mexico", "Portugal", "Ireland",
        "France", "Dominican-Republic", "Laos", "Ecuador", "Taiwan", "Haiti",
        "Columbia", "Hungary", "Guatemala", "Nicaragua", "Scotland",
        "Thailand", "Yugoslavia", "El-Salvador", "Trinadad&Tobago", "Peru",
        "Hong", "Holand-Netherlands",
    ],
}
for _column, _labels in _ordinal_labels.items():
    _codes = {label: code for code, label in enumerate(_labels)}
    census[_column] = census[_column].map(_codes)

# The two binary columns become booleans instead of integer codes.
_boolean_labels = {
    "sex": {"Female": True, "Male": False},
    "income": {"<=50K": False, ">50K": True},
}
for _column, _flags in _boolean_labels.items():
    census[_column] = census[_column].map(_flags)

# DataFrame.size is the total number of cells (rows x columns).
print(census.size)
census.head()