In [120]:
# General imports
import numpy as np
import pandas as pd
import itertools

# Sklearn imports
from sklearn.preprocessing import LabelEncoder


In [121]:
df_compas = pd.read_csv('Datasets/Real_data/Compas/compas_good_outcome.csv')

In [122]:
df_compas

Unnamed: 0,sex,race,age_cat,priors_count,v_decile_score
0,1,0,0,0,1
1,1,0,2,1,0
2,1,1,0,1,0
3,0,1,0,0,1
4,1,1,2,0,0
...,...,...,...,...,...
5910,1,0,0,0,0
5911,1,0,2,0,0
5912,1,0,2,0,0
5913,1,0,2,0,0


In [123]:
# Target attribute
target = 'v_decile_score' 

# Protected attribute 
protected_att = 'race'

In [124]:
# Privacy settings
dict_setting = {"combsLDP": ['age_cat','sex','priors_count']}

In [125]:
setting = "combsLDP"
lst_sensitive = dict_setting[setting]


# New sensitive attribute (marginal of all in lst_sensitive)
new_protected_att = '_'.join(lst_sensitive)

In [126]:
new_protected_att

'age_cat_sex_priors_count'

In [127]:
# to compute all possible permutations
all_list = [list(df_compas[att].unique()) for att in lst_sensitive]
all_perm = list(itertools.product(*all_list))
k = len(all_perm)  # new domain size



In [128]:
all_list

[[0, 2, 1], [1, 0], [0, 1]]

In [129]:
all_perm

[(0, 1, 0),
 (0, 1, 1),
 (0, 0, 0),
 (0, 0, 1),
 (2, 1, 0),
 (2, 1, 1),
 (2, 0, 0),
 (2, 0, 1),
 (1, 1, 0),
 (1, 1, 1),
 (1, 0, 0),
 (1, 0, 1)]

In [130]:
df_compas[new_protected_att] = df_compas[lst_sensitive].astype(str).T.agg(', '.join)

In [131]:
df_compas

Unnamed: 0,sex,race,age_cat,priors_count,v_decile_score,age_cat_sex_priors_count
0,1,0,0,0,1,"0, 1, 0"
1,1,0,2,1,0,"2, 1, 1"
2,1,1,0,1,0,"0, 1, 1"
3,0,1,0,0,1,"0, 0, 0"
4,1,1,2,0,0,"2, 1, 0"
...,...,...,...,...,...,...
5910,1,0,0,0,0,"0, 1, 0"
5911,1,0,2,0,0,"2, 1, 0"
5912,1,0,2,0,0,"2, 1, 0"
5913,1,0,2,0,0,"2, 1, 0"


In [132]:
from sklearn.preprocessing import LabelEncoder

df_compas['age_cat_sex_priors_count'] = LabelEncoder().fit_transform(df_compas['age_cat_sex_priors_count'])

In [133]:
df_compas

Unnamed: 0,sex,race,age_cat,priors_count,v_decile_score,age_cat_sex_priors_count
0,1,0,0,0,1,2
1,1,0,2,1,0,11
2,1,1,0,1,0,3
3,0,1,0,0,1,0
4,1,1,2,0,0,10
...,...,...,...,...,...,...
5910,1,0,0,0,0,2
5911,1,0,2,0,0,10
5912,1,0,2,0,0,10
5913,1,0,2,0,0,10


In [134]:
# variables to be used  
df_compas = df_compas [['race','v_decile_score','age_cat_sex_priors_count']]

In [135]:
df_compas

Unnamed: 0,race,v_decile_score,age_cat_sex_priors_count
0,0,1,2
1,0,0,11
2,1,0,3
3,1,1,0
4,1,0,10
...,...,...,...
5910,0,0,2
5911,0,0,10
5912,0,0,10
5913,0,0,10


In [49]:
df_compas.X.value_counts()

0, 1, 1    1410
0, 1, 0    1292
2, 1, 0     816
1, 1, 0     522
1, 1, 1     490
0, 0, 0     432
0, 0, 1     244
2, 1, 1     221
2, 0, 0     211
1, 0, 0     173
1, 0, 1      71
2, 0, 1      33
Name: X, dtype: int64

In [53]:
df_compas

Unnamed: 0,sex,race,age_cat,priors_count,v_decile_score,X
0,1,0,0,0,1,"0, 1, 0"
1,1,0,2,1,0,"2, 1, 1"
2,1,1,0,1,0,"0, 1, 1"
3,0,1,0,0,1,"0, 0, 0"
4,1,1,2,0,0,"2, 1, 0"
...,...,...,...,...,...,...
5910,1,0,0,0,0,"0, 1, 0"
5911,1,0,2,0,0,"2, 1, 0"
5912,1,0,2,0,0,"2, 1, 0"
5913,1,0,2,0,0,"2, 1, 0"


In [None]:
# creating a list of column names
column_values = ['A', 'X', 'Y']

lists = [[0,1],[0,1,2], [0,1]]
# creating the patterns of features
#pattern_features = [x for x in itertools.product([0, 1], repeat=len(column_values))]

pattern_features = [x for x in itertools.product(*lists)]

# # creating the dataframe
df = pd.DataFrame(data = pattern_features, 
                   columns = column_values)
  
# # displaying the dataframe
display(df)