In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
import itertools
from collections import defaultdict
import io
import gurobipy as gp

In [23]:
def gen_data_set(n, atts, m):
    """n is number of people, atts is dict of attribute, m is number of attributes used"""
    db = []
    heads = list(atts.keys())
    for i in range(min(len(heads), m)):
        db.append(np.random.choice(list(atts[heads[i]]), n))
        
    out_db = pd.DataFrame(np.array(db).T.tolist(), columns = heads[0:min(len(heads), m)])
    out_db.insert(0, "row_key",
                np.random.randint(0, 10, out_db.shape[0]))
    return out_db

In [24]:
# np.random.seed(0)
bin_atts = {'Income': {'rich', 'poor'}, 
        'Sex': {'male', 'female'}, 
        'Height': {'tall', 'short'},
        'Age': {'old', 'young'},
        'Race': {'a', 'b'},
        'Married': {'married', 'unmarried'},
        'Work': {'working', 'not_working'}
        }
db = gen_data_set(10, bin_atts, 20)
display(db)

Unnamed: 0,row_key,Income,Sex,Height,Age,Race,Married,Work
0,4,poor,female,short,old,a,married,not_working
1,0,rich,male,short,old,b,married,working
2,1,rich,male,short,young,a,married,not_working
3,2,poor,female,short,old,b,unmarried,working
4,4,rich,female,short,old,a,unmarried,not_working
5,8,poor,male,short,young,a,unmarried,working
6,7,poor,male,tall,old,b,unmarried,working
7,4,rich,male,short,young,b,married,not_working
8,6,rich,female,tall,old,b,married,not_working
9,8,poor,male,short,young,a,unmarried,not_working


In [25]:
data = io.StringIO('''
value,cell_key,peturbation
0    ,0       ,-1
0    ,1       ,1
0    ,2       ,0
''')

ptable = pd.read_csv(data).set_index(["value", "cell_key"])
# display(ptable)

In [26]:
def gen_tables_cell_key(db,atts1,atts2,ptable):
    
    counts = pd.crosstab(
    [db[v] for v in atts1],
    [db[v] for v in atts2],
    dropna=False,
    margins=True)
    
    sum_rkey = pd.pivot_table(
        db,
        values=['row_key'],
        index=atts1,
        columns=atts2,
        aggfunc=np.sum,
        fill_value=0,
        dropna=False,
        margins=True)

    cell_keys = sum_rkey.mod(3)
    
    perturbations = counts * 0
    
    for col_idx in range(counts.shape[0]):
        for row_idx in range(counts.shape[1]):
            cell_key = cell_keys.iat[col_idx, row_idx]
            count = counts.iat[col_idx,row_idx]
            perturbation = 0
            if count > 0:
                perturbation = ptable.loc[0, cell_key]
            perturbations.iat[col_idx, row_idx] += perturbation
    perturbed_counts = counts + perturbations
    def apply_color(x):
        colors = {-1: 'lightcoral', 0: 'white', 1: 'lightgreen'}
        return perturbations.applymap(lambda val: 'background-color: {}'.format(colors.get(val,'')))
    
    print(is_unique(perturbed_counts.to_numpy()))
    
    return perturbed_counts.style.apply(apply_color, axis=None)

In [27]:
def is_unique(table):
    m = gp.Model()
    m.Params.OutputFlag = 0
    out_table = table

    i_max = len(out_table) - 1
    j_max = len(out_table[0]) - 1

    # Initialize the decision variables
    x = np.array([[m.addVar(vtype='I', name=f"x_{i}_{j}") 
                   for i in range(i_max + 1)] for j in range(j_max +1)])

    # Add the constraints to the model
    for i in range(i_max+1):
        for j in range(j_max+1):
            m.addConstr(x[i][j] <= out_table[i][j] +1)
            m.addConstr(x[i][j] >= out_table[i][j] -1)

    for i in range(i_max+1):
        m.addConstr(sum(x[i,:][0:j_max]) == x[i][j_max])

    for j in range(j_max+1):
        m.addConstr(sum(x[:,j][0:i_max]) == x[i_max][j])

    # Parameters
    m.Params.PoolSearchMode = 2
    # m.Params.PoolSolutions = 10**8
    m.Params.PoolSolutions = 2
    m.Params.PoolGap = 0.0

    # Optimize
    m.optimize()

    if m.SolCount > 1:
        return False
    else:
        return True 

In [7]:
def gen_sols(table,n):
    m = gp.Model()
    m.Params.OutputFlag = 0
    out_table = table

    i_max = len(out_table) - 1
    j_max = len(out_table[0]) - 1

    # Initialize the decision variables
    x = np.array([[m.addVar(vtype='I', name=f"x_{i}_{j}") 
                   for i in range(i_max + 1)] for j in range(j_max +1)])

    # Add the constraints to the model
    for i in range(i_max+1):
        for j in range(j_max+1):
            m.addConstr(x[i][j] <= out_table[i][j] +1)
            m.addConstr(x[i][j] >= out_table[i][j] -1)

    for i in range(i_max+1):
        m.addConstr(sum(x[i,:][0:j_max]) == x[i][j_max])

    for j in range(j_max+1):
        m.addConstr(sum(x[:,j][0:i_max]) == x[i_max][j])

    # Parameters
    m.Params.PoolSearchMode = 2
    m.Params.PoolSolutions = n
    # m.Params.PoolSolutions = 2
    m.Params.PoolGap = 0.0

    # Optimize
    m.optimize()
    
    out_lst = []


    for k in range(m.SolCount):
        m.Params.SolutionNumber = k
        out_x = np.zeros_like(x)
        for i in range(len(x)):
            for j in range(len(x[0])):
                out_x[i][j] = x[i][j].Xn
        # print([var.Xn for var in m.getVars()])
        out_lst.append(out_x)
        
    return out_lst

In [30]:
# np.random.seed(0)
db = gen_data_set(10, bin_atts, 20)
# np.random.seed(12)
db["row_key"] = np.random.randint(0, 10, db.shape[0])

display(gen_tables_cell_key(db,['Income'],['Sex'],ptable))
display(gen_tables_cell_key(db,['Race'],['Sex'],ptable))
display(gen_tables_cell_key(db,['Race'],['Income'],ptable))

False


Sex,female,male,All
Income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
poor,1,2,4
rich,1,4,6
All,3,7,11


False


Sex,female,male,All
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,6,5
b,1,3,5
All,3,7,11


False


Income,poor,rich,All
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,3,5
b,2,2,5
All,4,6,11


In [44]:
display(gen_sols([[4,0,5],[0,5,6],[5,6,9]],10))
print(fixed_positions(gen_sols([[4,0,5],[0,5,6],[5,6,9]],10)))

[array([[4.0, -0.0, 4.0],
        [-0.0, 5.0, 5.0],
        [4.0, 5.0, 9.0]], dtype=object),
 array([[4.0, 1.0, 5.0],
        [-0.0, 5.0, 5.0],
        [4.0, 6.0, 10.0]], dtype=object),
 array([[4.0, -0.0, 4.0],
        [1.0, 5.0, 6.0],
        [5.0, 5.0, 10.0]], dtype=object),
 array([[3.0, 1.0, 4.0],
        [1.0, 4.0, 5.0],
        [4.0, 5.0, 9.0]], dtype=object),
 array([[5.0, -0.0, 5.0],
        [-0.0, 5.0, 5.0],
        [5.0, 5.0, 10.0]], dtype=object),
 array([[4.0, 1.0, 5.0],
        [1.0, 4.0, 5.0],
        [5.0, 5.0, 10.0]], dtype=object),
 array([[4.0, -0.0, 4.0],
        [-0.0, 6.0, 6.0],
        [4.0, 6.0, 10.0]], dtype=object),
 array([[3.0, 1.0, 4.0],
        [1.0, 5.0, 6.0],
        [4.0, 6.0, 10.0]], dtype=object)]

[]


In [59]:
sols1 = gen_sols([[1,3,2],[3,2,6],[5,6,9]],10)
display(sols1)
print(fixed_positions(sols1))

[array([[0.0, 2.0, 2.0],
        [4.0, 3.0, 7.0],
        [4.0, 5.0, 9.0]], dtype=object),
 array([[0.0, 3.0, 3.0],
        [4.0, 2.0, 6.0],
        [4.0, 5.0, 9.0]], dtype=object),
 array([[1.0, 2.0, 3.0],
        [3.0, 3.0, 6.0],
        [4.0, 5.0, 9.0]], dtype=object),
 array([[0.0, 3.0, 3.0],
        [4.0, 3.0, 7.0],
        [4.0, 6.0, 10.0]], dtype=object),
 array([[1.0, 2.0, 3.0],
        [4.0, 3.0, 7.0],
        [5.0, 5.0, 10.0]], dtype=object)]

[]


In [57]:
def fixed_positions(sols):
    i_max = len(sols[0]) - 1
    j_max = len(sols[0][0]) - 1
    
    out_lst = []
    
    for i in range(i_max+1):
        for j in range(j_max+1):
            lst = [x[i][j] for x in sols]
            if len(set(lst)) == 1:
                out_lst.append((i,j))
                
    return out_lst