In [126]:
import os
import sys
import time

# Keep import_ipynb ahead of the local modules below.
# NOTE(review): presumably algorithm/utilities are notebooks that need its import hook — confirm.
import import_ipynb
import numpy as np

import algorithm as alg
import utilities as util
from user_variables import attr_to_change, attr_with_max_dom, attr_relaxed_privacy, number_of_conditions

##### Input Params

In [127]:
# Simulate the command line this notebook would receive when run as a script:
#   <script> <csv_path> <k> <option>
# Entries of a real argv are always strings, so k is given as "10" rather than
# the int 10; the cell that reads it converts it with int().
sys.argv = [
    sys.argv[0],                # keep the original program name
    "Absenteeism_at_work.csv",  # dataset path
    "10",                       # k
    "pt",                       # Option
]


# Main Function

In [128]:
# Input params

# Expected argv layout: [script, csv_path, k, option].
# Indices 1, 2 and 3 are all read by this notebook, so at least 4 entries are
# required. A "< 3" guard would let a missing option through and fail on
# sys.argv[3] with an IndexError instead of the message below.
if len(sys.argv) < 4:
    sys.exit("Not enough arguments")

k = int(sys.argv[2])  # int() accepts both "10" and 10
option = sys.argv[3]  # compared against "pf" further down


# Load Dataset
# The CSV read and the row-width lookup are timed together as the "loading" phase.

load_started = time.time()

data_path = sys.argv[1]
data = util.read_csv(data_path)

# Length of the first record; handed to the data generation step later on.
tuple_length = len(data[0])

time_load_dataset = time.time() - load_started

In [129]:
# Simulate a bigger dataset by appending the data to itself repeatedly:
# each pass doubles the row count, so 10 passes give 2**10 = 1024 copies.
# NOTE(review): `data` is rebound in place, so re-running this cell grows it again.
N_DOUBLINGS = 10
for doubling_round in range(N_DOUBLINGS):
    data = np.concatenate([data, data])

In [130]:
# Set global variables

# NOTE(review): presumably populates `attr_with_max_dom`, which is read just below — confirm in utilities.
util.set_max_dom()

# Under the "pf" option, the relaxed-privacy entry of the max-domain attribute is set to 0.
if option == "pf":
    max_dom_key = str(attr_with_max_dom["attr"])
    attr_relaxed_privacy[max_dom_key] = 0
    

# Program Execution Module
# Wall-clock time of the bucket creation step.

buckets_started = time.time()
buckets = alg.program_execution_module(data, k, number_of_conditions)
time_buckets_gen = time.time() - buckets_started

# Constraint generation and solver module
# Wall-clock time of producing the new data from the buckets.

generation_started = time.time()
new_data = alg.constraint_and_data_generation_module(data, buckets, tuple_length, option)
time_new_data_gen = time.time() - generation_started

# Total over the three timed phases: loading, bucket creation, data generation.
time_total = (
    time_load_dataset
    + time_buckets_gen
    + time_new_data_gen
)

# Run summary: sizes, parameters and per-phase timings.
print(f"\nDataset size: {len(data)}, number of paths: {len(new_data)}")
print(f"Option = {option}, k = {k}")
print(f"\nExecution Time for loading dataset: {time_load_dataset}")
print(f"Execution Time for buckets creation: {time_buckets_gen}")
print(f"Execution Time for creating new data: {time_new_data_gen}")
print(f"Total Execution Time: {time_total}\n")

# One generated record per line.
for generated_row in new_data:
    print(generated_row)

Paths:

10111X0X
1000XXXX
10011XXX
110101XX
10010XXX
000XXXXX
110100XX
010XX1XX
001XXX0X
10110X0X
10111X10
1100X1XX
1010XX0X
1010XX11

Dataset size: 757760, number of paths: 14
Option = pt, k = 10

Execution Time for loading dataset: 0.023970842361450195
Execution Time for buckets creation: 6.84642767906189
Execution Time for creating new data: 0.8281693458557129
Total Execution Time: 7.698567867279053

[0, 26, 0, 6, 0, 387, 0, 0, 30, 0, 0, 0, 0, 3, 1, 0, 0, 81, 0, 0, 0]
[0, 10, 0, 5, 0, 293, 0, 0, 53, 0, 0, 0, 0, 1, 1, 0, 0, 105, 0, 0, 0]
[0, 11, 0, 2, 0, 203, 0, 0, 33, 0, 0, 0, 0, 0, 1, 0, 0, 107, 0, 0, 0]
[0, 24, 0, 3, 0, 333, 0, 0, 33, 0, 0, 0, 0, 4, 1, 1, 0, 54, 0, 0, 0]
[0, 19, 0, 4, 0, 388, 0, 0, 30, 0, 0, 0, 0, 3, 1, 0, 0, 78, 0, 0, 0]
[0, 5, 0, 4, 0, 323, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0]
[0, 6, 0, 5, 0, 326, 0, 0, 26, 0, 0, 0, 0, 0, 1, 1, 0, 72, 0, 0, 0]
[0, 24, 0, 5, 0, 185, 0, 0, 54, 0, 0, 0, 0, 4, 0, 1, 0, 63, 0, 0, 0]
[0, 26, 0, 3, 0, 112, 0, 0, 55, 0, 0, 0, 

In [None]:
# Test if behaviour is preserved: run the program execution module again,
# this time on the generated data and with k = 1.
# NOTE(review): the result is only stored — nothing here compares it with `buckets`.
new_buckets = alg.program_execution_module(
    new_data,
    1,  # k
    number_of_conditions,
)

In [None]:
# Write result to csv file

# Prefix only the file name, not the whole path. With an input such as
# "data/in.csv", plain concatenation would give "Anonymized_data/in.csv",
# which points into a different (probably non-existent) directory.
# For a bare file name the resulting path is unchanged.
input_dir, input_name = os.path.split(data_path)
output_path = os.path.join(input_dir, "Anonymized_" + input_name)

util.write_to_file(new_data, output_path)
