In [1]:
import numpy as np
import random
import csv
from user_variables import attr_to_change, attr_with_max_dom

ModuleNotFoundError: No module named 'user_variables'

In [None]:
""" Self made exception with one parameter """

class CardinalityException(Exception):
    """Exception that remembers the attribute it was raised for.

    The constructor argument is stored on ``self.attr`` and is also the
    single entry of ``self.args``.
    """

    def __init__(self, attr):
        # Forward explicitly so the base class state is set up in one obvious place
        super().__init__(attr)
        self.attr = attr
        

In [2]:
""" Takes a full list of path conditions and concatenates the third element (index 2) of every entry into a single string """

def list_to_string(l):
    """Concatenate ``str(entry[2])`` for every entry of ``l``, in order.

    Returns the empty string when ``l`` has no entries.
    """
    return "".join(str(entry[2]) for entry in l)

def read_csv(data_path):
    """Load a ';'-separated CSV file into a numpy array of floats.

    The first row is treated as a header and discarded; every remaining
    row is kept and all cells are converted with ``astype(float)``.

    Parameters
    ----------
    data_path : path of the CSV file to read.

    Returns
    -------
    numpy.ndarray of floats, one row per data line. A file that holds only
    a header, or nothing at all, yields an empty array.

    Raises
    ------
    ValueError if a cell cannot be converted to float.
    """
    # newline='' leaves newline handling to the csv module, as its documentation recommends
    with open(data_path, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        # Skip the header row; the default keeps a completely empty file
        # from escaping as a bare StopIteration
        next(reader, None)
        rows = list(reader)

    # Conversion happens after the file is closed; nothing below needs the handle
    return np.array(rows).astype(float)
    

In [None]:
''' Implementation of PT algorithm '''

# Each tuple of the dataset must have a unique value for the attribute with highest cardinality
def same_path_no_tuple_repeat(bucket):
    """Build "must differ" constraints for one bucket on the max-domain attribute.

    The attribute index is read from ``attr_with_max_dom["attr"]``. For each
    entry of ``bucket`` whose value at that index is not already covered
    (as decided by ``check_duplicate``), a triplet
    ``[attr, predicate, 1]`` is added, where ``predicate(x)`` is ``x != value``.

    Returns the list of triplets, in order of first appearance in ``bucket``.
    """
    column = attr_with_max_dom["attr"]
    result = []
    for row in bucket:
        seen_value = row[column]
        # Skip values that already have an equivalent predicate in the list
        if check_duplicate(result, column, seen_value):
            continue
        # Bind the value through a default argument so each lambda keeps its own copy
        result.append([column, lambda x, val=seen_value: x != val, 1])
    return result
          

In [None]:
''' Implementation of PF algorithm '''

# For each tuple and for each field there must be a unique value
def same_path_no_field_repeat(data):
    """Build "must differ" constraints for every field listed in ``attr_to_change``.

    For each key of ``attr_to_change`` (converted with ``int``) and each entry
    of ``data``, the entry's value at that index is converted with ``int``;
    if ``check_duplicate`` does not already find it covered, a triplet
    ``[field, predicate, 1]`` is added with ``predicate(x)`` being ``x != value``.

    Returns the list of triplets for the whole dataset.
    """
    result = []
    for field in attr_to_change:
        for row in data:
            column = int(field)
            forbidden = int(row[column])
            # Skip values that already have an equivalent predicate for this field
            if check_duplicate(result, column, forbidden):
                continue
            # Bind the value through a default argument so each lambda keeps its own copy
            result.append([column, lambda x, val=forbidden: x != val, 1])
    return result
    

In [None]:
""" Check for duplicated lambda functions """

def check_duplicate(constraints, field, value):
    """Tell whether ``constraints`` already holds a predicate rejecting ``value`` for ``field``.

    An entry matches when its element 0 equals ``field`` and calling its
    element 1 with ``value`` gives a falsy result; a falsy result means an
    equivalent predicate is already present, so adding another would duplicate it.

    Returns ``True`` on the first match, ``False`` if there is none.
    """
    return any(
        candidate[0] == field and not candidate[1](value)
        for candidate in constraints
    )


In [None]:
""" Remove constraints from the global_constraints list """

def remove_constraints(global_constraints, attr):
    """Remove, in place, the first constraint whose element 0 equals ``attr``.

    Only one entry is removed per call.

    Raises
    ------
    Exception if no entry of ``global_constraints`` refers to ``attr``.
    """
    # Index of the first matching constraint, or None when nothing matches
    position = next(
        (idx for idx, constraint in enumerate(global_constraints) if constraint[0] == attr),
        None,
    )
    if position is None:
        raise Exception("No more constraints to delete for attribute: " + str(attr))
    global_constraints.pop(position)


In [None]:
""" Return the sub-array of constraints with only those that involve a given attribute """

def get_constraints(index, constraints):
    """Return a new list with the entries of ``constraints`` whose element 0 equals ``index``.

    Order is preserved and the entries themselves are not copied.
    """
    return [constraint for constraint in constraints if constraint[0] == index]
            

In [5]:
""" Check whether the set of constraints hold on val """

def check_constraints(val, constraints, attr):
    """Check whether a candidate value satisfies a list of constraints.

    Each constraint is a triplet whose element 1 is a callable taking the
    candidate value. Two kinds of result are distinguished:

    * a plain ``int`` marks an "==" constraint: the returned integer is the
      value that has to be used, so it replaces the candidate;
    * anything else is read as a truth value, falsy meaning "violated".

    The previous implementation tried to restart the scan after an "=="
    override by assigning to the ``for`` loop variable, which has no effect;
    constraints placed before the "==" were therefore never checked against
    the forced value. Here the forced value is resolved first and every
    other constraint is then checked against the final value, so the
    outcome no longer depends on the order of the constraints.

    Parameters
    ----------
    val : candidate value.
    constraints : sequence of triplets; only element 1 (the callable) is used.
    attr : unused; kept so existing callers keep working.

    Returns
    -------
    list ``[is_valid, val]`` where ``val`` is the candidate after any "=="
    override has been applied (also when ``is_valid`` is False).
    """
    # Pass 1: let "==" constraints override the candidate. If several are
    # present the last one wins, as before.
    for constraint in constraints:
        outcome = constraint[1](val)
        # Exact type test on purpose: comparison results are bool, a subclass
        # of int, and must not be mistaken for a forced value.
        if type(outcome) is int and outcome != val:
            val = outcome

    # Pass 2: every non-"==" constraint must hold for the final value.
    for constraint in constraints:
        outcome = constraint[1](val)
        if type(outcome) is not int and not outcome:
            return [False, val]

    return [True, val]


In [None]:
''' Look for the attribute with the largest range of values '''

def set_max_dom():
    """Record in ``attr_with_max_dom`` the attribute with the widest value range.

    For every key of ``attr_to_change`` the range size is computed as
    ``upper - lower + 1`` from elements 1 and 0 of its entry. Whenever that
    size exceeds ``attr_with_max_dom["dom_card"]``, both ``"attr"`` (the key
    converted with ``int``) and ``"dom_card"`` are overwritten.

    Works by mutating ``attr_with_max_dom``; returns nothing.
    """
    for key in attr_to_change:
        bounds = attr_to_change[key]
        size = bounds[1] - bounds[0] + 1
        if size > attr_with_max_dom["dom_card"]:
            attr_with_max_dom["attr"] = int(key)
            attr_with_max_dom["dom_card"] = size
            

In [None]:
'''Set Lambda function according to op'''

def get_lambda(pc, if_id, field, op, val, taken):
    """Fill the path-condition entry ``pc[if_id]`` for one comparison.

    Element 0 receives ``field``, element 2 receives ``taken`` and element 1
    receives a callable chosen from ``op``:

    * ``"<="``, ``"<"``, ``"!="``, ``">"``, ``">="`` compare the argument with ``val``;
    * ``"=="`` ignores its argument and returns ``val`` (the value to preserve);
    * ``"None"`` accepts any value (always ``True``).

    Any other ``op`` leaves element 1 as it was. Nothing is returned.
    """
    slot = pc[if_id]
    slot[0] = field
    slot[2] = taken

    # `val` is bound through a default argument so each callable keeps its own copy
    predicates = {
        "<=": lambda x, val=val: x <= val,
        "<": lambda x, val=val: x < val,
        # Return the value we need to preserve
        "==": lambda x, val=val: val,
        "!=": lambda x, val=val: x != val,
        ">": lambda x, val=val: x > val,
        ">=": lambda x, val=val: x >= val,
        # Any value will be accepted (X cases inside a path)
        "None": lambda x: True,
    }

    chosen = predicates.get(op)
    if chosen is not None:
        slot[1] = chosen
        

In [None]:
''' Generate a new value according to parameters defined in user_variables '''

def gen_value(attr, generated_values):
    """Draw a random integer for ``attr`` that is not yet in ``generated_values``.

    The inclusive bounds are elements 0 and 1 of ``attr_to_change[str(attr)]``.
    Candidates are drawn with ``random.randint`` until one is found that is
    not in ``generated_values``; that value is appended to the list and returned.

    Returns ``0`` without touching ``generated_values`` when the attribute
    has no entry in ``attr_to_change``, since no specific value is needed.

    Raises
    ------
    CardinalityException when a drawn candidate is already used and
    ``generated_values`` holds as many items as the range has values.
    With an "==" constraint the caller overwrites whatever is generated here,
    so this keeps being raised until the conflicting "!=" constraints are
    removed from the global (or bucket) constraints.
    """
    try:
        bounds = attr_to_change[str(attr)]
        low, high = bounds[0], bounds[1]
    except KeyError:
        # The attribute is not inside the attr_to_change dictionary
        return 0

    domain_size = high - low + 1
    while True:
        candidate = random.randint(low, high)
        if candidate not in generated_values:
            break
        # Every possible value has been handed out already
        if len(generated_values) == domain_size:
            raise CardinalityException(attr)

    generated_values.append(candidate)
    return candidate
    

In [None]:
''' Write the result to a new csv file '''

def write_to_file(new_data, filename):
    """Write ``new_data`` to ``filename`` as ';'-separated rows.

    The file is opened in ``"w"`` mode, so existing content is replaced.
    Each entry of ``new_data`` becomes one row terminated by ``"\\n"``.

    Parameters
    ----------
    new_data : iterable of rows, each row an iterable of cell values.
    filename : path of the file to create or overwrite.
    """
    # The context manager closes the handle even if a row fails to serialise
    with open(filename, "w") as handle:
        writer = csv.writer(handle, delimiter=";", lineterminator="\n")
        writer.writerows(new_data)
    