<h1>inconsistency-measurer</h1>

<h4> Imports </h4>

In [82]:
import pandas as pd
import ipywidgets as widgets
from ipywidgets import IntProgress
import gurobipy as gp
import numpy as numpy
from gurobipy import GRB
import random
import re
import os
from subprocess import PIPE, run
import pandasql as psql
import time
import matplotlib.pyplot as plt
import subprocess
import ViolationsAlgorithm as vio

In [83]:
def build_dynamic_queries(constraintSets,df):
    """
    build_dynamic_queries - generates dynamic SQL queries based on the given constraints.
    This function will generate two queries, each being the UNION of one sub-query per constraint:
    1. unionOfAllTuples - selects all columns (SELECT *) of the joined tuples that violate a constraint.
    2. unionOfAllPairs - selects pairs (t1ctid,t2ctid) of row ids of tuples that jointly violate a constraint.

    A constraint that does not mention "t2" is treated as a single-tuple constraint: its second
    occurrence of "t1" is rewritten to "t2" and the two aliases are forced onto the same row.
    Rows holding a NULL in any column are excluded from both queries.

    Parameters
    ----------
    constraintSets : iterable of strings
        each string represents a constraint from the dcs file, e.g. "not(t1.a=t2.a&t1.b!=t2.b)"
    df : dataframe
        the database frame; only its column names are read here (they must be strings)

    Returns
    -------
    tuple of three strings:
        unionOfAllTuples, unionOfAllPairs are the generated queries
        allColumns is a string consisting of all column names separated by ','

    Raises
    ------
    ValueError
        if constraintSets is empty (there is no query to build)
    """

    allColumns = ' '.join([str(elem) for elem in df.columns.values.tolist()]).replace(' ',',')

    constraintList = list(constraintSets)
    if not constraintList:
        raise ValueError("build_dynamic_queries requires at least one constraint")

    # Additional conditions for the queries, in order to ignore missing values in the database.
    # Each alias gets its own filter built from the column names; deriving the t2 filter by a
    # textual replace of 't1' would also corrupt column names that contain "t1" (e.g. "Port1").
    def not_null_filter(alias):
        return " IS NOT NULL AND ".join(alias + "." + col for col in df.columns) + " IS NOT NULL "

    columnsT1 = not_null_filter("t1")
    columnsT2 = not_null_filter("t2")

    # Single-pass rewrite of the constraint syntax into SQL (compiled once, not per constraint).
    rep = {" ": "_", "&": " and ", "not(": "", ")": ""}
    pattern = re.compile("|".join(re.escape(key) for key in rep))

    pairsSelect = " SELECT t1.rowid as t1ctid ,t2.rowid as t2ctid FROM df t1,df t2 WHERE "
    tuplesSelect = " SELECT * FROM df t1,df t2 WHERE "

    pairQueries = []
    tupleQueries = []
    for con in constraintList:
        con1 = pattern.sub(lambda m: rep[m.group(0)], con)

        if "t2" not in con:
            # in case the constraint refers to a single tuple: turn the second "t1" into "t2"
            # and require both aliases to be the same row
            con1 = re.sub(r'(t1.*?)t1', r'\1t2', con1, count=1)
            condition = con1 + " and t1.ROWID==t2.ROWID and (" + columnsT1 + ")"
        else:
            condition = con1 + " and t1.ROWID!=t2.ROWID and (" + columnsT1 + " and " + columnsT2 + ")"

        pairQueries.append(pairsSelect + condition)
        tupleQueries.append(tuplesSelect + condition)

    # every sub-query starts with " SELECT", so joining on " UNION" yields "... UNION SELECT ..."
    unionOfAllPairs = " UNION".join(pairQueries)
    unionOfAllTuples = " UNION".join(tupleQueries)

    return unionOfAllTuples,unionOfAllPairs,allColumns

<h4>Find all the inconsistencies in the database for a given set of constraints</h4>

In [84]:
def constraints_check(df,constraintSets, allColumns, unionOfAllTuples, unionOfAllPairs):
    """
    constraints_check - runs the generated pairs query on the database and derives the violating tuples.

    The unionOfAllPairs query is wrapped so that every pair is ordered (smaller id first) and
    de-duplicated. The set of violating tuples is then collected from the ids appearing in those pairs.

    Parameters
    ----------
    df : dataframe
        the database frame; the query refers to it by the table name "df"
    constraintSets : set of strings
        each string represents a constraint from the dcs file (not used by this function)
    allColumns : string
        column names separated by ',' (not used by this function)
    unionOfAllTuples : string
        the tuples query (not used by this function)
    unionOfAllPairs : string
        the query returning the pairs (t1ctid,t2ctid) of tuples that jointly violate a constraint

    Returns
    -------
    tuple of four values:
        violatingPairs - dataframe with the columns id1,id2 holding the distinct violating pairs
        violatingTuples - set of the ids appearing in violatingPairs
        the running time of the pairs query, in seconds
        the running time of collecting the violating tuples, in seconds
    """

    # finds the pairs of tuples that jointly violate a constraint
    pairsQueryStart = time.time()
    normalizedPairsQuery = (
        "SELECT DISTINCT * FROM (SELECT CASE WHEN t1ctid <= t2ctid THEN t1ctid ELSE t2ctid END AS id1,"
        "CASE WHEN t1ctid <= t2ctid THEN t2ctid ELSE t1ctid END AS id2 FROM ("
        + unionOfAllPairs
        + ")AS A)AS B"
    )
    violatingPairs = psql.sqldf(normalizedPairsQuery)
    pairsQueryEnd = time.time()

    # finds the tuples that participate in a violation
    tuplesStart = time.time()
    violatingTuples = {item for pair in violatingPairs.values for item in pair}
    tuplesEnd = time.time()

    return violatingPairs, violatingTuples, pairsQueryEnd - pairsQueryStart, tuplesEnd - tuplesStart

<h4>Functions for computing the measurements</h4>

In [85]:
def first_measurer_I_D(uniquePairsDf):
    """
    first_measurer_I_D: computes the drastic inconsistency measure I_d.
    The database is considered consistent exactly when no violating pair was found.

    Parameters
    ----------
    uniquePairsDf : dataframe
        the result of the query that finds all pairs of tuples that jointly violate a constraint.

    Returns
    -------
    int
        1 if at least one violating pair exists (inconsistent database), and 0 otherwise
    """
    hasViolations = len(uniquePairsDf) > 0
    return int(hasViolations)

def second_measurer_I_MI(uniquePairsDf):
    """
    second_measurer_I_MI: computes the measure I_MI, the number of minimal inconsistent subsets
    of the database, taken here as the number of rows in the violating-pairs result.

    Parameters
    ----------
    uniquePairsDf : dataframe
        the result of the query that finds all pairs of tuples that jointly violate a constraint.

    Returns
    -------
    int
        number of pairs of tuples that jointly violate a constraint.
    """
    violatingPairsCount = len(uniquePairsDf)
    return violatingPairsCount

def third_measurer_I_P(uniqueTuplesDf):
    """
    third_measurer_I_P: computes the measure I_P, the number of problematic tuples
    (tuples participating in a violation of the constraints).

    Parameters
    ----------
    uniqueTuplesDf : sized collection
        the distinct tuples (or tuple ids) that participate in a violation; anything supporting len().

    Returns
    -------
    int
        number of tuples participating in a violation of the constraints.
    """
    problematicTuplesCount = len(uniqueTuplesDf)
    return problematicTuplesCount

def fourth_measurer_I_R(uniquePairsDf):
    """
    fourth_measurer_I_R: computes the measure I_R that is based on the minimal number of tuples that should
    be removed from the database for the constraints to hold.
    The measure is computed via an ILP and the Gurobi optimizer is used to solve the ILP.

    - There is one binary variable x for every tuple that appears in a violating pair.
    - The constraints are of the form x + y >= 1 where x and y represent two tuples that jointly violate a constraint.
    - The objective function is to minimize the sum of all x's.

    Parameters
    ----------
    uniquePairsDf : dataframe
        the result of the query that finds all pairs of tuples that jointly violate a constraint;
        the first two columns are read as the ids of the two tuples.

    Returns
    -------
    tuple of two numbers:
        database_measurer.objVal is the minimal number of tuples that should be removed for the constraints to hold.
        end1 - start is the running time of the function, in seconds.
    """

    start = time.time()
    rows_violations = uniquePairsDf.values
    database_measurer = gp.Model('Minimal deletions of tuples')
    database_measurer.setParam('OutputFlag', 0)  # do not show any comments on the screen

    # variables: created once per distinct tuple id, so that a tuple appearing in several
    # pairs does not leave unused (orphan) variables behind in the model
    tupleVars = {}
    for row in rows_violations:
        for tupleId in (row[0], row[1]):
            if tupleId not in tupleVars:
                tupleVars[tupleId] = database_measurer.addVar(vtype=GRB.BINARY, name="x")

    # constraints: at least one tuple of every violating pair has to be removed
    for row in rows_violations:
        database_measurer.addConstr(tupleVars[row[0]] + tupleVars[row[1]] >= 1, name='con')

    # objective function (quicksum is Gurobi's efficient replacement for the builtin sum)
    database_measurer.setObjective(gp.quicksum(tupleVars.values()), GRB.MINIMIZE)

    database_measurer.optimize()
    end1 = time.time()
    return database_measurer.objVal , end1 - start

def fifth_measurer_I_lin_R(uniquePairsDf):
    """
    fifth_measurer_I_lin_R: computes the measure I^lin_R that is the linear relaxation of the ILP used for computing
    the measure I_R.

    - There is one variable x with 0<=x<=1 for every tuple that appears in a violating pair.
    - The constraints are of the form x + y >= 1 where x and y represent two tuples that jointly violate a constraint.
    - The objective function is to minimize the sum of all x's.

    Parameters
    ----------
    uniquePairsDf : dataframe
        the result of the query that finds all pairs of tuples that jointly violate a constraint;
        the first two columns are read as the ids of the two tuples.

    Returns
    -------
    tuple of two numbers:
        database_measurer.objVal is the result of the LP.
        end2 - start is the running time of the function, in seconds.
    """

    start = time.time()
    rows_violations = uniquePairsDf.values
    database_measurer = gp.Model('Minimal deletions of tuples relaxed')
    database_measurer.setParam('OutputFlag', 0)  # do not show any comments on the screen

    # variables: created once per distinct tuple id, so that a tuple appearing in several
    # pairs does not leave unused (orphan) variables behind in the model
    tupleVars = {}
    for row in rows_violations:
        for tupleId in (row[0], row[1]):
            if tupleId not in tupleVars:
                tupleVars[tupleId] = database_measurer.addVar(lb=0, ub=1, vtype=GRB.CONTINUOUS, name="x")

    # constraints: the two tuples of every violating pair must add up to at least 1
    for row in rows_violations:
        database_measurer.addConstr(tupleVars[row[0]] + tupleVars[row[1]] >= 1, name='con')

    # objective function (quicksum is Gurobi's efficient replacement for the builtin sum)
    database_measurer.setObjective(gp.quicksum(tupleVars.values()), GRB.MINIMIZE)

    database_measurer.optimize()
    end2 = time.time()
    return database_measurer.objVal , end2 -start

def sixth_measurer_I_MC(fullPath, uniquePairsDf):
    """
    sixth_measurer_I_MC: computes the measure I_MC that counts the maximal consistent subsets (i.e., repairs),
    which are also the maximal independent sets of the conflict graph wherein nodes represent tuples
    and edges represent pairs of tuples that jointly violate a constraint.
    This function generates the complement of the conflict graph (where edges represent pairs of tuples that do not
    jointly violate any constraint). Then, an algorithm for enumerating maximal cliques in a graph is invoked.

    The graph is written to fullPath/graph.nde: the number of nodes, then one "node degree" line per node,
    then one "a b" line (a < b) per edge. The number of rows is read from the module-level dataframe `df`
    (set by runTest), so that global must exist before this function is called.

    Parameters
    ----------
    fullPath : string
        the path of the directory where the graph will be generated
    uniquePairsDf : dataframe
         the result of the query that finds all pairs of tuples that jointly violate a constraint;
         the ids are 1-based row ids.

    Returns
    -------
    tuple of two numbers:
        result_output is the number of maximal cliques the algorithm generated.
        end - start is the running time of the function, in seconds.

    Raises
    ------
    RuntimeError
        if the output of text_ui is too short to contain the expected result token
    """

    # position (0-based) of the result among the whitespace-separated tokens printed by text_ui
    # NOTE(review): depends on the output format of parallel_enum's text_ui - confirm on upgrade
    RESULT_TOKEN_INDEX = 14

    start = time.time()
    rows_violations = uniquePairsDf.values
    num_of_rows = len(df.index)

    # count how often every (1-based) id appears among the violating pairs and remember the
    # violating pairs as 0-based (smaller, larger) node pairs; this replaces the explicit
    # num_of_rows x num_of_rows list of all candidate edges
    appearances = {}
    violatingEdges = set()
    for row in rows_violations:
        firstId, secondId = int(row[0]), int(row[1])
        appearances[firstId] = appearances.get(firstId, 0) + 1
        appearances[secondId] = appearances.get(secondId, 0) + 1
        a, b = firstId - 1, secondId - 1
        violatingEdges.add((a, b) if a <= b else (b, a))

    graphFileName = fullPath + '/graph.nde'

    with open(graphFileName, "w") as graphFile:
        # construct the nodes with their degrees [degree = number of rows - 1 - number of appearances in rows_violations]
        graphFile.write(str(num_of_rows))
        for node in range(num_of_rows):
            degree = num_of_rows - 1 - appearances.get(node + 1, 0)
            graphFile.write('\n' + str(node) + ' ' + str(degree))

        # construct the edges: every pair a < b that does not jointly violate a constraint
        for a in range(num_of_rows):
            for b in range(a + 1, num_of_rows):
                if (a, b) not in violatingEdges:
                    graphFile.write('\n' + str(a) + ' ' + str(b))

    # locate the full path to the graph and text_ui
    buildFullPath = os.path.abspath("parallel_enum/build/text_ui")
    graphFullPath = os.path.abspath(graphFileName)

    # invoke the algorithm for enumerating maximal cliques with the graph as a parameter;
    # the arguments are passed as a list (no shell), so paths containing spaces or shell
    # metacharacters are handed over verbatim
    result = run([buildFullPath, '-system=clique', graphFullPath], capture_output=True)
    tokens = result.stdout.split()
    if len(tokens) <= RESULT_TOKEN_INDEX:
        raise RuntimeError("Unexpected output from " + buildFullPath + ": "
                           + result.stdout.decode(errors='replace')
                           + result.stderr.decode(errors='replace'))
    result_output = int(tokens[RESULT_TOKEN_INDEX].decode())

    end = time.time()
    return result_output, end - start

<h4>Function to run the measurements:</h4>

In [86]:
def _save_measure_chart(exes, values, color, title, imagePath):
    """Scatter-plots one measure against the number of changes and saves the chart to imagePath."""
    plt.scatter(exes, values, c=color)
    plt.title(title)
    plt.ylabel('results')
    plt.xlabel('number of changes')
    plt.savefig(imagePath, dpi=300)
    plt.clf()


def runTest(testDirectoryPath, timesToRunTheTest, measuresToRun, singleIteration):
    """
    runTest - the main function that computes the measures specified by the user on the given database.

    If singleIteration is true, then all the measures will be computed once on the given database.

    Otherwise, the function will run a simulation that generates random violations in the given database, and
    computes, after each iteration (i.e., after each change in the database), the values of all the measures.

    Parameters
    ----------
    testDirectoryPath : string
        the name of the folder (under Data/) containing the database (inputDB.csv) and the constraints (dcs.txt)
    timesToRunTheTest : int
        if singleIteration is false, this is the number of measurements in the simulation
        (the initial database state plus timesToRunTheTest - 1 changes).
    measuresToRun : dictionary
        a dictionary in which the measures are the keys and true/false are the values.
        The function will compute the measures for which the value is true.
    singleIteration : bool
        true if the measures should be computed once on the given database, and false for a simulation.

    Returns
    -------
    None. A chart is generated for each measure where the y axis is the value of the measure and the x axis
    is the iteration number. The charts are saved in a new results folder under the folder containing
    the database.

    The files "Running_Time.txt" and "All_results.txt" contain the average running time of each measure
    (averaged over all measurements, including the one on the initial database) and all the results of
    the execution, respectively.

    Side effects: the module-level names df, t1 and t2 are (re)assigned, and the pandas option
    mode.chained_assignment is set to None.
    """
    global df
    global t1, t2

    # messages at start
    if not singleIteration:
        print('Test '+testDirectoryPath+' : running ' + str(timesToRunTheTest) + ' iterations; startTime:' + str(time.time()))
    else:
        print('Test '+testDirectoryPath+' ; startTime:' + str(time.time()))

    # constructing paths for the results
    testDataPath = 'Data/' + testDirectoryPath
    resultsDirectoryPath = '/' + str(time.time()) + '_results'
    fullPath = testDataPath + resultsDirectoryPath
    os.makedirs(fullPath, exist_ok=True)
    runningTimesFileName = fullPath +'/Running_Time.txt'
    allResultsFileName = fullPath +'/All_results.txt'

    start = time.time()

    # load the csv file and generate a list of constraints
    df = pd.read_csv(testDataPath + '/' + "inputDB.csv", keep_default_na=False, na_values=['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a','', '#NA', 'NULL','null', 'NaN', '-NaN', 'nan', '-nan', ''] , header=0)
    with open(testDataPath + '/dcs.txt', 'r') as constraints_raw:
        # blank lines are skipped: an empty constraint would produce an invalid query
        constraints = [line.strip() for line in constraints_raw if line.strip()]
    constraints = [x.replace(' ', '_') for x in constraints] #in case the columns names include spaces

    pd.options.mode.chained_assignment = None

    # in case the column names include spaces
    df = df.rename(columns={col: col.replace(' ','_') for col in df.columns})

    # initializations
    measureNames = ("I_D", "I_MI", "I_P", "I_R", "I_lin_R", "I_MC")
    exes = []
    results = {name: [] for name in measureNames}
    # accumulated running times; "pairsQuery" is the violating-pairs query shared by the measures
    runTimes = {"pairsQuery": 0.0, "I_P": 0.0, "I_R": 0.0, "I_lin_R": 0.0, "I_MC": 0.0}

    # construct the dynamic queries which will be used for detecting violations in the database
    unionOfAllTuples, unionOfAllPairs, allColumns = build_dynamic_queries(constraints,df)

    def compute_measures(iteration):
        """Runs the violation queries on the current df and records every selected measure."""
        violatingPairs, violatingTuples, pairsTime, tuplesTime = constraints_check(df,constraints, allColumns, unionOfAllTuples, unionOfAllPairs)
        exes.append(iteration)
        # always accumulated: the averages of I_R, I_lin_R and I_MC include the query time
        # even when I_MI itself is not selected
        runTimes["pairsQuery"] += pairsTime

        if (measuresToRun["I_D"]):
            results["I_D"].append(first_measurer_I_D(violatingPairs))

        if (measuresToRun["I_MI"]):
            results["I_MI"].append(second_measurer_I_MI(violatingPairs))

        if (measuresToRun["I_P"]):
            results["I_P"].append(third_measurer_I_P(violatingTuples))
            runTimes["I_P"] += tuplesTime

        if (measuresToRun["I_R"]):
            value, elapsed = fourth_measurer_I_R(violatingPairs)
            results["I_R"].append(value)
            runTimes["I_R"] += elapsed

        if (measuresToRun["I_lin_R"]):
            value, elapsed = fifth_measurer_I_lin_R(violatingPairs)
            results["I_lin_R"].append(value)
            runTimes["I_lin_R"] += elapsed

        if (measuresToRun["I_MC"]):
            value, elapsed = sixth_measurer_I_MC(fullPath, violatingPairs)
            results["I_MC"].append(value)
            runTimes["I_MC"] += elapsed

    # calculations for the first stage - the database should be consistent
    compute_measures(0)

    # progress bar
    progressBar = IntProgress(min = 1, max = timesToRunTheTest,description='Computing...',bar_style='success')
    display(progressBar)

    # in case the user wishes to run the violations algorithm and introduce random violations in the database
    if not singleIteration:
        for x in range(1, timesToRunTheTest):
            progressBar.value += 1
            time.sleep(.1)

            # choose two tuples randomly
            sample = df.sample(n=2)
            t1 = sample.iloc[0]
            t2 = sample.iloc[1]

            # clean constraint from excessive chars
            constraintSetRaw = random.choice(constraints)
            constraintSet = constraintSetRaw[4:-1].split('&')
            constraintSet = [re.split('(!=|>=|<=|>|<|=)', i) for i in constraintSet]

            # in case the constraint refers to a single tuple; the raw constraint string is
            # checked because constraintSet is a list of lists and never contains the string "t2"
            if "t2" not in constraintSetRaw:
                t2 = t1

            # generate violations using the fittingViolationAlgorithm in ViolationsAlgorithm.py
            t = vio.fittingViolationAlgorithm(constraintSet,df,t1,t2)
            vio.updateTable(df,t[0],t[1],sample)

            # calculate the queries needed for the measures and the measures themselves
            compute_measures(x)

    # messages at finish
    print('Test '+testDirectoryPath+' : runTime = ' + str(time.time()))
    print('Test '+testDirectoryPath+' finished, preparing the results.')

    # the averages are taken over the measurements that were actually performed
    measurementsCount = len(exes)
    pairsQueryTime = runTimes["pairsQuery"]

    # key, chart colour, chart title, label in Running_Time.txt, label in All_results.txt, total running time
    # (I_D is only plotted; it has no entry in the two text files)
    reportSpecs = [
        ("I_D", 'r', 'Drastic inconsistency value I_D:', None, None, None),
        ("I_MI", 'b', 'Minimal inconsistent subsets of D I_MI:', "AVG for I_MI: ", "I_MI results: ", pairsQueryTime),
        ("I_P", 'g', 'Problematic facts I_P:', "\nAVG for I_P: ", "\nI_P results: ", runTimes["I_P"]),
        ("I_R", 'y', 'Minimal cost of a sequence of operations that repairs the database I_R:', "\nAVG for I_R: ", "\nI_R results: ", runTimes["I_R"] + pairsQueryTime),
        ("I_lin_R", 'pink', 'Linear relaxation of the fourth measurer I_lin_R:', "\nAVG for I_lin_R: ", "\nI_lin_R results: ", runTimes["I_lin_R"] + pairsQueryTime),
        ("I_MC", 'purple', 'Maximal cliques I_MC:', "\nAVG for I_MC: ", "\nI_MC results: ", runTimes["I_MC"] + pairsQueryTime),
    ]

    with open(runningTimesFileName, "a+") as f_times, open(allResultsFileName, "a+") as f_results:
        for key, color, title, timesLabel, resultsLabel, totalTime in reportSpecs:
            if not measuresToRun[key]:
                continue
            _save_measure_chart(exes, results[key], color, title, fullPath + '/' + key + '.jpg')
            if timesLabel is not None:
                f_times.write(timesLabel)
                f_times.write(str(float(totalTime/measurementsCount)))
                f_results.write(resultsLabel)
                f_results.write(str(results[key]))

        end = time.time()
        f_times.write("\ntotal time ")
        f_times.write(str(end - start))

        f_times.write("\n---\n")
        f_results.write("\n---\n")

    print('End of test '+testDirectoryPath + '; total time = ' + str(end - start))
    print('\033[1m'+"Computation finished, outputs can be found in "+ fullPath +'\n \033[0m')

In [87]:
def runMeasurers(databasesNamesToRun,IterationsNum,measurments,selected_data,singleIteration):
    """
    runMeasurers - processes the data obtained by HelloNewUser and runs the function runTest
    on each of the databases specified by the user.

    Parameters
    ----------
    databasesNamesToRun : string or list of strings
        the database names which the user specified; a string is split on ','.
        Surrounding whitespace and single quotes are removed from every name and empty names are skipped.
    IterationsNum : int or numeric string
        if singleIteration is false, this is the number of iteration in the simulation.
    measurments : dictionary
        a dictionary in which the measures are the keys and the values are boolean.
        The dictionary is not modified; a copy with the selected measures switched on is passed to runTest.
    selected_data : list of strings
        a list of the measurments which were chosen by the user
    singleIteration : bool
        true if the measures should be computed once on the given database, and false for a simulation.

    Returns
    -------
    none
    """

    if isinstance(databasesNamesToRun, str):
        rawNames = databasesNamesToRun.split(',')
    else:
        rawNames = list(databasesNamesToRun)

    # clean every name (quotes / surrounding blanks typed by the user) and drop empty entries
    databasesNamesToRun = []
    for rawName in rawNames:
        cleanedName = rawName.replace("'","").strip()
        if cleanedName:
            databasesNamesToRun.append(cleanedName)

    # work on a copy, so that measures selected in a previous run do not stay switched on
    # in the caller's dictionary
    measuresToRun = dict(measurments)
    validMeasurments = []
    for m in selected_data:
        measuresToRun[m] = True
        validMeasurments.append(m)

    print('---')
    print('Starting tests ' + str(databasesNamesToRun) +' from database inputDB.csv \nwith the following measurers: '+ str(validMeasurments)+ '; iterationsNum = ' + str(IterationsNum))
    print('---')

    for testName in databasesNamesToRun:
        runTest(testName, int(IterationsNum), measuresToRun, singleIteration)
        

In [88]:
def HelloNewUser():
    """
    HelloNewUser - receives the input from the user and processes it in order to call the
    runMeasurers function with the parameters the user has specified.

    The user is asked for the database names, whether to run the simulation (any answer other
    than "n"/"N" means yes) and, for a simulation, the number of iterations (default 100; an
    invalid or non-positive number falls back to the default). The measures are chosen with
    checkboxes and the computation starts when the "Proceed" button is clicked.

    Parameters
    ----------
    none

    Returns
    -------
    none
    """

    DEFAULT_ITERATIONS = 100

    #defaults
    singleIteration = True
    IterationsNum = "1"

    print('\033[1m'+"Welcome to the inconsistency measurer\n\n"+'\033[0m')

    databasesNamesToRun = input("Please specify the databases you wish to compute the measures on (seperated by ',')\nThe databases included are: Adult,Airport,Flight,Food,Hospital,Stock,Tax,Voters\n")

    print("\nDo you wish to run a simulation that introduces random violations in the database? [y/n]")
    display(widgets.HTML(value="<i>Default value is: y</i>"))
    violations = input()

    # in case the user wishes to run the violations algorithm (everything except an explicit "n")
    if violations.strip().lower() != "n":
        singleIteration = False
        print("\nPlease specify the number of iterations of the simulation")
        display(widgets.HTML(value="<i>Default value is: 100</i>"))
        iterationsAnswer = input().strip()
        try:
            IterationsNum = int(iterationsAnswer or str(DEFAULT_ITERATIONS))
        except ValueError:
            IterationsNum = 0
        if IterationsNum < 1:
            print("Invalid number of iterations '" + iterationsAnswer + "', using the default value " + str(DEFAULT_ITERATIONS))
            IterationsNum = DEFAULT_ITERATIONS

    # each checked measurer will appear in the selected_data list
    print("\nPlease choose the measures you wish to compute: ")
    measurments = {"I_D":False, "I_MI":False, "I_P":False, "I_R":False, "I_lin_R":False, "I_MC":False}

    # one checkbox per measure, keyed by the measure name
    arg_dict = {key: widgets.Checkbox(value=False, description=key) for key in measurments}

    ui = widgets.VBox(children=list(arg_dict.values()))
    selected_data = []
    def select_data(**kwargs):
        # called on every checkbox change: rebuild the list of the currently checked measures
        selected_data.clear()
        for key in kwargs:
            if kwargs[key] is True:
                selected_data.append(key)

    out = widgets.interactive_output(select_data, arg_dict)
    display(ui, out)

    # messages in case the user wishes to run the I_MC measurer
    print('\033[1m'+"\nThis message only applies in case you wish to compute the I_MC measure "+'\033[0m')
    print("\nPlease make sure :")
    print("1. To build the parallel_enum project inside the current folder")
    print("2. that the file text_ui created by parallel_enum is located in /parallel_enum/build/")
    print('\033[91m'+'For additional instructions regarding the parallel_enum algorithm, please visit: '+'\033[0m')
    link = widgets.HTML(value="<a style='color:red;' target='_blank' rel='noopener noreferrer' href='https://github.com/veluca93/parallel_enum'>parallel_enum github</a>")
    display(link)

    # once the button is clicked the function of runMeasurers is called and the users input will be passed on
    button = widgets.Button(description='Proceed',disabled=False,button_style='success',tooltip='Click me',icon='check')
    output = widgets.Output()
    display(button, output)

    def on_button_clicked(b):
        # everything printed by the computation is shown inside the output widget
        with output:
            runMeasurers(databasesNamesToRun,IterationsNum,measurments,selected_data,singleIteration)

    button.on_click(on_button_clicked)

In [89]:
# Start the interactive prompt; the selected measures are computed once the "Proceed" button is clicked.
HelloNewUser()

[1mWelcome to the inconsistency measurer

[0m


Please specify the databases you wish to compute the measures on (seperated by ',')
The databases included are: Adult,Airport,Flight,Food,Hospital,Stock,Tax,Voters
 Stock



Do you wish to run a simulation that introduces random violations in the database? [y/n]


HTML(value='<i>Default value is: y</i>')

 



Please specify the number of iterations of the simulation


HTML(value='<i>Default value is: 100</i>')

 5



Please choose the measures you wish to compute: 


VBox(children=(Checkbox(value=False, description='I_D'), Checkbox(value=False, description='I_MI'), Checkbox(v…

Output()

[1m
This message only applies in case you wish to compute the I_MC measure [0m

Please make sure :
1. To build the parallel_enum project inside the current folder
2. that the file text_ui created by parallel_enum is located in /parallel_enum/build/
[91mFor additional instructions regarding the parallel_enum algorithm, please visit: [0m


HTML(value="<a style='color:red;' target='_blank' rel='noopener noreferrer' href='https://github.com/veluca93/…

Button(button_style='success', description='Proceed', icon='check', style=ButtonStyle(), tooltip='Click me')

Output()