In [19]:
import operator

# Maps the textual comparison symbol stored in a Predicate to the function
# from the `operator` module that implements it.
operator_dict = {
    "<": operator.lt,
    "<=": operator.le,
    "=": operator.eq,
    "!=": operator.ne,
    ">=": operator.ge,
    ">": operator.gt,
}

# Maps each comparison symbol to the function implementing its logical
# negation (e.g. ">=" maps to "less than").
operator_inv_dict = {
    ">=": operator.lt,
    ">": operator.le,
    "!=": operator.eq,
    "=": operator.ne,
    "<": operator.ge,
    "<=": operator.gt,
}

# Maps a column type name to the callable used to convert a raw cell value
# before comparison.
type_dict = {
    "int": int,
    "str": str,
    # The type header read from the data file spells the string type as
    # "string"; without this alias a predicate on such a column raises KeyError.
    "string": str,
}


class Predicate:
    """Comparison of one column between two rows.

    A predicate stores a column position, a comparison symbol (a key of
    operator_dict) and a type name (a key of type_dict) used to convert
    the two cell values before they are compared.

    Because __eq__ is defined without __hash__, instances are not hashable.
    """

    def __init__(self, attributePos, operator, attributeType):
        """Store the column position, comparison symbol and type name."""
        self.attribute = attributePos  # int: position of the column in a row
        self.operator = operator  # string: key of operator_dict
        self.attributeType = attributeType  # string: key of type_dict

    def is_true(self, row1, row2):
        """Return True if the pair (row1, row2) satisfies the predicate.

        Raises KeyError if the stored operator or type name is not a key of
        operator_dict / type_dict.
        """
        compare = operator_dict[self.operator]
        convert = type_dict[self.attributeType]
        return compare(convert(row1[self.attribute]), convert(row2[self.attribute]))

    def __str__(self):
        return "(" + str(self.attribute) + "," + str(self.__dict__) + ")"

    def __eq__(self, other):
        """Two predicates are equal when all their attributes are equal."""
        # Objects such as ints have no __dict__; let Python fall back to its
        # default comparison instead of raising AttributeError.
        if not isinstance(other, Predicate):
            return NotImplemented
        return self.__dict__ == other.__dict__

    
class DC:
    """Denial constraint: a conjunction of predicates.

    The constraint is violated by a pair of rows when every predicate is
    true for that pair; it is satisfied as soon as one predicate is false.
    """

    def __init__(self):
        """Create a constraint with no predicates."""
        self.predicates = []

    def add(self, pred):
        """Append a predicate to the conjunction."""
        self.predicates.append(pred)

    def __str__(self):
        # Each predicate is followed by a comma, including the last one.
        return "{" + "".join(str(pred) + "," for pred in self.predicates) + "}"

    def remove(self, predicate):
        """Remove every stored predicate that compares equal to `predicate`."""
        # Filter in place: `del` on a loop variable only unbinds the name and
        # leaves the list untouched.
        self.predicates[:] = [
            pred for pred in self.predicates if not (pred == predicate)
        ]

    def size(self):
        """Return the number of predicates."""
        return len(self.predicates)

    def is_satisfy(self, row1, row2):
        """Return True if at least one predicate is false for (row1, row2).

        With no predicates this returns False.
        """
        return any(
            not predicate.is_true(row1, row2) for predicate in self.predicates
        )

    def get_violations(self, dataBase):
        """Return the list of ordered index pairs (i, j), i != j, that violate
        the constraint, scanning every ordered pair of rows in `dataBase`.
        """
        print("start")  # progress message emitted before the scan
        row_count = len(dataBase)
        return [
            (i, j)
            for i in range(row_count)
            for j in range(row_count)
            if i != j and not self.is_satisfy(dataBase[i], dataBase[j])
        ]
    
class Sigma:
    """Set of denial constraints (corresponds to Sigma in the theory)."""

    def __init__(self, DCs=None):
        """Store the given list of constraints, or a fresh empty list.

        A list passed by the caller is stored as-is (not copied).
        """
        # A literal [] default would be shared by every instance created
        # without arguments, so create the list per instance instead.
        self.DCs = [] if DCs is None else DCs

    def is_satisfy(self, row1, row2):
        """Return True if at least one constraint is not satisfied by
        (row1, row2); return False otherwise, including when there are no
        constraints.

        NOTE(review): despite the name, True here means that some constraint
        is violated. Confirm this is what callers expect.
        """
        return any(
            not constraint.is_satisfy(row1, row2) for constraint in self.DCs
        )

    def add(self, DC):
        """Append a constraint to the set."""
        self.DCs.append(DC)

    def remove(self, DC1):
        """Remove every stored constraint that compares equal to `DC1`."""
        # Filter in place: `del` on a loop variable only unbinds the name and
        # leaves the list untouched.
        self.DCs[:] = [
            constraint for constraint in self.DCs if not (DC1 == constraint)
        ]

    def size(self):
        """Return the number of constraints."""
        return len(self.DCs)

In [20]:
# Read file Data.txt: the first line holds the column names, the second line
# the column types, and every remaining line is one row of the database.
# The with-block closes the file handle once the lines are read.
with open("Data.txt", "r") as file:
    data = file.readlines()

columnName = data[0]  # kept as the raw, unsplit header line
del data[0]
columnType = data[0].split()
del data[0]
print(columnType)
print(columnType[4])

# now data is a list of list: each list in data is a row of the database
data = [row.split() for row in data]

print("Data size = " + str(len(data)))

['string', 'string', 'string', 'int', 'int', 'int']
int
Data size = 10


In [21]:
# Check two predicates on rows 0 and 1, in both argument orders.
# Each printed line is True when the predicate result equals the value it is
# compared against.
pred1 = Predicate(4, ">", columnType[4])
print(
    pred1.is_true(data[0], data[1]) == False,
    pred1.is_true(data[1], data[0]) == True,
    sep="\n",
)

pred2 = Predicate(5, "<", columnType[5])
print(
    pred2.is_true(data[0], data[1]) == False,
    pred2.is_true(data[1], data[0]) == False,
    sep="\n",
)

<class 'int'>
True
<class 'int'>
True
<class 'int'>
True
<class 'int'>
True


In [22]:
# Exercise DC: first with a single predicate, then with two.

den1 = DC()

# Constraint made of pred1 only.
den1.add(pred1)
print(
    den1.is_satisfy(data[0], data[1]) == True,
    den1.is_satisfy(data[1], data[0]) == False,
    den1.size(),
    sep="\n",
)

# Constraint made of pred1 and pred2.
den1.add(pred2)
print(
    den1.is_satisfy(data[0], data[1]) == True,
    den1.is_satisfy(data[1], data[0]) == True,
    den1.size(),
    sep="\n",
)

# Look at the pair (row 4, row 3): the constraint result, the two rows,
# and each predicate evaluated on its own.
print(
    den1.is_satisfy(data[4], data[3]) == False,
    data[3],
    data[4],
    "____",
    pred1.is_true(data[4], data[3]),
    pred2.is_true(data[4], data[3]),
    "____",
    sep="\n",
)

# All violating index pairs over the whole database.
print(den1.get_violations(data))

# A second constraint built from the same two predicates, shown as text.
den2 = DC()
den2.add(pred1)
den2.add(pred2)
print(den2)

<class 'int'>
True
<class 'int'>
True
1
<class 'int'>
True
<class 'int'>
<class 'int'>
True
2
<class 'int'>
<class 'int'>
True
['Stanley', '13-8-1987', '868-701', '2007', '23', '3']
['Stanley', '31-7-1983', '***-198', '2007', '24', '0']
____
<class 'int'>
True
<class 'int'>
True
____
start
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'i

In [24]:
# Inspect the pair (row 0, row 7): the constraint result, each predicate on
# its own, and the text form of the first predicate stored in den1.
print(
    den1.is_satisfy(data[0], data[7]),
    pred1.is_true(data[0], data[7]),
    pred2.is_true(data[0], data[7]),
    den1.predicates[0],
    sep="\n",
)

<class 'int'>
True
<class 'int'>
False
<class 'int'>
True
(4,{'attribute': 4, 'operator': '>', 'attributeType': 'int'})
