# 0. Table - EDB

## Table 1: Product

### Col 1: item

### Col 2: price

## Table 2: Product

### Col 1: item

### Col 2: country

In [422]:
productCol = ["Product","Price"]
madeinCol = ["Product", "Country"]

In [423]:
tables = ["madein", "price"]

In [424]:
tabCol = {tables[0]:madeinCol, tables[1]:productCol}

In [425]:
countries = ["germany","belgium","uk","china","usa","japan"]
prices = ["5","6","10"]

In [426]:
consDict = {"Country":countries,  "Price": prices}

In [427]:
consDict

{'Country': ['germany', 'belgium', 'uk', 'china', 'usa', 'japan'],
 'Price': ['5', '6', '10']}

# 1. From Datalog to Sql

## Datalog Programs Predicates

## $p = \{p_1,p_2,p_3\}$

### $p_1:$ good_product

- Made in Germany or Belgium

### $p_2:$ bad_product

- Not made in Germany, Belgium or UK

### $p_3:$ perfect_product

- Price < 5 and not a bad product

## Datalog Program P

In [428]:
datalog_goodProd = "good_product(Product):-madein(Product,germany).good_product(Product):-madein(Product,belgium)."

datalog_badProd = "bad_product(Product):-madein(Product,Country), Country!=germany, Country!=belgium, Country!=uk."

datalog_perfProd = "perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<5."

In [429]:
datalog_goodProd

'good_product(Product):-madein(Product,germany).good_product(Product):-madein(Product,belgium).'

In [430]:
datalog_badProd

'bad_product(Product):-madein(Product,Country), Country!=germany, Country!=belgium, Country!=uk.'

In [431]:
datalog_perfProd

'perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<5.'

In [432]:
import re

In [433]:
sep1 = '.'
sep2 = ':-'
sep3 = ', '
cmps = ["!=",">","<","<=",">=","=="]

In [434]:
datalog_goodProd
datalog_badProd
datalog_perfProd

'perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<5.'

In [435]:
def getClauses(datalog_query):
    return [item for item in datalog_query.split(sep1) if item != '']

In [436]:
def getHeads(datalog_query):
    clauses = getClauses(datalog_query)
    return [clause.split(sep2)[0] for clause in clauses]

In [437]:
def getAllPredicates(datalog_query):
    return [head.split('(')[0] for head in getHeads(datalog_query)]

In [438]:
def getVariablesInHeads(datalog_query):
    heads = getHeads(datalog_query)
    query = []
    for head in heads:
        query += [q.strip() for q in head[head.find("(")+1:head.find(")")].split(',')]
    return query

In [439]:
def getBodies(datalog_query):
    clauses = getClauses(datalog_query)
    return [clause.split(sep2)[1] for clause in clauses]

In [440]:
def getHBDict(datalog_query):
    result = {}
    clauses = getClauses(datalog_query)
    i = 1
    for clause in clauses:
        result["Clause"+str(i)] = clause.split(sep2)[0]+sep2+clause.split(sep2)[1]
        i += 1
    return result

In [441]:
def getTotalPredicates(datalog_query):
    result = []
    for body in getBodies(datalog_query):
        for term in body.split(', '):
            result.append(term)
    for head in getHeads(datalog_query):
        result.append(head)
    return result

In [442]:
def getLiteralHeadDict(datalog_query):
    bH = getHBDict(datalog_query)
    result = {}
    for key in bH.keys():
        if len(key.split(sep3))>1:
            for term in key.split(sep3):
                result[term] = bH[key]
        else:
            result[key.split(sep3)[0]] = bH[key]
    return result

In [443]:
def getVariablesInBodies(datalog_query):
    query = []
    for body in getHBDict(datalog_query).keys():
        for term in body.split(', '):
            if term.find("(") != -1:
                query += [q.strip() for q in term[term.find("(")+1:term.find(")")].split(',')]
            else:
                for cmp in cmps:
                    if term.find(cmp) != -1:
                        query.append(term.split(cmp)[0])
                        query.append(term.split(cmp)[1])
    return query

In [444]:
def getVariablesInClauses(datalog_query):
    query = []
    for body in getHBDict(datalog_query).keys():
        for term in body.split(', '):
            if term.find("(") != -1:
                query += [q.strip() for q in term[term.find("(")+1:term.find(")")].split(',')]
            else:
                for cmp in cmps:
                    if term.find(cmp) != -1:
                        query.append(term.split(cmp)[0])
                        query.append(term.split(cmp)[1])
    for body in getHBDict(datalog_query).values():
        for term in body.split(', '):
            if term.find("(") != -1:
                query += [q.strip() for q in term[term.find("(")+1:term.find(")")].split(',')]
            else:
                for cmp in cmps:
                    if term.find(cmp) != -1:
                        query.append(term.split(cmp)[0])
                        query.append(term.split(cmp)[1])
    return query

In [445]:
def getPVDict(datalog_query):
    predConsDict = {}
    i = 1
    for var in getVariablesInClauses(datalog_query):
        for ls in consDict.values():
            if var in ls:
                if var in predConsDict.keys():
                    continue
                else:
                    predConsDict[var] = 'P'+str(i)
                    i = i + 1
    return predConsDict

In [446]:
inp = datalog_goodProd+datalog_badProd+datalog_perfProd

In [447]:
inp

'good_product(Product):-madein(Product,germany).good_product(Product):-madein(Product,belgium).bad_product(Product):-madein(Product,Country), Country!=germany, Country!=belgium, Country!=uk.perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<5.'

## Step 1. Replace Constant with variables and add new Predicates

In [448]:
pvDict = getPVDict(inp)

In [449]:
pvDict

{'5': 'P4', 'belgium': 'P2', 'germany': 'P1', 'uk': 'P3'}

In [450]:
def newPredicates4Constants(pvDict):
    """
    :type pvDict, predicate-constant pair get in 1st step: dict
    :rtype newPredicates: list
    """
    newPredicates = []
    for key in pvDict.keys():
        newPredicates.append(pvDict[key]+'='+'{('+key+')};')   
    return newPredicates

In [451]:
newPredicates = newPredicates4Constants(pvDict)

In [452]:
newPredicates

['P1={(germany)};', 'P2={(belgium)};', 'P3={(uk)};', 'P4={(5)};']

In [453]:
bH = getHBDict(inp)

In [454]:
def replaceConstantInClauses(consDict, bH):
    """
    :type pvDict, predicate-constant pair get in 1st step: dict
    :rtype newPredicates: list
    """
    consts = []
    for key in consDict.keys():
        consts+=consDict[key]
    
    newDict = {}
    for key in bH.keys():
        i = 1
        clause = bH[key]
        for const in consts:     
            if clause.find(const) != -1:
                variable = "v" + str(i)
                clause = clause.replace(const,variable)
                add = ", " + pvDict[const] + "(" + variable + ")"
                temp = clause + add
                i = i + 1
        newDict[key] = temp
    return newDict

In [455]:
newDict 

{'Clause1': 'good_product(Product):-madein(Product,v1), P1(v1)',
 'Clause2': 'good_product(Product):-madein(Product,v1), P2(v1)',
 'Clause3': 'bad_product(Product):-madein(Product,Country), Country!=v1, Country!=v2, Country!=v3, P3(v3)',
 'Clause4': 'perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<v1, P4(v1)'}

## Step 2. Replace Equation

## Step 3. Combine Multiple-Clause Predicate Definitions

In [456]:
def comBineMulCalPreds(pvDict, newDict):
    """
    :type pvDict, predicate-constant pair get in 1st step: dict
    :type newDict, clauses filterd after previous steps: dict
    :rtype newClause: dict
    """
    newPredStart = len(pvDict)
    # Find duplicate predicate
    predCount = {}
    duplicatePred = []
    for key in newDict.keys():
        a = newDict[key].split(':')[0]
        b = a.split('(')[0]
        if b in predCount:
            duplicatePred.append(b)
            predCount[b] += 1
        else:
            predCount[b] = 1
    
    i = len(pvDict) + 1
    newPredicate3 = {}
    for key in newDict.keys():
        a = newDict[key].split(':')[0]
        b = a.split('(')[0]
        newPredicate3[b] = ''
    
    for key in newDict.keys():
        a = newDict[key].split(':')[0]
        b = a.split('(')[0]
        if predCount[b] > 1:
            newDict[key] = newDict[key].replace(b,'P'+str(i))
            newPredicate3[b] += 'P'+str(i)+' logicAnd '
            i += 1
            
    newPredicate3Cleard = {}
    for key in newPredicate3.keys():
        if newPredicate3[key] != '':
            newPredicate3Cleard[key] = newPredicate3[key].strip('logicAnd ')
    
    return newPredicate3Cleard

In [457]:
newDict

{'Clause1': 'good_product(Product):-madein(Product,v1), P1(v1)',
 'Clause2': 'good_product(Product):-madein(Product,v1), P2(v1)',
 'Clause3': 'bad_product(Product):-madein(Product,Country), Country!=v1, Country!=v2, Country!=v3, P3(v3)',
 'Clause4': 'perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<v1, P4(v1)'}

In [458]:
pvDict

{'5': 'P4', 'belgium': 'P2', 'germany': 'P1', 'uk': 'P3'}

In [459]:
newPredStart = len(pvDict)

In [460]:
a = comBineMulCalPreds(pvDict, newDict)

In [461]:
newDict

{'Clause1': 'P5(Product):-madein(Product,v1), P1(v1)',
 'Clause2': 'P6(Product):-madein(Product,v1), P2(v1)',
 'Clause3': 'bad_product(Product):-madein(Product,Country), Country!=v1, Country!=v2, Country!=v3, P3(v3)',
 'Clause4': 'perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<v1, P4(v1)'}

In [463]:
newPredicates

['P1={(germany)};', 'P2={(belgium)};', 'P3={(uk)};', 'P4={(5)};']

## Step 4. Rename variables which appears multiple times in a clauses

### Get variables for each clause

In [818]:
def renameVariableInClauses(newDict):
    results = ''
    # Count variables in head
    for key in newDict.keys():
        head = newDict[key].split(':-')[0]
        body = newDict[key].split(':-')[1]
        # get each term in body
        predicates = body.split(', ')
    
        bvPreds = [] 
        bvEqs = []
        bvNegPreds = []
        for predicate in predicates:
            if predicate.find("not ") != -1:
                bvNegPreds.append(predicate)
            elif predicate.find("(") != -1:
                bvPreds.append(predicate)
            else:
                bvEqs.append(predicate)
    
        # First, deal with the variables in positive predicate
        i = 1
        btDict = {}
        for bvPred in bvPreds:
            terms = bvPred[bvPred.find("(")+1:bvPred.find(")")].split(',')
            terms = [term for term in terms if term != '']
            for term in terms:
                btDict['x'+str(i)] = term
                i += 1
        # Generate additional predicate
        a = {}
        newEq = ''
        for item in btDict.items():
            if item[1] in a:
                newEq += a[item[1]] + '==' + item[0] + ', '
            else:
                a[item[1]] = item[0]
    
        newBody = ''
        btDict4Mapping = btDict
        # use an other variable when same term appears in another prediacate
        usedVars = []
        for bvPred in bvPreds:
            newPred = bvPred
            for variable in btDict4Mapping.keys():
                if newPred.find(btDict4Mapping[variable]) != -1:
                    if btDict4Mapping[variable] not in varCount and variable not in usedVars:
                        newPred = newPred.replace(btDict4Mapping[variable],variable)
                        usedVars.append(variable)
            newBody += newPred +  ', '
        newBody = newBody + newEq
    
        # Second, deal with varibles in Eq
        # Create additional variables in pre-defined predicates
        
        # List Variables in pre-defined predicates
        varInIneq = []
        for bvEq in bvEqs:
            for cmp in cmps:
                if bvEq.find(cmp) != -1:
                    varInIneq.extend(bvEq.split(cmp))
        varInIneq = sorted(list(set(varInIneq)))
        newVarFlag = len(btDict)
    
        # Create dictionaries for new variables
        varDict4Ineq = {}
        for variable in varInIneq:
            if variable not in btDict.values():
                varDict4Ineq['x'+str(newVarFlag+1)] = variable
                newVarFlag += 1
        
        btDict = MergeDicts(btDict, varDict4Ineq)
        newbvEqs = []
        for bvEq in bvEqs:
            newbvEq = bvEq
            for variable,bt in btDict.items():
                if newbvEq.find(bt) != -1:
                    newbvEq = newbvEq.replace(bt, variable)
            newbvEqs.append(newbvEq)
    
        for newbvEq in newbvEqs:
            newBody += newbvEq + ', '
    
        # Third, deal with variables in negated predicate
        newbvNegPreds = [] 
        for bvNegPred in bvNegPreds:
            newbvNegPred = bvNegPred
            for variable,bt in btDict.items():
                if bvNegPred.find(bt) != -1:
                    newbvNegPred = newbvNegPred.replace(bt, variable)
            newbvNegPreds.append(newbvNegPred)
    
        for newbvNegPred in newbvNegPreds:
            newBody += newbvNegPred + ', '
    
        newBody = newBody.strip(', ')
    
        newHead = head
        for variable in btDict.keys():
            if head.find(btDict[variable]) != -1:
                newHead = newHead.replace(btDict[variable],variable)
    
        newClause = newHead+":-"+newBody
        results += newClause + '.'
    return results

In [819]:
newDict1 = newDict

In [820]:
rr = renameVariableInClauses(newDict1)

In [824]:
rr.split('.')

['P5(x1):-madein(x1,x2), P1(x3), x2==x3',
 'P6(x1):-madein(x1,x2), P2(x3), x2==x3',
 'bad_product(x1):-madein(x1,x2), P3(x3), x2!=x4, x2!=x5, x2!=x3',
 'perfect_product(x1):-good_product(x1), price(x2,x3), P4(x4), x1==x2, x3<x4, not bad_product(x1)']

In [822]:
newDict

{'Clause1': 'P5(Product):-madein(Product,v1), P1(v1)',
 'Clause2': 'P6(Product):-madein(Product,v1), P2(v1)',
 'Clause3': 'bad_product(Product):-madein(Product,Country), Country!=v1, Country!=v2, Country!=v3, P3(v3)',
 'Clause4': 'perfect_product(Product):-good_product(Product), not bad_product(Product), price(Product,Price), Price<v1, P4(v1)'}