# 1. Assumptions

In [478]:
tokens = ['.', ':- ', ', ']

|Token Type|Description|Examples|
|:-|:-|:-|
|COMMA|The ',' character|,|
|PERIOD|The '.' character|.|
|Q_MARK|The '?' character|?|
|LEFT_PAREN|The '(' character|(|
|RIGHT_PAREN|The ')' character|)|
|COLON|The ':' character|	:|
|COLON_DASH|The string “:-”|:-|
|MULTIPLY|The '*' character|*|
|ADD|The '+' character|+|
|SCHEMES|The string “Schemes”|Schemes|
|FACTS|The string “Facts”|Facts|
|RULES|The string “Rules”|Rules|
|QUERIES|The string “Queries”|Queries|

In [479]:
def getClauses(datalog_query):
    '''
    Description: Seperate each clause by character '.'
    type string: datalog_query
    rtype list
    '''
    return [item for item in datalog_query.split('.') if item != '']

In [480]:
def seperateHeadBody(clause):
    '''
    Description: get head and body for each clause
    type string: clauses
    rtype list
    '''
    head = clause.split(':-')[0]
    body = clause.split(':-')[1]
    return head,body

In [481]:
def getTermsInParens(a):
    '''
    Description: get each term in parentheses
    type string: a
    rtype list: results
    '''
    results = []
    if a.find('(') != -1 and a.find(')') != -1:
        terms = a[a.find("(")+1:a.find(")")].split(',')
        for term in terms:
            results.append(term)
    return results

def getColsFromHead(head):
    cols = getTermsInParens(head)
    return cols

In [482]:
def getPredPerTerm(term):
    predicate = ''
    if term.find('(') != -1 and term.find(')') != -1:
        predicate = term.split('(')[0]
    return predicate

def getTabsPerBody(body):
    tabs = []
    bodyLiterals = body.split(', ')
    for bodyLiteral in bodyLiterals:
        if bodyLiteral.find('(') != -1 and bodyLiteral.find(')') != -1:
            predicate = bodyLiteral.split('(')[0]
            tabs.append(predicate)
    return tabs

In [483]:
class Node(object):
    def __init__(self, data):
        self.data = data
        self.children = []

    def add_child(self, obj):
        self.children.append(obj)

In [484]:
def parseDatalog2Tree(clauses):
    '''
    type list[string]: clauses
    rtype Node: n
    '''
    n = Node(seperateHeadBody(clauses[0])[0])
    for literal in seperateHeadBody(clauses[0])[1].split(', '):
        n.add_child(literal)
    return n

In [485]:
def mapFactSchema(factList, inputSchemas):
    '''
    Description: make a dictionary to contain the fact schama and corresponds fact
    type list: factList
    type list: inputSchema
    '''
    results = {}
    for fact in factList:
        pred = getPredPerTerm(fact)
        for schema in inputSchemas:
            if schema.find(pred) != -1:
                for i in range(len(getTermsInParens(schema))):
                    key = pred + "." + getTermsInParens(schema)[i]
                    if key in results:
                        results[key].append(getTermsInParens(fact)[i])
                    else:
                        results[key] = [getTermsInParens(fact)[i]]
    return results

In [486]:
def getSchemaDict(inputSchemas):
    '''
    Description: Create a dictionary for schemas
    type list: inputSchemas
    rtype dictionary
    '''
    schemaDict = {}
    for schema in inputSchemas:
        pred = getPredPerTerm(schema)
        if pred in schemaDict:  
            schemaDict[pred] += getTermsInParens(schema)
        else:
            schemaDict[pred] = getTermsInParens(schema)
    return schemaDict

In [497]:
def translateDatalog2Sql(n, factList, inputSchemas):
    '''
    type Node: n
    type list: inputSchemas
    type list: factList
    rtype sqlQuery: string
    '''
    schemaDict = getSchemaDict(inputSchemas)
    factSchemaDict = mapFactSchema(factList, inputSchemas)
    sqlQuery = ''
    sqlQueryFrom = 'From '
    sqlQueryWhere = 'Where '
    sqlQuerySelect = 'Select '
    duplicateTerms = {}
    head = getPredPerTerm(n.data)
    sqlQueryView = 'Create View ' + head + ' as('
    headTerms = getTermsInParens(n.data)
    colNums = len(headTerms)
    i = 1
    for children in n.children:
        tab = getPredPerTerm(children)
        sqlQueryFrom += tab + ', '
        terms = getTermsInParens(children)
        # Update where clause
        for termPos in range(len(terms)):
            termCol = tab + '.' + terms[termPos]
            if termCol not in factSchemaDict:
                ### Need to edit
                sqlQueryWhere += tab + '.' + schemaDict[tab][termPos] + '=' + terms[termPos]  +" and "
            if terms[termPos] not in duplicateTerms:
                duplicateTerms[terms[termPos]] = termCol
            else:
                sqlQueryWhere += duplicateTerms[terms[termPos]] + "=" + termCol + " and "
        # Update select clause
        for term in terms:        
            if term in headTerms and i <= 2:
                sqlQuerySelect += termCol + ", "
                i += 1

    sqlQuery += sqlQueryView + "\n" + sqlQueryFrom.strip(', ') + "\n" + sqlQueryWhere.strip(' and ') + "\n" + sqlQuerySelect.strip(', ') + ')'
    return sqlQuery

In [498]:
inp = 'r1(a,b):-fact1(x,a), fact2(20,y), fact3(y,b).'

In [499]:
clauses = getClauses(inp)

In [501]:
factList =['fact1(2,1)','fact1(2,3)','fact1(2,4)','fact2(20,10)','fact2(20,30)','fact2(20,40)', 'fact3(22,21)','fact3(22,23)','fact3(22,24)']

In [502]:
inputSchemas = ['fact1(x,a)', 'fact2(a,y)', 'fact3(y,b)']

In [503]:
n = parseDatalog2Tree(clauses)

In [504]:
sqlQuery = translateDatalog2Sql(n, factList, inputSchemas)

In [505]:
print(sqlQuery)

Create View r1 as(
From fact1, fact2, fact3
Where fact2.a=20 and fact2.y=fact3.y
Select fact1.a, fact3.b)
