# 1. Assumptions

In [1]:
tokens = ['.', ':- ', ', ']

|Token Type|Description|Examples|
|:-|:-|:-|
|COMMA|The ',' character|,|
|PERIOD|The '.' character|.|
|Q_MARK|The '?' character|?|
|LEFT_PAREN|The '(' character|(|
|RIGHT_PAREN|The ')' character|)|
|COLON|The ':' character|	:|
|COLON_DASH|The string “:-”|:-|
|MULTIPLY|The '*' character|*|
|ADD|The '+' character|+|
|SCHEMES|The string “Schemes”|Schemes|
|FACTS|The string “Facts”|Facts|
|RULES|The string “Rules”|Rules|
|QUERIES|The string “Queries”|Queries|

In [1]:
def getClauses(datalog_query):
    '''
    Description: Seperate each clause by character '.'
    type string: datalog_query
    rtype list
    '''
    return [item for item in datalog_query.split('.') if item != '']

In [2]:
def seperateHeadBody(clause):
    '''
    Description: get head and body for each clause
    type string: clauses
    rtype list
    '''
    head = clause.split(':-')[0]
    body = clause.split(':-')[1]
    return head,body

In [3]:
def getTermsInParens(a):
    '''
    Description: get each term in parentheses
    type string: a
    rtype list: results
    '''
    results = []
    if a.find('(') != -1 and a.find(')') != -1:
        terms = a[a.find("(")+1:a.find(")")].split(',')
        for term in terms:
            results.append(term)
    return results

def getColsFromHead(head):
    cols = getTermsInParens(head)
    return cols

In [4]:
def getPredPerTerm(term):
    predicate = ''
    if term.find('(') != -1 and term.find(')') != -1:
        predicate = term.split('(')[0]
    return predicate

def getTabsPerBody(body):
    tabs = []
    bodyLiterals = body.split(', ')
    for bodyLiteral in bodyLiterals:
        if bodyLiteral.find('(') != -1 and bodyLiteral.find(')') != -1:
            predicate = bodyLiteral.split('(')[0]
            tabs.append(predicate)
    return tabs

In [5]:
class Node(object):
    def __init__(self, data):
        self.data = data
        self.children = []

    def add_child(self, obj):
        self.children.append(obj)

In [6]:
def parseDatalog2Tree(clauses):
    '''
    type list[string]: clauses
    rtype Node: n
    '''
    n = Node(seperateHeadBody(clauses[0])[0])
    for literal in seperateHeadBody(clauses[0])[1].split(', '):
        n.add_child(literal)
    return n

In [7]:
def translateDatalog2Sql(n):
    '''
    type Node: n
    rtype sqlQuery: string
    '''
    sqlQuery = ''
    sqlQueryFrom = 'From '
    sqlQueryWhere = 'Where '
    sqlQuerySelect = 'Select '
    duplicateTerms = {}
    head = getPredPerTerm(n.data)
    sqlQueryView = 'Create View ' + head + ' as('
    headTerms = getTermsInParens(n.data)
    colNums = len(headTerms)
    i = 1
    for children in n.children:
        tab = getPredPerTerm(children)
        sqlQueryFrom += tab + ', '
        terms = getTermsInParens(children)
        for term in terms:
            termCol = tab + '.' + term
            if term not in duplicateTerms:
                duplicateTerms[term] = termCol
            else:
                sqlQueryWhere += duplicateTerms[term] + "=" + termCol + ", "
            if term in headTerms and i <= 2:
                sqlQuerySelect += termCol + ", "
                i += 1

    sqlQuery += sqlQueryView + "\n" + sqlQueryFrom.strip(', ') + "\n" + sqlQueryWhere.strip(', ') + "\n" + sqlQuerySelect.strip(', ') + ')'
    return sqlQuery


In [9]:
clauses = getClauses(inp)

In [10]:
n = parseDatalog2Tree(clauses)

In [11]:
sqlQuery = translateDatalog2Sql(n)

In [12]:
print(sqlQuery)

Create View r1 as(
From fact1, fact2, fact3
Where fact1.X=fact2.X, fact2.Y=fact3.Y
Select fact1.A, fact3.B)
