In [1]:
import re

In [10]:
# ParseException is raised below but is not defined in the visible cells.
# Provide a fallback only when nothing else has already put the name in scope,
# so an existing definition/import is never shadowed.
if "ParseException" not in globals():

    class ParseException(Exception):
        """Raised when a query string cannot be parsed."""


def parse_tokens(tokens):
    """Return the first match from a ``re.findall`` result.

    Args:
        tokens: The list produced by ``re.findall``; each entry is a string
            (one capture group) or a tuple of strings (several groups).

    Returns:
        The first entry of ``tokens``.

    Raises:
        ParseException: If ``tokens`` is empty (or otherwise falsy), i.e. the
            pattern did not match the query.
    """
    if not tokens:
        raise ParseException("Could not parse the SQL query")
    return tokens[0]

class QueryParser(object):
    """Parse a small SQL-like query string into its component parts.

    Recognised inputs (matched case-insensitively, because the whole query is
    lower-cased before parsing):

    * ``.t`` -- meta command, returned unchanged.
    * ``create table|database <name>`` -> ``(kind, name)``
    * ``insert into <table>(<cols>) values(<values>)``
      -> ``(tablename, cols, values)``
    * ``select <cols> from <table> [where <filters>] [limit <n>]``
      -> ``(cols, tablename, filters, limit)``

    Column lists, value lists and filters are returned as raw, unsplit strings.
    """

    # Meta commands are matched exactly against the stripped query.
    _META_COMMANDS = (".t",)

    def __init__(self, query):
        """Store the raw query string; nothing is parsed until ``parse()``."""
        super(QueryParser, self).__init__()
        self.query = query

    def _parse_create(self, tokens):
        """Return ``(kind, name)`` for a ``create table|database`` statement."""
        # The name must start with a letter/underscore but may contain digits;
        # requiring at least one character rejects a missing name.
        matches = re.findall(
            r"create\s+(table|database)\s+([a-zA-Z_]\w*)", tokens
        )
        kind, name = parse_tokens(matches)
        return kind, name

    def _parse_insert(self, tokens):
        """Return ``(tablename, cols, values)`` for an ``insert into`` statement."""
        matches = re.findall(
            r"insert\s+into\s+([a-zA-Z_]\w*).*\((.*?)\).*\s+values.*\((.*?)\)",
            tokens,
        )
        tablename, cols, values = parse_tokens(matches)
        return tablename, cols, values

    def _parse_select(self, tokens):
        """Return ``(cols, tablename, filters, limit)`` for a ``select`` statement.

        ``filters`` and ``limit`` are ``None`` when the corresponding clause is
        absent. Clauses are peeled off from the end of the statement: first
        ``limit``, then ``where``, then the ``select ... from ...`` core.
        """
        filters = limit = None

        # Word boundaries keep identifiers such as "limits" or "unlimited"
        # from being mistaken for the LIMIT keyword.
        limit_match = re.search(r"\blimit\b\s*(\d*)", tokens)
        if limit_match:
            if not limit_match.group(1):
                # "limit" keyword without a number.
                raise ParseException("Could not parse the SQL query")
            limit = int(limit_match.group(1))
            tokens = tokens[: limit_match.start()].strip()

        # Same boundary rule for WHERE (e.g. a value containing "somewhere").
        # "." does not match a newline, so only the first line of the filter
        # expression is captured.
        where_match = re.search(r"\bwhere\b\s*(.*)", tokens)
        if where_match:
            if not where_match.group(1):
                # "where" keyword without a filter expression.
                raise ParseException("Could not parse the SQL query")
            filters = where_match.group(1)
            tokens = tokens[: where_match.start()].strip()

        matches = re.findall(r"select\s+(.*?)\s*from\s+(\w*)\s?", tokens)
        cols, tablename = parse_tokens(matches)
        return cols, tablename, filters, limit

    def _parse(self, tokens):
        """Dispatch the lower-cased query to the matching statement parser.

        Args:
            tokens: The lower-cased query string.

        Returns:
            ``tokens`` itself for a meta command, otherwise the tuple produced
            by the statement-specific parser.

        Raises:
            ParseException: If the statement type is not recognised or the
                statement does not match the expected shape.
        """
        stripped_token = tokens.strip()

        # meta commands
        # .t -> List all Tables
        if stripped_token in self._META_COMMANDS:
            return tokens

        # ddl commands
        if stripped_token.startswith("create"):
            return self._parse_create(tokens)

        # dml commands
        if stripped_token.startswith("insert"):
            return self._parse_insert(tokens)

        if stripped_token.startswith("select"):
            return self._parse_select(tokens)

        raise ParseException("Could not parse the SQL query")

    def parse(self):
        """Lower-case the stored query and parse it.

        Returns:
            See the class docstring for the per-statement return shapes.

        Raises:
            ParseException: If the query cannot be parsed.
        """
        # NOTE(review): str.center(3) only pads queries shorter than three
        # characters; longer queries pass through unchanged. Kept as-is so the
        # value returned for meta commands does not change.
        tokens = self.query.center(3).lower()
        return self._parse(tokens)

In [14]:
# Demo: parse a multi-line SELECT that has both a WHERE filter and a LIMIT.
# parse() returns (cols, tablename, filters, limit); the cell output below
# shows the limit converted to an int and the filter kept as a raw string.
qc = QueryParser(
"""
SELECT name
FROM tablename
where name='bheem'
limit 10
"""
)
qc.parse()

('name', 'tablename', "name='bheem'", 10)

In [12]:
# Demo: parse an INSERT; the result is (tablename, cols, values) with the
# column and value lists left as raw comma-separated strings.
# parse() lower-cases the whole query, so 'Phoebe' comes back as 'phoebe'.
qc = QueryParser("INSERT INTO person(id,age,name) VALUES(3,32,'Phoebe')")
qc.parse()

('person', 'id,age,name', "3,32,'phoebe'")

In [13]:
# Demo: parse a CREATE statement; the result is (object kind, name).
qc = QueryParser("CREATE database bruh")
qc.parse()

('database', 'bruh')