Skip to content

Commit

Permalink
Merge pull request #1 from wuxiaohua1011/mongoconverter
Browse files Browse the repository at this point in the history
Looks good
  • Loading branch information
dwinston committed Nov 30, 2018
2 parents 22127d3 + e73838c commit 20d52c1
Show file tree
Hide file tree
Showing 20 changed files with 678 additions and 11 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ language: python
python:
- "3.6"
script:
- pip install -e .
- pytest
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
recursive-include *.txt *.g *.py
recursive-include *.txt *.g *.py *.ini
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,29 @@ pydot__tree_to_png(tree, "exampletree.png")
```
![example tree](exampletree.png)

### Flow for Parsing User-Supplied Filter and Converting to Backend Query
`Parser` will take user input to generate a tree and feed that to a `Converter` which will turn that tree into your desired query language.
![Optimade General Procedure](optimade_general_procedure.jpg)


###### Example: Converting to MongoDB Query Syntax
The `Parser` class from `optimade/filter.py` will transform user input into a `Lark` tree using [lark-parser](https://github.com/lark-parser/lark).

The `Lark` tree will then be passed into a desired `converter`, for instance, the `mongoconverter` located at `optimade/converter/mongoconverter`, for transformation into your desired database query language. We have built our mongoconverter using the [Python Query Language (pql)](https://github.com/alonho/pql).

![Optimade to Mongodb Procedure](optimade_to_mongodb_procedure.jpg)

Usage examples for `mongoconverter` script:
```bash
$ mongoconverter "filter=a<3"
{'a': {'$lt': 3.0}}
$ mongoconverter "filter=_mp_bandgap > 5.0 AND _cod_molecular_weight < 350"
{'$and': [{'_mp_bandgap': {'$gt': 5.0}}, {'_cod_molecular_weight': {'$lt': 350.0}}]}
```

### Developing New Filter Converters
If you would like to add your own converter, for instance, an OPTIMade to NoSQL converter, please
1. add your project in the `optimade/converter` folder,
2. add any requirements in the `requirements.txt`,
3. if you wish to have a console entry point, add that to the `console_scripts` in the `setup.py` file
4. and run `pip install -r requirements.txt` and `pip install -e .`
Empty file added optimade/converter/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions optimade/converter/mongoconverter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# OPTIMade to MongoDB Converter
A converter that will take in a Lark tree and convert that to a MongoDB query

### Getting Started
1. Download project by running `git clone https://github.com/Materials-Consortia/optimade-python-tools`
2. `cd optimade-python-tools`
3. Install requirements by running `pip install -r requirements.txt` and `pip install -e .`
4. Now you should have mongoconverter installed as well as its dependency Lark. Simply run `mongoconverter -h` for help.


#### Note
This is not a standalone project; it depends on the `Parser` output from `optimade/filter.py`.
Empty file.
85 changes: 85 additions & 0 deletions optimade/converter/mongoconverter/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import sys, os
import configparser
# below is a hack, not too sure why without this line the program would crash at from mongo...
sys.path.append(os.path.dirname(__file__))
from mongo import optimadeToMongoDBConverter
import json
import argparse
import ast
import re

class ConfigFileNotFoundException(Exception):
    """Raised when a user-supplied config file yields no usable section."""


def main(args=None):
    """Convert an OPTIMade filter given on the command line to a MongoDB query.

    @param args: list of command-line arguments (without the program name);
                 defaults to ``sys.argv[1:]`` when None.
    @return: whatever ``optimadeToMongoDBConverter`` returns for the query.
    @raise ConfigFileNotFoundException: if ``--Config`` is given but the file
           provides neither an ``[aliases]`` nor a ``[version]`` section
           (which is also the case when the file does not exist).

    Procedure:
    1. parse the command line
    2. read the user-supplied config file, or the ``config.ini`` that sits
       next to this module when none is supplied
    3. collect the aliases and the grammar version from the config
    4. hand the query, version and aliases to the converter
    """
    if args is None:
        args = sys.argv[1:]

    ap = argparse.ArgumentParser()
    ap.add_argument("Query", help="Query with quotation mark around it. ex: 'filter= a < 0'")
    ap.add_argument("-config", "--Config", required=False, help="Path to customized config file. Please see config.ini for example config file format")
    # Parse the arguments that were passed in rather than always re-reading
    # sys.argv, so that main() can be driven programmatically.
    args = ap.parse_args(args)

    config = configparser.ConfigParser()
    if args.Config is not None:
        # ConfigParser.read() silently skips unreadable files, so a missing
        # file is detected by the absence of both expected sections.
        config.read(args.Config)
        if not (config.has_section('aliases') or config.has_section('version')):
            raise ConfigFileNotFoundException("Config File Not Found at Location: {}".format(args.Config))
    else:
        config.read(os.path.join(os.path.dirname(__file__), 'config.ini'))

    alias = dict()
    v = None

    if config.has_section('aliases'):
        alias = dict(config.items('aliases'))

    if config.has_section('version'):
        version = config['version']
        v = (int(version['major']), int(version['minor']), int(version['patch']))

    return optimadeToMongoDBConverter(args.Query, v, alias)



# Script entry point: run the converter when this module is executed directly.
# The value returned by main() is not printed or otherwise used here.
if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions optimade/converter/mongoconverter/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# A config file must have at least one of the [aliases] and [version] sections (it may have both)
[aliases]
a = a
c = c

[version]
major = 0
minor = 9
patch = 6
173 changes: 173 additions & 0 deletions optimade/converter/mongoconverter/mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import pql
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from optimade.filter import Parser
from lark import Transformer
import re

# data for pql to mongodb symbol
# Maps each OPTIMade comparison operator to its PQL equivalent.
optiMadeToPQLOperatorSwitch = {
    "=": "==",
    "<=": "<=",
    ">=": ">=",
    "!=": "!=",
    "<": "<",
    ">": ">",
}


class OperatorError(Exception):
    """Raised when a filter uses an operator that has no PQL equivalent."""


def OptiMadeToPQLOperatorValidator(x):
    """
    convert an OPTIMade comparison operator to its PQL symbol
    @param x: the operator as it appears in the OPTIMade filter
    @return: the matching PQL operator string
    @raise OperatorError: if x is not a key of optiMadeToPQLOperatorSwitch
    """
    try:
        return optiMadeToPQLOperatorSwitch[x]
    except KeyError:
        # Fail loudly here instead of handing None back to the caller, which
        # would only surface later as an unrelated TypeError.
        raise OperatorError("<{}> is not a valid operator".format(x)) from None


def combineMultiple(PQL, index):
    """
    @param PQL -- raw PQL string
    @param index -- index in which "," was found
    @return tuple (new PQL string, index of the first character that follows
            the inserted all([...]) expression)
    @raise ValueError if no quote is found before or after index
    Procedure:
    1. find the nearest quote before and the nearest quote after the ","
    2. get everything between those two quotes
    3. split the string into individual elements
    4. put them into Python Query Language format: all([...])
    """
    quotes = ("'", '"')
    # Nearest quote strictly before the comma (-1 when there is none).
    firstIndex = max(PQL.rfind(quote, 0, index) for quote in quotes)
    # Nearest quote at or after the comma.
    following = [pos for pos in (PQL.find(quote, index) for quote in quotes) if pos != -1]
    if firstIndex == -1 or not following:
        raise ValueError(
            "no enclosing quotes around ',' at index {} in {!r}".format(index, PQL)
        )
    lastIndex = min(following)

    # Exclude the quotes themselves and strip the whitespace that follows
    # each separating comma.
    items = [item.lstrip() for item in PQL[firstIndex + 1:lastIndex].split(",")]
    result = PQL[:firstIndex] + "all({})".format(items)
    # The caller resumes scanning after the combined sequence, so the commas
    # inside the generated list are never revisited.
    result_index = len(result)
    return result + PQL[lastIndex + 1:], result_index


def cleanPQL(PQL):
    """
    @param PQL: raw PQL
    @return the cleaned PQL
    Procedure:
    1. go through PQL, find "," to find where multiple elements need combining
    2. combine multiple
    3. return the cleaned PQL
    """
    i = 0
    # Re-evaluate the length on every pass: each combination changes the
    # length of the string, and a length cached up front would end the scan
    # before later comma-separated values are reached.
    while i < len(PQL):
        if PQL[i] == ",":
            PQL, i = combineMultiple(PQL, i)
        i = i + 1
    return PQL

class UnknownMongoDBQueryError(Exception):
    """Raised when a MongoDB query holds a value of an unsupported type."""


def cleanMongo(rawMongoDbQuery):
    """
    @param rawMongoDbQuery -- input that needs to be cleaned (modified in place)
    @return None
    @raise UnknownMongoDBQueryError if a dictionary value is neither a list,
           a dict nor a string
    Procedure:
    recursively go through the rawMongoDbQuery, turn string into float if
    possible in the value field. Anything that is not a dict is left alone,
    which means strings that sit directly inside a list are not converted.
    """
    if not isinstance(rawMongoDbQuery, dict):
        return
    for key, value in rawMongoDbQuery.items():
        if isinstance(value, list):
            for item in value:
                cleanMongo(item)
        elif isinstance(value, dict):
            cleanMongo(value)
        elif isinstance(value, str):
            try:
                # Replacing the value of an existing key does not resize the
                # dict, so this is safe while iterating over it.
                rawMongoDbQuery[key] = float(value)
            except ValueError:
                # Not a number: keep the original string.
                pass
        else:
            raise UnknownMongoDBQueryError("Unrecognized MongoDB Query \n {}".format(rawMongoDbQuery))


class OptimadeToPQLTransformer(Transformer):
    """
    Transformer that turns a Lark tree into a PQL string, one method per
    grammar rule
    """

    def comparison(self, args):
        # args[0] is the left-hand side, args[1] the operator, and everything
        # after that is joined with ", " to form the quoted right-hand side.
        lhs = str(args[0])
        rhs = ""
        for value in args[2:]:
            rhs = value if rhs == "" else rhs + ", " + value
        pqlOperator = OptiMadeToPQLOperatorValidator(args[1])
        return lhs + pqlOperator + '"' + rhs + '"'

    def atom(self, args):
        return args[0]

    def term(self, args):
        # Lower-case the boolean keywords, leave every other child untouched,
        # and wrap the space-joined children in parentheses.
        pieces = []
        for arg in args:
            lowered = arg.lower()
            pieces.append(lowered if lowered in ("and", "or") else arg)
        return "(" + " ".join(pieces).lstrip() + ")"

    def expression(self, args):
        # Children are joined with single spaces.
        return " ".join(args).lstrip()

    def start(self, args):
        # Only the second child is kept.
        # NOTE(review): presumably the first child is the "filter=" keyword --
        # confirm against the grammar.
        return args[1]

    def combined(self, args):
        return args[0]

def parseAlias(query, aliases):
    """
    @param query -- the query to be parsed
    @param aliases -- dictionary with structure {"OPTIMADE_STRUCTURE_NAME": "YOUR_DB_STRUCTURE_NAME"}
                      (None or an empty dictionary leaves the query unchanged)
    @return the query with every aliased name replaced
    Procedure:
    1. build one pattern that matches any OPTIMADE_STRUCTURE_NAME as a whole word
    2. replace all occurences of OPTIMADE_STRUCTURE_NAME with YOUR_DB_STRUCTURE_NAME
       in a single pass
    3. return the resultant query
    """
    if not aliases:
        return query
    # Longest names first, so that an alias containing a shorter alias is
    # matched in full rather than partially.
    names = sorted(aliases, key=len, reverse=True)
    # re.escape keeps regex metacharacters in a name from being interpreted.
    pattern = r"\b(?:%s)\b" % "|".join(re.escape(name) for name in names)
    # A single pass keeps replacements from chaining (a -> b, then b -> c),
    # and a callable replacement inserts the target name literally instead of
    # treating backslashes in it as regex template escapes.
    return re.sub(pattern, lambda match: aliases[match.group(0)], query)
def optimadeToMongoDBConverter(optimadeQuery, version=None, aliases=None):
    """
    entry point for turning an optimade query into a mongoDB query
    @param optimadeQuery -- the optimade query string
    @param version -- passed to Parser as its version argument
    @param aliases -- alias dictionary applied to the query by parseAlias
    @return the cleaned MongoDB query; if parsing, transforming or the PQL
            conversion raises, the exception object is returned (not raised)
    Procedure:
    1. apply the aliases to the optimadeQuery
    2. parse the query into a Lark tree
    3. transform the tree into raw PQL, then clean it (combined items put in place)
    4. let pql turn the cleaned PQL into a raw MongoDB query
    5. clean the raw MongoDB query (string values become floats where possible)
    """
    parser = Parser(version=version)
    aliasedQuery = parseAlias(optimadeQuery, aliases)
    try:
        larkTree = parser.parse(aliasedQuery)
        pqlText = cleanPQL(OptimadeToPQLTransformer().transform(larkTree))
        query = pql.find(pqlText)
    except Exception as error:
        return error
    else:
        # Kept outside the guarded section: errors from the final cleaning
        # step propagate to the caller.
        cleanMongo(query)
        return query
Empty file.

0 comments on commit 20d52c1

Please sign in to comment.