-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from wuxiaohua1011/mongoconverter
Looks good
- Loading branch information
Showing
20 changed files
with
678 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,5 @@ language: python | |
python: | ||
- "3.6" | ||
script: | ||
- pip install -e . | ||
- pytest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
recursive-include *.txt *.g *.py | ||
recursive-include *.txt *.g *.py *.ini |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# OPTIMade to MongoDB Converter | ||
A converter that will take in a Lark tree and convert that to a MongoDB query | ||
|
||
### Getting Started | ||
1. Download project by running `git clone https://github.com/Materials-Consortia/optimade-python-tools` | ||
2. `cd optimade-python-tools` | ||
3. Install requirements by running `pip install -r requirements.txt` and `pip install -e .` | ||
4. You should now have `mongoconverter` installed along with its dependency Lark. Run `mongoconverter -h` for help. | ||
|
||
|
||
#### Note | ||
This is not a standalone project; it depends on the `Parser` output from `optimade/filter.py`. |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import sys, os | ||
import configparser | ||
# HACK: add this file's directory to sys.path; without it the program crashes at the `from mongo import ...` line below (root cause not yet understood) | ||
sys.path.append(os.path.dirname(__file__)) | ||
from mongo import optimadeToMongoDBConverter | ||
import json | ||
import argparse | ||
import ast | ||
import re | ||
|
||
def main(args=None):
    """Convert an OPTiMaDe filter given on the command line into a MongoDB query.

    Aliases and the grammar version are read from an INI config file: the one
    given with ``-config/--Config`` if present, otherwise the ``config.ini``
    that sits next to this module.

    @param args: list of command-line arguments (without the program name);
                 ``sys.argv[1:]`` is used when None
    @return: whatever ``optimadeToMongoDBConverter`` returns for the query
    @raise ConfigFileNotFoundException: a config path was given, but after
           reading it neither an [aliases] nor a [version] section exists
    """
    class ConfigFileNotFoundException(Exception):
        """The user-supplied config file is missing or has no usable section."""

    if args is None:
        args = sys.argv[1:]

    ap = argparse.ArgumentParser()
    ap.add_argument("Query", help="Query with quotation mark around it. ex: 'filter= a < 0'")
    ap.add_argument("-config", "--Config", required=False, help="Path to customized config file. Please see config.ini for example config file format")
    # Parse the list handed to main() rather than implicitly re-reading
    # sys.argv, so programmatic callers get the arguments they passed in.
    args = ap.parse_args(args)

    config = configparser.ConfigParser()
    if args.Config is not None:
        # ConfigParser.read() silently skips unreadable files, so a missing
        # file is detected by the absence of every expected section.
        config.read(args.Config)
        if not (config.has_section('aliases') or config.has_section('version')):
            raise ConfigFileNotFoundException("Config File Not Found at Location: {}".format(args.Config))
    else:
        config.read(os.path.join(os.path.dirname(__file__), 'config.ini'))

    alias = dict()
    v = None

    if config.has_section('aliases'):
        alias = dict(config.items('aliases'))

    if config.has_section('version'):
        section = config['version']
        v = (int(section['major']), int(section['minor']), int(section['patch']))

    result = optimadeToMongoDBConverter(args.Query, v, alias)
    return result
|
||
|
||
|
||
# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# A config file must have at least one of the [aliases] or [version] sections (it may have both) | ||
[aliases] | ||
a = a | ||
c = c | ||
|
||
[version] | ||
major = 0 | ||
minor = 9 | ||
patch = 6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
import pql | ||
import sys, os | ||
sys.path.append(os.path.dirname(os.path.dirname(__file__))) | ||
from optimade.filter import Parser | ||
from lark import Transformer | ||
import re | ||
|
||
# Maps each supported OPTiMaDe comparison operator to its
# Python Query Language (PQL) spelling.
optiMadeToPQLOperatorSwitch = {
    "=": "==",
    "<=": "<=",
    ">=": ">=",
    "!=": "!=",
    "<": "<",
    ">": ">",
}


class OperatorError(Exception):
    """Raised when an operator has no entry in optiMadeToPQLOperatorSwitch."""


def OptiMadeToPQLOperatorValidator(x):
    """
    Translate an OPTiMaDe comparison operator into its PQL equivalent.

    @param x: operator as written in the OPTiMaDe query, e.g. "=" or "<="
    @return: the matching PQL operator string
    @raise OperatorError: if x is not a key of optiMadeToPQLOperatorSwitch
    """
    item = optiMadeToPQLOperatorSwitch.get(x)
    # dict.get returns None for unknown keys; compare the value itself
    # (type(item) is never None, so a type comparison can never fail).
    if item is None:
        raise OperatorError("<{}> is not a valid operator".format(x))
    return item
|
||
|
||
def combineMultiple(PQL, index):
    """
    Rewrite the quoted, comma-separated value surrounding a comma as all([...]).

    @param PQL -- raw PQL string
    @param index -- index in PQL at which a "," was found
    @return: tuple (rewritten PQL, index just past the inserted all([...]))
    @raise ValueError: if no quote is found before or after index
    Procedure:
        1. find the nearest quote before and after the index of the ","
        2. take everything between those two quotes
        3. split that text on "," into individual elements
        4. replace the quoted text with all([...]) holding those elements
    """
    # Nearest quote (single or double) strictly before the comma.
    firstIndex = max(PQL.rfind("'", 0, index), PQL.rfind('"', 0, index))
    # Nearest quote (single or double) at or after the comma.
    closing = [pos for pos in (PQL.find("'", index), PQL.find('"', index)) if pos != -1]
    if firstIndex == -1 or not closing:
        raise ValueError("no quoted value surrounds the comma at index {}".format(index))
    lastIndex = min(closing)

    # firstIndex points at the opening quote itself, so skip it
    insertion = PQL[firstIndex + 1:lastIndex].split(",")
    # strip the leading whitespace of every element, then insert the elements
    # as a list literal in place of the original quoted text
    result = PQL[:firstIndex] + "all({})".format([item.lstrip() for item in insertion])
    # position right after the combined sequence, for the caller to resume from
    result_index = len(result)
    result = result + PQL[lastIndex + 1:]
    return result, result_index
|
||
def cleanPQL(PQL):
    """
    Replace every quoted comma-separated value in the PQL with all([...]).

    @param PQL: raw PQL
    @return: the cleaned PQL string
    Procedure:
        1. scan PQL for "," to find where multiple elements must be combined
        2. let combineMultiple rewrite the quoted value around that ","
        3. resume scanning after the rewritten part and return the result
    """
    i = 0
    # combineMultiple changes the length of PQL, so the bound has to be
    # re-evaluated on every pass; a length cached before the loop would stop
    # the scan before later comma groups are reached.
    while i < len(PQL):
        if PQL[i] == ",":
            PQL, i = combineMultiple(PQL, i)
        i = i + 1
    return PQL
|
||
class UnknownMongoDBQueryError(Exception):
    """Raised when a MongoDB query holds a value of an unsupported type."""


def cleanMongo(rawMongoDbQuery):
    """
    Convert numeric-looking string values of a MongoDB query to float, in place.

    @param rawMongoDbQuery -- query to clean; anything that is not a dict is
                              left untouched
    @return: None, the dict is modified in place
    @raise UnknownMongoDBQueryError: a value is neither a list, dict nor str
    Procedure:
        recursively walk the query; dict values and list items are descended
        into, string values are replaced by float(value) when that succeeds
    """
    if not isinstance(rawMongoDbQuery, dict):
        return
    # Only values of existing keys are reassigned, so iterating is safe.
    for k, value in rawMongoDbQuery.items():
        if isinstance(value, list):
            for v in value:
                cleanMongo(v)
        elif isinstance(value, dict):
            cleanMongo(value)
        elif isinstance(value, str):
            try:
                rawMongoDbQuery[k] = float(value)
            except ValueError:
                # not a number: keep the original string
                pass
        else:
            raise UnknownMongoDBQueryError("Unrecognized MongoDB Query \n {}".format(rawMongoDbQuery))
|
||
|
||
class OptimadeToPQLTransformer(Transformer):
    """
    Lark Transformer that rebuilds a parsed OPTiMaDe filter tree as a PQL string
    """

    def comparison(self, args):
        # args[0] is the left operand, args[1] the operator, the rest the value(s)
        left = str(args[0])
        values = ""
        for piece in args[2:]:
            values = piece if values == "" else values + ", " + piece
        pql_operator = OptiMadeToPQLOperatorValidator(args[1])
        # the value side is always emitted inside double quotes
        return left + pql_operator + '"' + values + '"'

    def atom(self, args):
        return args[0]

    def term(self, args):
        # "and"/"or" are lower-cased whatever their case in the input;
        # every other piece is kept as is
        pieces = []
        for token in args:
            lowered = token.lower()
            pieces.append(" " + (lowered if lowered in ("and", "or") else token))
        return "(" + "".join(pieces).lstrip() + ")"

    def expression(self, args):
        # space-separated concatenation of all children
        return "".join(" " + child for child in args).lstrip()

    def start(self, args):
        # the second child is returned; the first one is dropped
        return args[1]

    def combined(self, args):
        return args[0]
|
||
def parseAlias(query, aliases):
    """
    Replace whole-word occurrences of alias names in an OPTiMaDe query.

    @param query -- the query to be parsed
    @param aliases -- dictionary with structure {"OPTIMADE_STRUCTURE_NAME": "YOUR_DB_STRUCTURE_NAME"}, or None
    @return: the query with every alias replaced; unchanged if aliases is None
    Procedure:
        1. loop through all aliases
        2. replace all occurrences of OPTIMADE_STRUCTURE_NAME with YOUR_DB_STRUCTURE_NAME
        3. return the resulting query
    Note: aliases are applied one after another, so the output of one
    replacement can be matched by a later alias.
    """
    if aliases is None:
        return query
    for alias, replacement in aliases.items():
        # Escape the alias so characters such as "." match literally.
        pattern = r"\b%s\b" % re.escape(alias)
        # A callable replacement is inserted verbatim; a plain string would be
        # treated as a template in which backslashes have special meaning.
        query = re.sub(pattern, lambda _match, _repl=replacement: _repl, query)
    return query
def optimadeToMongoDBConverter(optimadeQuery, version=None, aliases=None):
    """
    Entry point: turn an OPTiMaDe query string into a MongoDB query.

    @param optimadeQuery -- the OPTiMaDe query string
    @param version -- passed to Parser as its version argument
    @param aliases -- alias dictionary handed to parseAlias, or None
    @return: the cleaned MongoDB query; if parsing or conversion raises, the
             exception object itself is returned (it is NOT re-raised)
    Procedure:
        1. apply the aliases and parse the query into a Lark tree
        2. transform the tree into raw PQL
        3. clean the raw PQL (put combined items in place)
        4. let pql build the raw MongoDB query from the cleaned PQL
        5. clean the MongoDB query (string values become float where possible)
    """
    parser = Parser(version=version)
    aliasedQuery = parseAlias(optimadeQuery, aliases)
    try:
        larkTree = parser.parse(aliasedQuery)
        cleanedPQL = cleanPQL(OptimadeToPQLTransformer().transform(larkTree))
        mongoDbQuery = pql.find(cleanedPQL)
    except Exception as error:
        # failures of steps 1-4 are handed back to the caller as a value
        return error
    else:
        # the final clean-up runs outside the try, so its errors propagate
        cleanMongo(mongoDbQuery)
        return mongoDbQuery
Empty file.
Oops, something went wrong.