In [1]:
from engine import client, data_explore, get_parser, generate_sql, restructure_query, parse_tree_to_sql, query_sql, query_nosql, SQLToMongoConverter
from warnings import filterwarnings
filterwarnings('ignore')

RULES FOR NLQ (NATURAL LANGUAGE QUERY)
*1 Always use KEYWORDS [where, order by, group by] in NL; slight deviations allowed
*2 Always join multiword operators with '-' [standard-deviation, minimum-of /etc]; other connectors allowed except space
*3 Where clauses don't support pure cat field comparisions 



[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>


In [None]:
"""
SQL db -> JSONs -> schema -> SQL gen1
                          -> NL-SQL parser -> SQL gen2
"""

def nl_to_sql(nl_query, schema, table):
    """Translate a natural-language query into SQL via the grammar parser.

    Bundles the steps that the SQL and NoSQL demos below have in common:
    ``get_parser`` -> ``restructure_query`` -> whitespace split ->
    first parse tree -> ``parse_tree_to_sql``.

    Args:
        nl_query: Natural-language query text.
        schema: Schema description as returned by ``data_explore``.
        table: Table / collection name handed to ``get_parser``.

    Returns:
        The value ``parse_tree_to_sql`` produces for the first parse tree
        (used below as the SQL statement).

    Raises:
        ValueError: If the parser yields no tree for the restructured query.
    """
    parser, corrs = get_parser(schema, table)
    restructured = restructure_query(nl_query, corrs)
    # Only the first parse is used. A default of None replaces the bare
    # StopIteration that next() would otherwise raise on an empty iterator.
    tree = next(parser.parse(restructured.split()), None)
    if tree is None:
        raise ValueError(
            f"no parse found for query {nl_query!r} "
            f"(restructured as {restructured!r})"
        )
    return parse_tree_to_sql(tree)


# --- SQL demo: natural-language query -> SQL -> run against SQLite ---------
schema_description = data_explore('SQL/thrombosis_prediction')
# `query` keeps the original natural-language text; the restructured form
# lives only inside nl_to_sql, so re-running this cell is idempotent.
query = "select sum of symptoms from examinations where the id > 3853710 grouped by people's thrombosis status"
sql = nl_to_sql(query, schema_description, 'Examination')
query_sql('SQL/thrombosis_prediction/thrombosis_prediction.sqlite', sql)


# --- NoSQL demo ------------------------------------------------------------
# NoSQL db -> JSONs -> schema -> SQL gen1 -> NoSQL gen1
#                             -> NL-SQL parser -> SQL gen2 -> NoSQL gen2
schema_description = data_explore('NoSQL/formula_1')
query = "select maximum wins from constructorStandings where position placed is < 3 and grouped by constructorId"
sqlt = nl_to_sql(query, schema_description, 'constructorStandings')
# The SQL text is converted to a Mongo query before being executed.
nosql = SQLToMongoConverter().convert_to_mongo(sqlt)
# The trailing ';' suppresses the cell's Out[] display without rebinding
# IPython's special `_` variable (the previous `_ = "end"` did rebind it).
query_nosql('formula_1', nosql);

In [1]:
from test_cases import NoSQLTestCases, SQLTestCases

RULES FOR NLQ (NATURAL LANGUAGE QUERY)
*1 Always use KEYWORDS [where, order by, group by] in NL; slight deviations allowed
*2 Always join multiword operators with '-' [standard-deviation, minimum-of /etc]; other connectors allowed except space
*3 Where clauses don't support pure cat field comparisions 



[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>


In [5]:
# Run every NoSQL test case. Calling the tester on a case prints that case's
# report (NL query, parse tree, SQL, NoSQL, results), as seen in the output.
tester = NoSQLTestCases()
cases = tester.t  # presumably the collection of test-case queries; confirm in test_cases
# Keep the return values under a descriptive name instead of assigning to
# `_`: rebinding `_` stops IPython from tracking the last output in it.
# An assignment already suppresses the Out[] display.
nosql_results = [tester(case) for case in cases]

####################################################################################################
NL Query :  Fetch the minimum points from constructorStandings, grouped by constructorStandingsId.
Reconstructed Query :  fetch minimum points constructorStandings group by constructorStandingsId
Parse Tree :  (S
  (CMD fetch)
  (AGG minimum)
  (COLUMN (NUM points))
  (TABLE constructorStandings)
  (CLAUSE
    group
    by
    (COLUMN (NOMCAT constructorStandingsId))
    (CLAUSE )))
SQL :  SELECT MIN(points) FROM constructorStandings GROUP BY constructorStandingsId
NoSQL :  constructorStandings.aggregate([{'$group': {'_id': '$constructorStandingsId', 'result': {'$min': '$points'}}}])
Results :  [{'_id': 10586, 'result': 0.0}, {'_id': 3482, 'result': 0.0}, {'_id': 19077, 'result': 3.0}]
####################################################################################################
NL Query :  Get the minimum position from constructorStandings where points = 7.




Reconstructed Query :  get minimum position constructorStandings where points = <number>
Parse Tree :  (S
  (CMD get)
  (AGG minimum)
  (COLUMN (NOMCAT position))
  (TABLE constructorStandings)
  (CLAUSE
    where
    (COLUMN (NUM points))
    (NOP =)
    (VALUE <number>)
    (CLAUSE )))
SQL :  SELECT MIN(position) FROM constructorStandings WHERE points = 7.
NoSQL :  constructorStandings.aggregate([{'$match': {'points': {'$eq': 7.0}}}, {'$group': {'_id': None, 'result': {'$min': '$position'}}}])
Results :  [{'_id': None, 'result': 2}]
####################################################################################################
NL Query :  Get the mean of points from constructorStandings.
Reconstructed Query :  get median points constructorStandings
Parse Tree :  (S
  (CMD get)
  (AGG median)
  (COLUMN (NUM points))
  (CLAUSE )
  (TABLE constructorStandings))
SQL :  SELECT MEDIAN(points) FROM constructorStandings 
NoSQL :  constructorStandings.aggregate([{'$group': {'_id': None, 



Reconstructed Query :  fetch variance lng circuits where circuitId > <number>
Parse Tree :  (S
  (CMD fetch)
  (AGG variance)
  (COLUMN (NUM lng))
  (TABLE circuits)
  (CLAUSE
    where
    (COLUMN (ORDCAT circuitId))
    (NOP >)
    (VALUE <number>)
    (CLAUSE )))
SQL :  SELECT VARIANCE(lng) FROM circuits WHERE circuitId > 50
NoSQL :  circuits.aggregate([{'$match': {'circuitId': {'$gt': 50}}}, {'$group': {'_id': None, 'result': {'$variance': '$lng'}}}])
Results :  []
####################################################################################################
NL Query :  Fetch _id from constructors.
Reconstructed Query :  fetch _id constructors
Parse Tree :  (S (CMD fetch) (COLUMN (CAT _id)) (CLAUSE ) (TABLE constructors))
SQL :  SELECT _id FROM constructors 
NoSQL :  constructors.aggregate([{'$project': {'_id': 1}}])
Results :  [{'_id': ObjectId('67380451baeca311226f8280')}, {'_id': ObjectId('67380451baeca311226f8281')}, {'_id': ObjectId('67380451baeca311226f8282')}]
########

In [2]:
# Run the SQL test cases for 'thrombosis_prediction'. Each call prints that
# case's report, and the list of per-case return values is the cell's output.
tester = SQLTestCases('thrombosis_prediction')
cases = tester.t
[tester(case) for case in cases]


####################################################################################################
NL Query :  Get the average of ANA where Thrombosis = 1 from Examination.




Reconstructed Query :  get average ANA where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD get)
  (AGG average)
  (COLUMN (NOMCAT ANA))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT AVG(ANA) FROM Examination WHERE Thrombosis = 1
Results :  [(639.2452830188679,)]
####################################################################################################
NL Query :  Retrieve the sum of ANA where Thrombosis = 2 in Examination.




Reconstructed Query :  retrieve summation ANA where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD retrieve)
  (AGG summation)
  (COLUMN (NOMCAT ANA))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT SUM(ANA) FROM Examination WHERE Thrombosis = 2
Results :  [(27848,)]
####################################################################################################
NL Query :  Fetch unique values of Symptoms where Thrombosis = 3 from Examination.




Reconstructed Query :  fetch Symptoms where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD fetch)
  (COLUMN (CAT Symptoms))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT Symptoms FROM Examination WHERE Thrombosis = 3
Results :  [('thrombocytepenia',), ('thrombocytopenia',), ('thrombocytopenia',)]
####################################################################################################
NL Query :  Fetch distinct Diagnosis values where Thrombosis = 2 in Examination.




Reconstructed Query :  fetch distinct Diagnosis where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD fetch)
  (AGG distinct)
  (COLUMN (CAT Diagnosis))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT DISTINCT(Diagnosis) FROM Examination WHERE Thrombosis = 2
Results :  [(None,), ('SLE',), ('SLE+Psy',)]
####################################################################################################
NL Query :  Fetch the maximum value of GOT where GOT > 100 from Laboratory.




Reconstructed Query :  fetch maximum GOT where GOT > <number> Laboratory
Parse Tree :  (S
  (CMD fetch)
  (AGG maximum)
  (COLUMN (NOMCAT GOT))
  (CLAUSE
    where
    (COLUMN (NOMCAT GOT))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT MAX(GOT) FROM Laboratory WHERE GOT > 100
Results :  [(21480,)]
####################################################################################################
NL Query :  Get the average of CPK where TG > 100 from Laboratory.




Reconstructed Query :  get average CPK where TG > <number> Laboratory
Parse Tree :  (S
  (CMD get)
  (AGG average)
  (COLUMN (NOMCAT CPK))
  (CLAUSE
    where
    (COLUMN (NOMCAT TG))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT AVG(CPK) FROM Laboratory WHERE TG > 100
Results :  [(119.23266219239373,)]
####################################################################################################
NL Query :  Retrieve the average value of ALB where HGB > 12 from Laboratory.




Reconstructed Query :  retrieve average ALB where HGB > <number> Laboratory
Parse Tree :  (S
  (CMD retrieve)
  (AGG average)
  (COLUMN (NUM ALB))
  (CLAUSE
    where
    (COLUMN (NUM HGB))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT AVG(ALB) FROM Laboratory WHERE HGB > 12
Results :  [(4.281120716270456,)]
####################################################################################################
NL Query :  Get the total count of RBC where T-BIL > 0.2 in Laboratory.




Reconstructed Query :  get count RBC where T-BIL > <number> Laboratory
Parse Tree :  (S
  (CMD get)
  (AGG count)
  (COLUMN (NUM RBC))
  (CLAUSE
    where
    (COLUMN (NUM T-BIL))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT COUNT(RBC) FROM Laboratory WHERE T-BIL > 0.2
Results :  []
####################################################################################################
NL Query :  Fetch the range of UA values where CRP = 2 in Laboratory.




Reconstructed Query :  fetch RA UA where CRP = <number> Laboratory
Parse Tree :  (S
  (CMD fetch)
  (COLUMN (CAT RA) (COLUMN (NUM UA)))
  (CLAUSE
    where
    (COLUMN (CAT CRP))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT RA, UA FROM Laboratory WHERE CRP = 2
Results :  [('-', 7.0), (None, 8.3), (None, 6.6)]
####################################################################################################
NL Query :  Retrieve the minimum value of ALP where ALP > 15 in Laboratory.




Reconstructed Query :  retrieve minimum ALP where ALP > <number> Laboratory
Parse Tree :  (S
  (CMD retrieve)
  (AGG minimum)
  (COLUMN (NOMCAT ALP))
  (CLAUSE
    where
    (COLUMN (NOMCAT ALP))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT MIN(ALP) FROM Laboratory WHERE ALP > 15
Results :  [(22,)]
####################################################################################################
NL Query :  Fetch the total count of PatientID where SEX = 1 from Patient.




Reconstructed Query :  fetch count ID where SEX = <number> Patient
Parse Tree :  (S
  (CMD fetch)
  (AGG count)
  (COLUMN (NOMCAT ID))
  (CLAUSE
    where
    (COLUMN (CAT SEX))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Patient))
SQL :  SELECT COUNT(ID) FROM Patient WHERE SEX = 1
Results :  [(0,)]
####################################################################################################
NL Query :  Get all unique values of SEX where ID = 4060811 from Patient.




Reconstructed Query :  get SEX where ID = <number> Patient
Parse Tree :  (S
  (CMD get)
  (COLUMN (CAT SEX))
  (CLAUSE
    where
    (COLUMN (NOMCAT ID))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Patient))
SQL :  SELECT SEX FROM Patient WHERE ID = 4060811
Results :  [('F',)]
####################################################################################################
NL Query :  Fetch all non-null Description values where PatientID = 1124385 from Patient.




Reconstructed Query :  fetch Description where ID = <number> Patient
Parse Tree :  (S
  (CMD fetch)
  (COLUMN (CAT Description))
  (CLAUSE
    where
    (COLUMN (NOMCAT ID))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Patient))
SQL :  SELECT Description FROM Patient WHERE ID = 1124385
Results :  [('1997-07-31',)]


[[(639.2452830188679,)],
 [(27848,)],
 [('thrombocytepenia',), ('thrombocytopenia',), ('thrombocytopenia',)],
 [(None,), ('SLE',), ('SLE+Psy',)],
 [(21480,)],
 [(119.23266219239373,)],
 [(4.281120716270456,)],
 [],
 [('-', 7.0), (None, 8.3), (None, 6.6)],
 [(22,)],
 [(0,)],
 [('F',)],
 [('1997-07-31',)]]