In [1]:
from engine import client, data_explore, get_parser, generate_sql, restructure_query, parse_tree_to_sql, query_sql, query_nosql, SQLToMongoConverter
from warnings import filterwarnings
filterwarnings('ignore')

RULES FOR NLQ (NATURAL LANGUAGE QUERY)
*1 Always use KEYWORDS [where, order by, group by] in NL; slight deviations allowed
*2 Always join multiword operators with '-' [standard-deviation, minimum-of /etc]; other connectors allowed except space
*3 Where clauses don't support pure cat field comparisions 



[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>


In [None]:
"""
SQL db -> JSONs -> schema -> SQL gen1
                          -> NL-SQL parser -> SQL gen2
"""

def nl_to_sql(schema_description, table, nl_query):
    """Translate a natural-language query against ``table`` into a SQL string.

    Steps, in order: build the parser and correspondences with ``get_parser``,
    rewrite the query with ``restructure_query``, split it on whitespace, take
    the first parse tree, and render it with ``parse_tree_to_sql``.

    Args:
        schema_description: schema object returned by ``data_explore``.
        table: name of the table/collection the grammar is built for.
        nl_query: the natural-language query text.

    Returns:
        Whatever ``parse_tree_to_sql`` returns for the first parse tree.

    Raises:
        ValueError: if the parser produces no tree for the restructured query
            (previously this surfaced as a bare ``StopIteration``).
    """
    parser, corrs = get_parser(schema_description, table)
    restructured = restructure_query(nl_query, corrs)
    tree = next(parser.parse(restructured.split()), None)
    if tree is None:
        raise ValueError(f"No parse found for query: {restructured!r}")
    return parse_tree_to_sql(tree)


# --- SQL source -------------------------------------------------------------
schema_description = data_explore('SQL/thrombosis_prediction')
query = "select sum of symptoms from examinations where the id > 3853710 grouped by people's thrombosis status"
sql = nl_to_sql(schema_description, 'Examination', query)
# NOTE(review): the return value is not displayed because this is not the last
# expression of the cell — confirm query_sql prints what you expect to see.
query_sql('SQL/thrombosis_prediction/thrombosis_prediction.sqlite', sql)


# --- NoSQL source -----------------------------------------------------------
#
# NoSQL db -> JSONs -> schema -> SQL gen1 -> NoSQL gen1
#                             -> NL-SQL parser -> SQL gen2 -> NoSQL gen2

schema_description = data_explore('NoSQL/formula_1')
query = "select maximum wins from constructorStandings where position placed is < 3 and grouped by constructorId"
sqlt = nl_to_sql(schema_description, 'constructorStandings', query)
nosql = SQLToMongoConverter().convert_to_mongo(sqlt)
# The trailing ';' hides the cell's final value without rebinding `_`, which
# IPython uses for the previous output.
query_nosql('formula_1', nosql);

In [1]:
from test_cases import NoSQLTestCases, SQLTestCases

RULES FOR NLQ (NATURAL LANGUAGE QUERY)
*1 Always use KEYWORDS [where, order by, group by] in NL; slight deviations allowed
*2 Always join multiword operators with '-' [standard-deviation, minimum-of /etc]; other connectors allowed except space
*3 Where clauses don't support pure cat field comparisions 



[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>


In [3]:
# Run each entry of tester.t through the NoSQL tester for debit_card_specialization.
tester = NoSQLTestCases(db_name="debit_card_specialization")
cases = tester.t
# A plain loop: the tester is called for the report it prints, so there is no
# need to build a throwaway list or to rebind `_` (IPython's last-output name).
for case in cases:
    tester(case)
# NOTE(review): in the recorded output the "variance" case is translated to
# `$stdDevSamp` — confirm that mapping in the SQL-to-Mongo converter.

####################################################################################################
NL Query :  Fetch unique values of CustomerID from Customers where segment = 'SME'.




'SME'
Reconstructed Query :  fetch unique CustomerID customers where Segment = <string>
Parse Tree :  (S
  (CMD fetch)
  (AGG (DISTINCT unique))
  (COLUMN (ORDCAT CustomerID))
  (TABLE customers)
  (CLAUSE
    where
    (COLUMN (CAT Segment))
    (NOP =)
    (VALUE <string>)
    (CLAUSE )))
SQL :  SELECT DISTINCT(CustomerID) FROM customers WHERE Segment = 'SME'
NoSQL :  customers.aggregate([{'$match': {'Segment': {'$eq': 'SME'}}}, {'$group': {'_id': None, 'result': {'$addToSet': '$CustomerID'}}}])
Results :  [{'_id': None, 'result': [43995, 40477, 42236, 10656, 2990, 15933, 23599, 28966, 38718, 21930, 46294, 48143, 49812, 40387, 49902, 7048, 38808, 51751, 48053, 46204, 47963, 4929, 3170, 23509, 10566, 25268, 33261, 35020, 36779, 38538, 48213, 49722, 46114, 33171, 19901, 47873, 42416, 36869, 8717, 6958, 44175, 42326, 18052, 19811, 25088, 4479, 33711, 40747, 42506, 47783, 51301, 53060, 7498, 30365, 44355, 871, 12685, 14444, 37319, 47693, 19721, 23419, 40657, 26757, 28606, 27027, 28786, 5



'EUR'
Reconstructed Query :  fetch unique CustomerID customers where Currency = <string>
Parse Tree :  (S
  (CMD fetch)
  (AGG (DISTINCT unique))
  (COLUMN (ORDCAT CustomerID))
  (TABLE customers)
  (CLAUSE
    where
    (COLUMN (CAT Currency))
    (NOP =)
    (VALUE <string>)
    (CLAUSE )))
SQL :  SELECT DISTINCT(CustomerID) FROM customers WHERE Currency = 'EUR'
NoSQL :  customers.aggregate([{'$match': {'Currency': {'$eq': 'EUR'}}}, {'$group': {'_id': None, 'result': {'$addToSet': '$CustomerID'}}}])
Results :  [{'_id': None, 'result': [4331, 3023, 4782, 2527, 4827, 45515, 3519, 3068, 45560, 48492, 52822, 272, 4872, 4421, 3609, 53092, 4917, 1444, 3113, 2346, 4511, 3248, 2797, 51514, 4150, 5007, 4601, 2752, 407, 3654, 5052, 2887, 4646, 45740, 5097, 3789, 2842, 47995, 3293, 4691, 3834, 2932, 136, 2436, 3924, 4781, 4330, 52235, 3879, 3428, 4826, 3112, 4014, 4420, 48627, 4916, 3969, 2616, 4603, 50117, 138, 52688, 4152, 3791, 3340, 5099, 2889, 3746, 50658, 3295, 46013, 3430, 2528, 4287, 52



'Premium'
Reconstructed Query :  fetch unique ChainID gasstations where Segment = <string>
Parse Tree :  (S
  (CMD fetch)
  (AGG (DISTINCT unique))
  (COLUMN (CAT ChainID))
  (TABLE gasstations)
  (CLAUSE
    where
    (COLUMN (CAT Segment))
    (NOP =)
    (VALUE <string>)
    (CLAUSE )))
SQL :  SELECT DISTINCT(ChainID) FROM gasstations WHERE Segment = 'Premium'
NoSQL :  gasstations.aggregate([{'$match': {'Segment': {'$eq': 'Premium'}}}, {'$group': {'_id': None, 'result': {'$addToSet': '$ChainID'}}}])
Results :  [{'_id': None, 'result': [77, 31, 130, 33, 6, 15, 4, 1, 51, 29]}]
####################################################################################################
NL Query :  Get unique values of GasStationID from gasstations where Country = 'CZE'.




'CZE'
Reconstructed Query :  get unique GasStationID gasstations where Country = <string>
Parse Tree :  (S
  (CMD get)
  (AGG (DISTINCT unique))
  (COLUMN (ORDCAT GasStationID))
  (TABLE gasstations)
  (CLAUSE
    where
    (COLUMN (CAT Country))
    (NOP =)
    (VALUE <string>)
    (CLAUSE )))
SQL :  SELECT DISTINCT(GasStationID) FROM gasstations WHERE Country = 'CZE'
NoSQL :  gasstations.aggregate([{'$match': {'Country': {'$eq': 'CZE'}}}, {'$group': {'_id': None, 'result': {'$addToSet': '$GasStationID'}}}])
Results :  [{'_id': None, 'result': [813, 2572, 4331, 3474, 2527, 4376, 1715, 3429, 1625, 3519, 5278, 768, 2617, 1670, 723, 4421, 5368, 3609, 948, 2707, 91, 46, 1850, 993, 3564, 5413, 2662, 1895, 1760, 2797, 5458, 858, 3699, 1805, 2752, 5503, 903, 3654, 2887, 226, 3789, 5548, 2842, 2030, 1128, 3744, 5593, 1038, 3834, 181, 1940, 2932, 5638, 1083, 136, 1985, 5683, 2165, 3924, 406, 1263, 3022, 361, 1308, 3879, 2977, 2210, 5728, 316, 3112, 2075, 4014, 1173, 271, 1218, 2120, 3067, 3969



Reconstructed Query :  get Description products where ProductID = <number>
Parse Tree :  (S
  (CMD get)
  (COLUMN (CAT Description))
  (TABLE products)
  (CLAUSE
    where
    (COLUMN (ORDCAT ProductID))
    (NOP =)
    (VALUE <number>)
    (CLAUSE )))
SQL :  SELECT Description FROM products WHERE ProductID = 5
NoSQL :  products.aggregate([{'$match': {'ProductID': {'$eq': 5}}}, {'$project': {'Description': 1}}])
Results :  [{'_id': ObjectId('67452b0d9d6b2993a8e4e27b'), 'Description': 'Natural'}]
####################################################################################################
NL Query :  Fetch the minimum Price from transactions_1k.
Reconstructed Query :  fetch minimum Price transactions_1k
Parse Tree :  (S
  (CMD fetch)
  (AGG (MIN minimum))
  (COLUMN (NUM Price))
  (CLAUSE )
  (TABLE transactions_1k))
SQL :  SELECT MIN(Price) FROM transactions_1k 
NoSQL :  transactions_1k.aggregate([{'$group': {'_id': None, 'result': {'$min': '$Price'}}}])
Results :  [{'_id': None,



Reconstructed Query :  fetch variance Amount transactions_1k where Price > <number>
Parse Tree :  (S
  (CMD fetch)
  (AGG (VARIANCE variance))
  (COLUMN (NUM Amount))
  (TABLE transactions_1k)
  (CLAUSE
    where
    (COLUMN (NUM Price))
    (NOP >)
    (VALUE <number>)
    (CLAUSE )))
SQL :  SELECT VARIANCE(Amount) FROM transactions_1k WHERE Price > 100
NoSQL :  transactions_1k.aggregate([{'$match': {'Price': {'$gt': 100}}}, {'$group': {'_id': None, 'result': {'$stdDevSamp': '$Amount'}}}])
Results :  [{'_id': None, 'result': 21.185836377630338}]
####################################################################################################
NL Query :  Get CardID from transactions_1k.
Reconstructed Query :  get CardID transactions_1k
Parse Tree :  (S (CMD get) (COLUMN (CAT CardID)) (CLAUSE ) (TABLE transactions_1k))
SQL :  SELECT CardID FROM transactions_1k 
NoSQL :  transactions_1k.aggregate([{'$project': {'CardID': 1}}])
Results :  [{'_id': ObjectId('67452b0c9d6b2993a8e45fc2'), 

In [5]:
# Show the grammar text stored on the tester. The recorded output is a
# multi-line list of productions (S, CLAUSE, COLUMN, CAT, NUM, ...).
print(tester.cfg)


    S -> CMD "*" TABLE CLAUSE | CMD COLUMN TABLE CLAUSE | CMD AGG COLUMN TABLE CLAUSE | CMD "*" CLAUSE TABLE | CMD COLUMN CLAUSE TABLE| CMD AGG COLUMN CLAUSE TABLE 
    CLAUSE -> "where" COLUMN NOP VALUE CLAUSE | "group" "by" COLUMN CLAUSE | "order" "by" COLUMN CLAUSE 
    CLAUSE -> 
    COLUMN -> CAT COLUMN | NUM COLUMN | NOMCAT COLUMN | ORDCAT COLUMN | CAT AGG COLUMN | NUM AGG COLUMN | NOMCAT AGG COLUMN | ORDCAT AGG COLUMN
    COLUMN -> CAT | NUM | NOMCAT | ORDCAT
    CAT -> "_id" | "`Academic" "Year`" | "CDSCode" | "`Charter" "Funding" "Type`" | "`Charter" "School" "Number`" | "`County" "Code`" | "`County" "Name`" | "`District" "Name`" | "`District" "Type`" | "`Educational" "Option" "Type`" | "`High" "Grade`" | "`Low" "Grade`" | "`NSLP" "Provision" "Status`" | "`School" "Code`" | "`School" "Name`" | "`School" "Type`"
    NUM -> "`Enrollment" "(Ages" "5-17)`" | "`Enrollment" "(K-12)`" | "`FRPM" "Count" "(Ages" "5-17)`" | "`FRPM" "Count" "(K-12)`" | "`Free" "Meal" "Count" "(Ages" "5-

In [8]:
import re
def _split_ignore_quoticks(s):
    return re.findall(r"'[^']*'|`[^`]*`|\S+", s)


def _split_ignore_ticks(s):
    return re.findall(r"`[^`]*`|\S+", s)

# Sanity check: the backticked column name and the quoted value must each come
# back as a single token.
_split_ignore_quoticks("Extract `Percent-(%)-Eligible-FRPM-(Ages-5-17)` where name = 'praneeth'")

['Extract',
 '`Percent-(%)-Eligible-FRPM-(Ages-5-17)`',
 'where',
 'name',
 '=',
 "'praneeth'"]

In [3]:
# Show the grammar text stored on the tester. In this recorded run the
# multi-word column names appear hyphen-joined (e.g. "Academic-Year").
print(tester.cfg)


    S -> CMD "*" TABLE CLAUSE | CMD COLUMN TABLE CLAUSE | CMD AGG COLUMN TABLE CLAUSE | CMD "*" CLAUSE TABLE | CMD COLUMN CLAUSE TABLE| CMD AGG COLUMN CLAUSE TABLE 
    CLAUSE -> "where" COLUMN NOP VALUE CLAUSE | "group" "by" COLUMN CLAUSE | "order" "by" COLUMN CLAUSE 
    CLAUSE -> 
    COLUMN -> CAT COLUMN | NUM COLUMN | NOMCAT COLUMN | ORDCAT COLUMN | CAT AGG COLUMN | NUM AGG COLUMN | NOMCAT AGG COLUMN | ORDCAT AGG COLUMN
    COLUMN -> CAT | NUM | NOMCAT | ORDCAT
    CAT -> "_id" | "Academic-Year" | "CDSCode" | "Charter-Funding-Type" | "Charter-School-Number" | "County-Code" | "County-Name" | "District-Name" | "District-Type" | "Educational-Option-Type" | "High-Grade" | "Low-Grade" | "NSLP-Provision-Status" | "School-Code" | "School-Name" | "School-Type"
    NUM -> "Enrollment-(Ages-5-17)" | "Enrollment-(K-12)" | "FRPM-Count-(Ages-5-17)" | "FRPM-Count-(K-12)" | "Free-Meal-Count-(Ages-5-17)" | "Free-Meal-Count-(K-12)" | "Percent-(%)-Eligible-FRPM-(Ages-5-17)" | "Percent-(%)-Eligible

In [3]:
# Demonstrates that int() rejects a numeric string that still carries its quote
# characters. The error is the point of the cell, so it is caught and shown
# rather than left to abort a Restart & Run All.
quoted_number = "'123'"
try:
    int(quoted_number)
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: invalid literal for int() with base 10: "'123'"

In [4]:
# The recorded run shows the NoSQLTestCases instance has no `parser` attribute.
# Report that instead of raising, so the notebook still runs top to bottom.
try:
    print(tester.parser)
except AttributeError as exc:
    print(f"AttributeError: {exc}")

AttributeError: 'NoSQLTestCases' object has no attribute 'parser'

In [7]:
# Inspect the schema summary held by the tester. In the recorded output it is a
# nested dict: table name -> column name -> 8-item list, e.g.
# [1000, 'int', 'num', 'non-unique', 'non-null', 0, 264, []].
# NOTE(review): the items look like count, dtype, field kind, uniqueness,
# nullability, min, max and a value list — confirm against the code that builds it.
tester.schema_description

{'transactions_1k': {'Amount': [1000,
   'int',
   'num',
   'non-unique',
   'non-null',
   0,
   264,
   []],
  'CardID': [1000, 'int', 'cat', 'non-unique', 'non-null', 99745, 753582, []],
  'CustomerID': [1000,
   'int',
   'cat',
   'non-unique',
   'non-null',
   7626,
   49788,
   []],
  'Date': [1000,
   'str',
   'date',
   'non-unique',
   'non-null',
   '2012-08-23',
   '2012-08-24',
   []],
  'GasStationID': [1000, 'int', 'cat', 'non-unique', 'non-null', 48, 3869, []],
  'Price': [1000,
   'float',
   'num',
   'non-unique',
   'non-null',
   15.41,
   5762.49,
   []],
  'ProductID': [1000, 'int', 'cat', 'non-unique', 'non-null', 2, 23, []],
  'Time': [1000,
   'str',
   'time',
   'non-unique',
   'non-null',
   '03:53:00',
   '23:59:00',
   []],
  'TransactionID': [1000,
   'int',
   'ordinal-cat',
   'unique',
   'non-null',
   1,
   1000,
   []]},
 'products': {'Description': [200,
   'str',
   'cat',
   'non-unique',
   'non-null',
   -1,
   -1,
   ['Rucní zadání', 'Naf

In [5]:
# Scratch check: the union of two disjoint sets holds every element of both.
b = {1, 2, 3}
a = {4, 5, 6} | b
a

{1, 2, 3, 4, 5, 6}

In [4]:
import re
# Raw string: `\s` and `\w` are regex escapes, not Python string escapes, so a
# plain literal is an "invalid escape sequence" (a warning on current Pythons).
# The pattern skips leading punctuation, captures the run of word characters,
# whitespace and hyphens, then allows trailing punctuation.
WRAPPED_VALUE_RE = r'[^a-zA-Z0-9\s-]*([\w\s-]+)[^a-zA-Z0-9\s-]*'
re.match(WRAPPED_VALUE_RE, "'SME'.").group(1).replace("-", " ")

'SME'

In [5]:
from engine import STRING_STORE

# Inspect the engine's STRING_STORE; the recorded run displayed an empty list.
STRING_STORE

RULES FOR NLQ (NATURAL LANGUAGE QUERY)
*1 Always use KEYWORDS [where, order by, group by] in NL; slight deviations allowed
*2 Always join multiword operators with '-' [standard-deviation, minimum-of /etc]; other connectors allowed except space
*3 Where clauses don't support pure cat field comparisions 



[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>


[]

In [None]:
# Run each entry of tester.t through the SQL tester for thrombosis_prediction
# and display the collected results.
# NOTE(review): this rebinds `tester`, which an earlier cell bound to a
# NoSQLTestCases instance; cells that read `tester` depend on run order.
# NOTE(review): in the recorded output the hyphenated column is emitted unquoted
# (`WHERE T-BIL > 0.2`) and that case returns [] — confirm identifier quoting.
tester = SQLTestCases('thrombosis_prediction')
cases = tester.t
[tester(case) for case in cases]

t


####################################################################################################
NL Query :  Get the average of ANA where Thrombosis = 1 from Examination.




Reconstructed Query :  get average ANA where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD get)
  (AGG average)
  (COLUMN (NOMCAT ANA))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT AVG(ANA) FROM Examination WHERE Thrombosis = 1
Results :  [(639.2452830188679,)]
####################################################################################################
NL Query :  Retrieve the sum of ANA where Thrombosis = 2 in Examination.




Reconstructed Query :  retrieve summation ANA where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD retrieve)
  (AGG summation)
  (COLUMN (NOMCAT ANA))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT SUM(ANA) FROM Examination WHERE Thrombosis = 2
Results :  [(27848,)]
####################################################################################################
NL Query :  Fetch unique values of Symptoms where Thrombosis = 3 from Examination.




Reconstructed Query :  fetch Symptoms where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD fetch)
  (COLUMN (CAT Symptoms))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT Symptoms FROM Examination WHERE Thrombosis = 3
Results :  [('thrombocytepenia',), ('thrombocytopenia',), ('thrombocytopenia',)]
####################################################################################################
NL Query :  Fetch distinct Diagnosis values where Thrombosis = 2 in Examination.




Reconstructed Query :  fetch distinct Diagnosis where Thrombosis = <number> Examination
Parse Tree :  (S
  (CMD fetch)
  (AGG distinct)
  (COLUMN (CAT Diagnosis))
  (CLAUSE
    where
    (COLUMN (NOMCAT Thrombosis))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Examination))
SQL :  SELECT DISTINCT(Diagnosis) FROM Examination WHERE Thrombosis = 2
Results :  [(None,), ('SLE',), ('SLE+Psy',)]
####################################################################################################
NL Query :  Fetch the maximum value of GOT where GOT > 100 from Laboratory.




Reconstructed Query :  fetch maximum GOT where GOT > <number> Laboratory
Parse Tree :  (S
  (CMD fetch)
  (AGG maximum)
  (COLUMN (NOMCAT GOT))
  (CLAUSE
    where
    (COLUMN (NOMCAT GOT))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT MAX(GOT) FROM Laboratory WHERE GOT > 100
Results :  [(21480,)]
####################################################################################################
NL Query :  Get the average of CPK where TG > 100 from Laboratory.




Reconstructed Query :  get average CPK where TG > <number> Laboratory
Parse Tree :  (S
  (CMD get)
  (AGG average)
  (COLUMN (NOMCAT CPK))
  (CLAUSE
    where
    (COLUMN (NOMCAT TG))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT AVG(CPK) FROM Laboratory WHERE TG > 100
Results :  [(119.23266219239373,)]
####################################################################################################
NL Query :  Retrieve the average value of ALB where HGB > 12 from Laboratory.




Reconstructed Query :  retrieve average ALB where HGB > <number> Laboratory
Parse Tree :  (S
  (CMD retrieve)
  (AGG average)
  (COLUMN (NUM ALB))
  (CLAUSE
    where
    (COLUMN (NUM HGB))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT AVG(ALB) FROM Laboratory WHERE HGB > 12
Results :  [(4.281120716270456,)]
####################################################################################################
NL Query :  Get the total count of RBC where T-BIL > 0.2 in Laboratory.




Reconstructed Query :  get count RBC where T-BIL > <number> Laboratory
Parse Tree :  (S
  (CMD get)
  (AGG count)
  (COLUMN (NUM RBC))
  (CLAUSE
    where
    (COLUMN (NUM T-BIL))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT COUNT(RBC) FROM Laboratory WHERE T-BIL > 0.2
Results :  []
####################################################################################################
NL Query :  Fetch the range of UA values where CRP = 2 in Laboratory.




Reconstructed Query :  fetch RA UA where CRP = <number> Laboratory
Parse Tree :  (S
  (CMD fetch)
  (COLUMN (CAT RA) (COLUMN (NUM UA)))
  (CLAUSE
    where
    (COLUMN (CAT CRP))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT RA, UA FROM Laboratory WHERE CRP = 2
Results :  [('-', 7.0), (None, 8.3), (None, 6.6)]
####################################################################################################
NL Query :  Retrieve the minimum value of ALP where ALP > 15 in Laboratory.




Reconstructed Query :  retrieve minimum ALP where ALP > <number> Laboratory
Parse Tree :  (S
  (CMD retrieve)
  (AGG minimum)
  (COLUMN (NOMCAT ALP))
  (CLAUSE
    where
    (COLUMN (NOMCAT ALP))
    (NOP >)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Laboratory))
SQL :  SELECT MIN(ALP) FROM Laboratory WHERE ALP > 15
Results :  [(22,)]
####################################################################################################
NL Query :  Fetch the total count of PatientID where SEX = 1 from Patient.




Reconstructed Query :  fetch count ID where SEX = <number> Patient
Parse Tree :  (S
  (CMD fetch)
  (AGG count)
  (COLUMN (NOMCAT ID))
  (CLAUSE
    where
    (COLUMN (CAT SEX))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Patient))
SQL :  SELECT COUNT(ID) FROM Patient WHERE SEX = 1
Results :  [(0,)]
####################################################################################################
NL Query :  Get all unique values of SEX where ID = 4060811 from Patient.




Reconstructed Query :  get SEX where ID = <number> Patient
Parse Tree :  (S
  (CMD get)
  (COLUMN (CAT SEX))
  (CLAUSE
    where
    (COLUMN (NOMCAT ID))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Patient))
SQL :  SELECT SEX FROM Patient WHERE ID = 4060811
Results :  [('F',)]
####################################################################################################
NL Query :  Fetch all non-null Description values where PatientID = 1124385 from Patient.




Reconstructed Query :  fetch Description where ID = <number> Patient
Parse Tree :  (S
  (CMD fetch)
  (COLUMN (CAT Description))
  (CLAUSE
    where
    (COLUMN (NOMCAT ID))
    (NOP =)
    (VALUE <number>)
    (CLAUSE ))
  (TABLE Patient))
SQL :  SELECT Description FROM Patient WHERE ID = 1124385
Results :  [('1997-07-31',)]


[[(639.2452830188679,)],
 [(27848,)],
 [('thrombocytepenia',), ('thrombocytopenia',), ('thrombocytopenia',)],
 [(None,), ('SLE',), ('SLE+Psy',)],
 [(21480,)],
 [(119.23266219239373,)],
 [(4.281120716270456,)],
 [],
 [('-', 7.0), (None, 8.3), (None, 6.6)],
 [(22,)],
 [(0,)],
 [('F',)],
 [('1997-07-31',)]]

In [5]:
import re 

# Raw string: `\s` and `\w` are regex escapes, not Python string escapes, so a
# plain literal is an "invalid escape sequence" (a warning on current Pythons).
# The pattern skips leading punctuation, captures the run of word characters,
# whitespace and hyphens, then allows trailing punctuation.
WRAPPED_VALUE_RE = r'[^a-zA-Z0-9\s-]*([\w\s-]+)[^a-zA-Z0-9\s-]*'
re.match(WRAPPED_VALUE_RE, "'hello-john'.").group(1)

'hello-john'