# Chaining search


## Corpus search

### Issue your search query
* Run the cell below to show the Corpus search UI
* Fill in your search query in the UI

In [6]:
import ipywidgets as widgets
from IPython.display import display

# Defaults shown when the corpus search UI is first displayed.
DEFAULT_QUERY = r'[lemma="boek"]'
DEFAULT_CORPUS = "chn"
# Corpora offered in the dropdown.
CORPUS_OPTIONS = ['chn', 'opensonar', 'zeebrieven', 'gysseling', 'nederlab']

# Create UI elements
corpusQueryField = widgets.Text(description="<b>CQL query:</b>", value=DEFAULT_QUERY)
corpusField = widgets.Dropdown(
    options=CORPUS_OPTIONS,
    value=DEFAULT_CORPUS,
    description='<b>Corpus:</b>',
)
# There is deliberately no search button here: the values of these widgets are
# read when the search cell is run.

# Stack UI elements in vertical box and display
corpusUiBox = widgets.VBox([corpusQueryField, corpusField])
display(corpusUiBox)


VBox(children=(Text(value='[lemma="boek"]', description='<b>CQL query:</b>'), Dropdown(description='<b>Corpus:…

### Search!
 * Run the cell below to search

In [7]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET

def parse_xml(text):
    """Parse a CLARIN FCS searchRetrieve response into a keyword-in-context table.

    Only DataViews of type ``application/x-clarin-fcs-hits+xml`` are used;
    metadata and segmenting DataViews are ignored, as are hits DataViews that
    contain no ``Result`` element.

    Each ``Result`` is split around its first ``Hit`` element: the text before
    it is the left context, the text of the ``Hit`` is the word, and everything
    after it (including any further ``Hit`` elements) is the right context.
    Missing parts become empty strings, so every row has exactly three cells
    even when the hit sits at the very start or end of the fragment.

    Parameters
    ----------
    text : str
        XML document returned by the FCS endpoint.

    Returns
    -------
    pandas.DataFrame
        One row per hit with the columns "left context", "word" and
        "right context". Empty (but with those columns) if there are no hits.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If ``text`` is not well-formed XML.
    """
    # TODO: should we secure against untrusted XML?
    # NOTE(review): the Python documentation warns that xml.etree.ElementTree
    # is not secure against maliciously constructed data; only feed this
    # function responses from endpoints you trust.
    fcs_ns = "{http://clarin.eu/fcs/resource}"
    hits_ns = "{http://clarin.eu/fcs/dataview/hits}"
    hit_tag = hits_ns + "Hit"
    columns = ["left context", "word", "right context"]

    root = ET.fromstring(text)
    records = []
    for entry in root.iter(fcs_ns + "ResourceFragment"):
        for dataView in entry.findall(fcs_ns + "DataView"):
            # We only take into account hits, ignore metadata and segmenting dataViews
            if dataView.get("type") != "application/x-clarin-fcs-hits+xml":
                continue
            result = dataView.find(hits_ns + "Result")
            if result is None:
                # A hits DataView without a Result has nothing to report.
                continue

            # Walk the children once, switching from "left" to "right" at the
            # first Hit. Element.text / Element.tail are None when absent.
            left_parts = [result.text or ""]
            word = ""
            right_parts = []
            seen_hit = False
            for child in result:
                child_text = "".join(child.itertext())
                child_tail = child.tail or ""
                if not seen_hit and child.tag == hit_tag:
                    word = child_text
                    seen_hit = True
                    right_parts.append(child_tail)
                elif seen_hit:
                    right_parts.extend((child_text, child_tail))
                else:
                    left_parts.extend((child_text, child_tail))
            records.append(["".join(left_parts), word, "".join(right_parts)])
    return pd.DataFrame(records, columns=columns)

# Read the current values from the corpus search widgets.
cquery = corpusQueryField.value
corpus = corpusField.value
# Do request to federated content search corpora, so we get same output format for every corpus
FCS_ENDPOINT = "http://portal.clarin.inl.nl/fcscorpora/clariah-fcs-endpoints/sru"
fcs_params = {
    "operation": "searchRetrieve",
    "queryType": "fcs",
    "x-fcs-context": corpus,
    "maximumRecords": 20,
    "query": cquery,
}
# Let requests build the query string so that characters such as '&', '#',
# '+', spaces and quotes inside the CQL query are URL-encoded instead of
# corrupting the request.
response = requests.get(FCS_ENDPOINT, params=fcs_params, timeout=60)
url = response.url
print(url)
# Fail loudly on HTTP errors instead of trying to parse an error page as XML.
response.raise_for_status()
response_text = response.text
df = parse_xml(response_text)
display(df)



http://portal.clarin.inl.nl/fcscorpora/clariah-fcs-endpoints/sru?operation=searchRetrieve&queryType=fcs&x-fcs-context=opensonar&maximumRecords=20&query=[lemma="bok"]


Unnamed: 0,left context,word,right context
0,garantie te mokken en te,bokken,ze en tis niks dat
1,dag in bed liggen en,bokken,das mijn oplossing en denken
2,kameel errond krijgen of stinkende,bok,: - P
3,vreemdeling is steeds de zonde,bok,en groot dutske en de
4,voor lesbo's allemaal over den,bok,springen dan scheurt het ook
5,", weer zo een bruine",bok,"minder , er moesten er"
6,genomen en Leterme bij de,bok,"heeft gezet , tenzij ..."
7,: Meneer of mevrouw de,Bok,? Bob of Annie de
8,np 17 oktober?Kijk eens op,bokkie,zijn site en je zal
9,in een stal bij een,bok,konden blijven . De fransman


## Lexicon search

### Issue your search query
* Run the cell below to show the Lexicon search UI
* Fill in your search query in the UI

In [8]:
import ipywidgets as widgets
from IPython.display import display

# Defaults shown when the lexicon search UI is first displayed.
DEFAULT_SEARCHWORD = 'boek'
DEFAULT_LEXICON = "diamant"
# Lexica offered in the dropdown.
LEXICON_OPTIONS = ['diamant']

# Create UI elements
lexQueryField = widgets.Text(description="<b>Word:</b>", value=DEFAULT_SEARCHWORD)
lexiconField = widgets.Dropdown(
    options=LEXICON_OPTIONS,
    value=DEFAULT_LEXICON,
    description='<b>Lexicon:</b>',
)
# There is deliberately no search button here: the values of these widgets are
# read when the search cell is run.

# Stack UI elements in vertical box and display
lexUiBox = widgets.VBox([lexQueryField, lexiconField])
display(lexUiBox)



VBox(children=(Text(value='boek', description='<b>Word:</b>'), Dropdown(description='<b>Lexicon:</b>', options…

### Search!
 * Run the cell below to search

In [10]:
import requests
import pandas as pd
import json

# Read the current values from the lexicon search widgets.
lquery = lexQueryField.value
lexicon = lexiconField.value


def _sparql_escape(value):
    """Escape `value` so it can be placed inside a double-quoted SPARQL string literal."""
    return (
        value.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


# The search word is spliced into the query text between double quotes, so a
# quote or backslash typed by the user must be escaped or it would break (or
# alter) the query.
lquery_escaped = _sparql_escape(lquery)

if (lexicon=="anw"):
    # NOTE(review): this branch only builds `query`; the request code lives in
    # the "diamant" branch, so nothing is sent or displayed for "anw".
    # NOTE(review): `rdfs:` is used in the WHERE clause but is not declared in
    # this query's PREFIX list -- TODO confirm the target endpoint predefines it.
    subpart = 'FILTER ( regex(?lemma, "'+lquery_escaped+'") || regex(?definition, "'+lquery_escaped+'") ) .\n'
    query = """PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>\n
              PREFIX anw: <http://rdf.ivdnt.org/lexica/anw>\n
              PREFIX anwsch: <http://rdf.ivdnt.org/schema/anw/>\n
              PREFIX lemon: <http://lemon-model.net/lemon#>\n
              \n
              SELECT ?lemId ?lemma ?writtenForm ?definition ?definitionComplement\n
              FROM <http://rdf.ivdnt.org/lexica/anw>\n
              WHERE {\n
                  ?lemId rdfs:label ?lemma .\n
                  ?lemId ontolex:sense ?senseId .\n
                  ?senseId lemon:definition ?definitionId .\n
                  ?definitionId lemon:value ?definition .\n
                  OPTIONAL { ?definitionId anwsch:definitionComplement ?definitionComplement .}\n
                  OPTIONAL { ?lemId ontolex:canonicalForm ?lemCFId . \n
                      ?lemCFId ontolex:writtenRepresentation ?writtenForm . }\n
                  """+subpart+"""\n
                  }\n"""
elif (lexicon=="diamant"):
    # Two alternatives are UNIONed: the search word matched as a written
    # representation (inputMode "lemma") or as a synonym-definition text
    # (inputMode "defText").
    query = """
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    prefix prov: <http://www.w3.org/ns/prov#>
    prefix diamant: <http://rdf.ivdnt.org/schema/diamant#>
    prefix lexinfo: <http://www.lexinfo.net/ontology/2.0/lexinfo#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix lemon: <http://lemon-model.net/lemon#>
    prefix ontolex: <http://www.w3.org/ns/lemon/ontolex#>
    prefix ud: <http://universaldependencies.org/u/pos/>
    prefix skos: <http://www.w3.org/2004/02/skos/core#>
    prefix dcterms: <http://purl.org/dc/terms/>
    prefix dc: <http://purl.org/dc/terms/>

    select ?n_entry ?n_form ?n_ontolex_writtenRep ?n_syndef ?n_sensedef ?n_sensedef_definitionText ?n_syndef_definitionText ?n_sense ?inputMode ?wy_f_show ?wy_t_show
    where
    {
    graph ?g
    {
    {
        { ?n_form ontolex:writtenRep ?n_ontolex_writtenRep .
          values ?n_ontolex_writtenRep  { \"""" + lquery_escaped + """\" } } .
        { ?n_entry a ontolex:LexicalEntry} .
        { ?n_form a ontolex:Form} .
        { ?n_sense a ontolex:LexicalSense} .
        { ?n_syndef a diamant:SynonymDefinition} .
        { ?n_sensedef a lemon:SenseDefinition} .
        { ?n_syndef diamant:definitionText ?n_syndef_definitionText } .
        { ?n_sensedef diamant:definitionText ?n_sensedef_definitionText } .
        { ?n_entry ontolex:canonicalForm ?n_form } .
        { ?n_entry ontolex:sense ?n_sense } .
        { ?n_sense lemon:definition ?n_syndef } .
        { ?n_sense lemon:definition ?n_sensedef } .
          ?n_sense diamant:attestation ?n_attest_show .
          ?n_sense diamant:attestation ?n_attest_filter .
          ?n_attest_show diamant:text ?n_q_show .
          ?n_attest_filter diamant:text ?n_q_filter .
          ?n_attest_show a diamant:Attestation .
          ?n_attest_filter a diamant:Attestation .
          ?n_q_filter a diamant:Quotation .
          ?n_q_show a diamant:Quotation .
          ?n_q_filter diamant:witnessYearFrom ?wy_f_filter .
          ?n_q_filter diamant:witnessYearTo ?wy_t_filter .
          ?n_q_show diamant:witnessYearFrom ?wy_f_show .
          ?n_q_show diamant:witnessYearTo ?wy_t_show .
          FILTER (xsd:integer(?wy_f_show) >= 1200)
          FILTER (xsd:integer(?wy_t_show) >= 1200)
          FILTER (xsd:integer(?wy_f_show) <= 2018)
          FILTER (xsd:integer(?wy_t_show) <= 2018)
        { bind("lemma" as ?inputMode) } .
        } UNION
      {
        { ?n_syndef diamant:definitionText ?n_syndef_definitionText .
        values ?n_syndef_definitionText  { \"""" + lquery_escaped + """\" } } .
        { ?n_sense a ontolex:LexicalSense} .
        { ?n_syndef a diamant:SynonymDefinition} .
        { ?n_sensedef a lemon:SenseDefinition} .
        { ?n_form a ontolex:Form} .
        { ?n_form ontolex:writtenRep ?n_ontolex_writtenRep } .  { ?n_entry a ontolex:LexicalEntry} .
        { ?n_entry ontolex:sense ?n_sense } .
        { ?n_sense lemon:definition ?n_syndef } .
        { ?n_sense lemon:definition ?n_sensedef } .
        { ?n_sensedef diamant:definitionText ?n_sensedef_definitionText } .
        { ?n_entry ontolex:canonicalForm ?n_form } .
        ?n_sense diamant:attestation ?n_attest_show .
        ?n_sense diamant:attestation ?n_attest_filter .
        ?n_attest_filter diamant:text ?n_q_filter .
        ?n_attest_show diamant:text ?n_q_show .
        ?n_q_filter diamant:witnessYearFrom ?wy_f_filter .
        ?n_q_filter diamant:witnessYearTo ?wy_t_filter .
        ?n_q_show diamant:witnessYearFrom ?wy_f_show .
        ?n_q_show diamant:witnessYearTo ?wy_t_show .
        ?n_attest_show a diamant:Attestation .
        ?n_attest_filter a diamant:Attestation .
        ?n_q_filter a diamant:Quotation .
        ?n_q_show a diamant:Quotation .
        FILTER (xsd:integer(?wy_f_show) >= 1200)
        FILTER (xsd:integer(?wy_t_show) >= 1200)
        FILTER (xsd:integer(?wy_f_show) <= 2018)
        FILTER (xsd:integer(?wy_t_show) <= 2018)
      { bind("defText" as ?inputMode) } .
        }
    }
    }"""
    endpoint = "http://svprre02:8080/fuseki/tdb/sparql"
    url = endpoint #+ "?query=" + query
    # POST the query as form data; requests takes care of encoding it.
    response = requests.post(url, data={"query": query}, timeout=60)
    # Fail loudly on HTTP errors instead of trying to decode an error page as JSON.
    response.raise_for_status()
    response_json = response.json()
    records_json = response_json["results"]["bindings"]
    # Each binding maps a variable name to a dict; keep only its "value".
    # Variables missing from a binding simply become NaN in the frame.
    records = [
        {variable: cell["value"] for variable, cell in binding.items()}
        for binding in records_json
    ]
    df = pd.DataFrame(records)
    # reindex (rather than df[[...]]) so that an empty result set yields an
    # empty table with the expected columns instead of a KeyError.
    df_relevant = df.reindex(columns=["inputMode", "n_ontolex_writtenRep", "n_syndef_definitionText", "n_sensedef_definitionText", "wy_f_show", "wy_t_show"])
    display(df_relevant)

Unnamed: 0,inputMode,n_ontolex_writtenRep,n_syndef_definitionText,n_sensedef_definitionText,wy_f_show,wy_t_show
0,lemma,boek,acte,"Ook in den zin van officieel stuk, acte, oorko...",1228,1349
1,lemma,boek,acte,"Ook in den zin van officieel stuk, acte, oorko...",1228,1349
2,lemma,boek,acte,"Ook in den zin van officieel stuk, acte, oorko...",1456,1456
3,lemma,boek,acte,"Ook in den zin van officieel stuk, acte, oorko...",1456,1456
4,lemma,boek,oorkonde,"Ook in den zin van officieel stuk, acte, oorko...",1228,1349
5,lemma,boek,oorkonde,"Ook in den zin van officieel stuk, acte, oorko...",1228,1349
6,lemma,boek,oorkonde,"Ook in den zin van officieel stuk, acte, oorko...",1456,1456
7,lemma,boek,oorkonde,"Ook in den zin van officieel stuk, acte, oorko...",1456,1456
8,lemma,boek,Boek,Boek.,1460,1480
9,lemma,boek,Boek,Boek.,1460,1480
