# Settings for Sinequa Server to Query

In [1]:
import requests
import os
import json
from urllib.parse import urljoin
from pprint import pprint
# Connection settings are read from environment variables so that no
# credentials have to be written into the notebook itself.
USER = os.getenv('SINEQUA_USER')
PASS = os.getenv('SINEQUA_PASS')
ROOT_URL = os.getenv('SINEQUA_ROOT_URL')
APP = os.getenv('SINEQUA_APP')
QAPP = os.getenv('SINEQUA_QUERY_APP')
QENDPT = os.getenv('SINEQUA_ENDPOINT_QUERY')

# os.getenv returns None for a variable that is not set. Report those here,
# next to the configuration, instead of letting them surface later as a
# confusing failure when a request is sent.
_unset_env = [
    name
    for name in (
        'SINEQUA_USER',
        'SINEQUA_PASS',
        'SINEQUA_ROOT_URL',
        'SINEQUA_APP',
        'SINEQUA_QUERY_APP',
        'SINEQUA_ENDPOINT_QUERY',
    )
    if not os.getenv(name)
]
if _unset_env:
    print("WARNING: environment variables not set: {}".format(", ".join(_unset_env)))

# Full URL of the search endpoint: the endpoint path resolved against the server root.
query_url = urljoin(ROOT_URL, QENDPT)

# Echo the non-secret settings only; USER and PASS are deliberately not printed.
print("Configured:")
print("query endpoint: {}".format(query_url))
print("app: {}".format(APP))
print("query app: {}".format(QAPP))

Configured:
query endpoint: http://ec2-54-156-88-118.compute-1.amazonaws.com/api/v1/search.query
app: nasa-sba-smd
query app: query-smd-primary


# Search Query Setup

In [2]:
# Search keyword shared by the query cells that follow.
query = "osiris"
print('Set up keyword search for "{}".'.format(query))

Set up keyword search for "osiris".


# Define Functions to Query Sinequa and Summarize Response

In [3]:
def sinequa_query(query_url, payload, timeout=30):
    """POST a search payload to the query endpoint and return the decoded reply.

    Args:
        query_url: URL the request is posted to (replace with your SDE
            instance API URL).
        payload: JSON-serializable object; it is serialized with
            ``json.dumps`` and sent as the request body.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Without a timeout ``requests`` waits indefinitely, so an
            unresponsive server would hang the notebook kernel.

    Returns:
        The response body as parsed by ``resp.json()``.

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            the timeout expires.
        ValueError: if the response body cannot be decoded as JSON.
    """
    # NOTE(review): the body is JSON but is labelled text/plain. Kept as-is
    # because the recorded runs succeed with it -- confirm before changing.
    headers = {
        'Content-Type': 'text/plain'
    }
    resp = requests.post(
        query_url,
        data=json.dumps(payload),
        headers=headers,
        timeout=timeout,
    )
    return resp.json()

def search_resp_summary(resp_json):
    """Print a short summary of a search response and return its record count.

    When ``resp_json`` contains a ``'records'`` entry, the number of records
    is printed, followed by a pretty-printed dump of the first record if
    there is one. When the entry is absent, the whole response is
    pretty-printed instead and 0 is returned.

    Args:
        resp_json: Decoded search response, as returned by ``sinequa_query``.

    Returns:
        The number of records in the response, or 0 when it has no
        ``'records'`` entry.
    """
    if 'records' in resp_json:
        hits = resp_json['records']
        count = len(hits)
        print("Response has {} records".format(count))
        if count:
            # Show one sample record so the shape of a result is visible.
            print("First record:")
            pprint(hits[0])
        return count

    # No 'records' entry: dump the full response so the cause can be inspected.
    print("No records found:")
    pprint(resp_json)
    return 0
    
# Visible confirmation that this cell, including the definitions above, ran.
print("finished")

finished


# Basic Keyword Search

In [4]:
# Simplest request: credentials, the app, and a query block that carries
# only the query app name and the search text.
payload_keyword_search = dict(
    user=USER,      # replace with your username
    password=PASS,  # replace with your password
    app=APP,        # replace with your app name
    query=dict(
        name=QAPP,   # replace with your query app name
        text=query,  # replace with your query
    ),
)
resp_keyword_search = sinequa_query(query_url, payload_keyword_search)
nrecords_keyword_search = search_resp_summary(resp_keyword_search)

Response has 10 records
First record:
{'collection': ['/SMD_Planetary/PDS_API_Dataset_Only/'],
 'databasealias': 'NASA_PDS',
 'docformat': 'htm',
 'documentweight': 'default',
 'extracts': [{'highlighted': 'ROSETTA {b}OSIRIS{nb} SHAPE MODELS OF COMET '
                              '67P/C-G '
                              'urn:nasa:pds:context_pds3:data_set:data_set.ro-c-osinac-osiwac-5-67p-shape-v1.',
               'locations': '0,123',
               'originalLocations': '1170,459',
               'score': '49152'},
              {'highlighted': '0 {b}OSIRIS{nb} - WIDE ANGLE CAMERA ',
               'locations': '696,29',
               'originalLocations': '4288,133',
               'score': '49152'},
              {'highlighted': '{b}OSIRIS{nb} - NARROW ANGLE CAMERA Not '
                              'Provided INTERNATIONAL ROSETTA MISSION Not '
                              'Provided 2015-01-11T23:20:32Z 2015-01-08 '
                              'A.C.Raugh Creation Product_Data

# Paging of Search Results

In [5]:
# Paging controls: how many documents make up a page, and which page to fetch.
page_size = 20
page = 2

# Same keyword search as before, with the two paging fields added to the query block.
payload_keyword_search_with_paging = dict(
    user=USER,      # replace with your username
    password=PASS,  # replace with your password
    app=APP,        # replace with your app name
    query=dict(
        name=QAPP,           # replace with your query app name
        text=query,          # replace with your query
        pageSize=page_size,  # <--- replace with your desired page size
        page=page,           # <--- replace with your desired page number
    ),
)
resp_keyword_search_with_paging = sinequa_query(
    query_url, payload_keyword_search_with_paging
)
nrecords_keyword_search_with_paging = search_resp_summary(
    resp_keyword_search_with_paging
)

Response has 20 records
First record:
{'collection': ['/SMD_Planetary/PDS_API_Dataset_Only/'],
 'databasealias': 'NASA_PDS',
 'docformat': 'htm',
 'documentweight': 'default',
 'extracts': [{'highlighted': '4 {b}OSIRIS{nb} - NARROW ANGLE CAMERA Not '
                              'Provided INTERNATIONAL ROSETTA MISSION Not '
                              'Provided 2019-01-11T00:00:00Z SFH 2010-11-12 '
                              'Product_Data_Set_PDS3 Not Provided Not Provided '
                              'Not Provided Not Provided PDS3 Not Provided Not '
                              'Provided Not Provided Not Provided '
                              'Product_Data_Set_PDS3 '
                              'https://archives.esac.esa.int/psa/pdap/metadata?DATA_SET_ID=RO-X-OSINAC-2-CR4B-CRUISE4B-V1.4&RETURN_TYPE=HTML '
                              'Not Provided '
                              'urn:esa:psa:context_pds3:resource:resource.ro-x-osinac-2-cr4b-cruise4b-v1.',
             

# Sort by column "size" descending

In [6]:
# Keyword search with an "orderBy" field requesting the "size" column in descending order.
payload_sort_by_size_descending = dict(
    user=USER,      # replace with your username
    password=PASS,  # replace with your password
    app=APP,        # replace with your app name
    query=dict(
        name=QAPP,            # replace with your query app name
        text=query,           # replace with your query
        orderBy="size desc",  # <-- Enter column name and "asc" or "desc" for sort order
    ),
)
resp_sort_by_size_descending = sinequa_query(
    query_url, payload_sort_by_size_descending
)
nrecords_sort_by_size_descending = search_resp_summary(resp_sort_by_size_descending)

Response has 10 records
First record:
{'collection': ['/SMD_Planetary/PDS_API_Dataset_Only/'],
 'databasealias': 'NASA_PDS',
 'docformat': 'htm',
 'documentweight': 'default',
 'extracts': [{'highlighted': '0 Product_Data_Set_PDS3 PDS3 esa Not Provided '
                              'Not Provided Sierks, H. and the {b}OSIRIS{nb} '
                              'Team, ROSETTA-ORBITER ROSETTA EXTENSION 2 '
                              'OSINAC 2 EDR MTP029 V1.',
               'locations': '143,150',
               'originalLocations': '1649,813',
               'score': '49152'},
              {'highlighted': '0 {b}OSIRIS{nb} - NARROW ANGLE CAMERA Not '
                              'Provided INTERNATIONAL ROSETTA MISSION Not '
                              'Provided 2019-01-11T00:00:00Z '
                              'RO-RIS-MPAE-ID-015 4/c Product_Data_Set_PDS3 '
                              'Not Provided Not Provided Not Provided Not '
                              'Provided PDS3 

# Sort by column "size" ascending

In [7]:
# Keyword search with an "orderBy" field requesting the "size" column in ascending order.
payload_sort_by_size_ascending = dict(
    user=USER,      # replace with your username
    password=PASS,  # replace with your password
    app=APP,        # replace with your app name
    query=dict(
        name=QAPP,           # replace with your query app name
        text=query,          # replace with your query
        orderBy="size asc",  # <-- Enter column name and "asc" or "desc" for sort order
    ),
)
resp_sort_by_size_ascending = sinequa_query(
    query_url, payload_sort_by_size_ascending
)
nrecords_sort_by_size_ascending = search_resp_summary(resp_sort_by_size_ascending)

Response has 10 records
First record:
{'collection': ['/SMD_Planetary/PDS_API_Dataset_Only/'],
 'databasealias': 'NASA_PDS',
 'docformat': 'htm',
 'documentweight': 'default',
 'extracts': [{'highlighted': '2 {b}OSIRIS{nb} - WIDE ANGLE CAMERA Not '
                              'Provided INTERNATIONAL ROSETTA MISSION Not '
                              'Provided 2019-01-11T00:00:00Z SFH 2010-11-12 '
                              'Product_Data_Set_PDS3 Not Provided Not Provided '
                              'Not Provided Not Provided PDS3 Not Provided Not '
                              'Provided Not Provided Not Provided '
                              'Product_Data_Set_PDS3 '
                              'https://archives.esac.esa.int/psa/pdap/metadata?DATA_SET_ID=RO-X-OSIWAC-3-CR5-CRUISE5-V1.2&RETURN_TYPE=HTML '
                              'Not Provided '
                              'urn:esa:psa:context_pds3:resource:resource.ro-x-osiwac-3-cr5-cruise5-v1.',
               'loc

# Filter by tab - no match to "Images"

In [8]:
# Paged keyword search limited to the "Images" tab.
payload_filter_by_tab_nomatch = dict(
    user=USER,      # replace with your username
    password=PASS,  # replace with your password
    app=APP,        # replace with your app name
    query=dict(
        name=QAPP,     # replace with your query app name
        text=query,    # replace with your query
        pageSize=20,
        page=2,
        tab="Images",  # <--- Enter tab label
    ),
)
resp_filter_by_tab_nomatch = sinequa_query(query_url, payload_filter_by_tab_nomatch)
nrecords_filter_by_tab_nomatch = search_resp_summary(resp_filter_by_tab_nomatch)

Response has 0 records


# Filter by tab - matches to "Data"

In [9]:
# Paged keyword search limited to the "Data" tab.
payload_filter_by_tab_match = dict(
    user=USER,      # replace with your username
    password=PASS,  # replace with your password
    app=APP,        # replace with your app name
    query=dict(
        name=QAPP,   # replace with your query app name
        text=query,  # replace with your query
        pageSize=20,
        page=2,
        tab="Data",  # <--- Enter tab label
    ),
)
resp_filter_by_tab_match = sinequa_query(query_url, payload_filter_by_tab_match)
nrecords_filter_by_tab_match = search_resp_summary(resp_filter_by_tab_match)

Response has 20 records
First record:
{'collection': ['/SMD_Planetary/PDS_API_Dataset_Only/'],
 'databasealias': 'NASA_PDS',
 'docformat': 'htm',
 'documentweight': 'default',
 'extracts': [{'highlighted': '4 {b}OSIRIS{nb} - NARROW ANGLE CAMERA Not '
                              'Provided INTERNATIONAL ROSETTA MISSION Not '
                              'Provided 2019-01-11T00:00:00Z SFH 2010-11-12 '
                              'Product_Data_Set_PDS3 Not Provided Not Provided '
                              'Not Provided Not Provided PDS3 Not Provided Not '
                              'Provided Not Provided Not Provided '
                              'Product_Data_Set_PDS3 '
                              'https://archives.esac.esa.int/psa/pdap/metadata?DATA_SET_ID=RO-X-OSINAC-2-CR4B-CRUISE4B-V1.4&RETURN_TYPE=HTML '
                              'Not Provided '
                              'urn:esa:psa:context_pds3:resource:resource.ro-x-osinac-2-cr4b-cruise4b-v1.',
             

# Filter by Facet

Facets are presented in Sinequa as branches of a tree that can be defined when data sources are configured for indexing.  One or more branches or sub-branches of the tree can be specified as "tree paths".  The search results are filtered to only the documents under those tree paths.

To restrict results to documents under any of the tree paths `/path1`, `/path2`, or `/path3`, add a `select` element like this to the `query` object of the payload:

    'select': [{'expression': 'treepath: (`/path1/*`:`/path2/*`:`/path3/*`)',
                'facet': 'Treepath'}],
The backticks are required.  In the example below, there is a single tree path (facet), `/Planetary Science`.

In [11]:
treepath = '/Planetary Science' # <-- Enter your facet treepath with a leading slash

# Keyword search restricted to documents under `treepath`. The search text
# comes from the shared `query` variable, like every other search cell, so
# changing the keyword in one place changes it here too.
payload_filter_by_facet = {
    "app": APP,
    "user": USER,
    "password": PASS,
    "query": {
        'name': QAPP,
        # The backticks around the path are part of the expression syntax.
        'select': [{'expression': 'treepath: (`{}/*`)'.format(treepath),
                    'facet': 'Treepath'}],
        'text': query,
    },
}
resp_filter_by_facet = sinequa_query(
    query_url,
    payload_filter_by_facet
)
nrecords_filter_by_facet = search_resp_summary(
    resp_filter_by_facet
)

Response has 10 records
First record:
{'collection': ['/SMD_Planetary/PDS_API_Dataset_Only/'],
 'databasealias': 'NASA_PDS',
 'docformat': 'htm',
 'documentweight': 'default',
 'extracts': [{'highlighted': 'ROSETTA {b}OSIRIS{nb} SHAPE MODELS OF COMET '
                              '67P/C-G '
                              'urn:nasa:pds:context_pds3:data_set:data_set.ro-c-osinac-osiwac-5-67p-shape-v1.',
               'locations': '0,123',
               'originalLocations': '1170,459',
               'score': '49152'},
              {'highlighted': '0 {b}OSIRIS{nb} - WIDE ANGLE CAMERA ',
               'locations': '696,29',
               'originalLocations': '4288,133',
               'score': '49152'},
              {'highlighted': '{b}OSIRIS{nb} - NARROW ANGLE CAMERA Not '
                              'Provided INTERNATIONAL ROSETTA MISSION Not '
                              'Provided 2015-01-11T23:20:32Z 2015-01-08 '
                              'A.C.Raugh Creation Product_Data