## Notebook for uploading dataset to ElasticSearch

###  Elasticsearch manager

In [None]:
# Import and variables
import collections
import os
from typing import Union, List, Dict
import pandas as pd

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk, scan, streaming_bulk

# Map common python types to ES Types
TYPE_MAP = {
    "int": "integer",
    "float": "float",
    "double": "double",
    "str": "text",
    "bool": "boolean",
    "datetime": "date",
    "list[int]": "integer",
    "list[str]": "text",
    "list[float]": "float",
    "list[double]": "double",
    "torch.tensor": "dense_vector",
    "numpy.ndarray": "dense_vector",
    "keyword": "keyword"
}

MAX_BULK_SIZE = 100

os.environ['ELASTIC_USERNAME'] = 'elastic'
os.environ['ELASTIC_PASSWORD'] = '' # To be filled
os.environ['ELASTIC_PORT'] = '9200'
os.environ['ELASTIC_HOST'] = 'localhost'

In [3]:
class ESManager():
    """
    Class to manage ElaticSearch
    """
    def __init__(self):
        self.url = f"http://{os.getenv('ELASTIC_HOST')}:{os.getenv('ELASTIC_PORT')}"
        self.username =  os.getenv('ELASTIC_USERNAME')
        self.password = os.getenv('ELASTIC_PASSWORD')

        self.client = Elasticsearch(self.url,
                                    verify_certs=False,
                                    basic_auth=(self.username, self.password), request_timeout=30, max_retries=10, retry_on_timeout=True)

        print(self.client.info())

        self.consolidated_actions = []

    def _check_data_type(self, var, var_type):
        try:
            assert type(var) == var_type
        except:
            return False
        return True

    def _check_valid_values(self, map_dict: dict) -> int:
        """
        Traverse mapping dictionary to ensure that all types are valid types within TYPE_MAP

        Args:
            map_dict (dict): Mapping to be checked

        Returns:
            int: 0 if there is invalid types, 1 otherwise

        """
        ret_val = 1
        for k, v in map_dict.items():
            if isinstance(v, dict):
                ret_val = self._check_valid_values(v)
            else:
                if not v in TYPE_MAP:
                    print(f"'{v}' type for '{k}' NOT FOUND")
                    return 0

        return ret_val * 1

    def _traverse_map(self, map_dict: Dict) -> Dict:
        """
        Traverse mapping dictionary to convert data type into framework specific type

        Args:
            map_dict (dict): Mapping to be used to create ES index

        Returns:
            dict: updated mapping dictionary

        """
        dictionary = {"properties": dict()}
        for k, v in map_dict.items():
            if isinstance(v, dict):
                dictionary['properties'][k] = self._traverse_map(v)
            else:
                dictionary['properties'][k] = {"type": TYPE_MAP[v]}
        return dictionary

    def _flush(self):
        errors = []
        list_of_es_ids = []
        for ok, item in streaming_bulk(self.client, self.consolidated_actions):
            if not ok:
                errors.append(item)
            else:
                list_of_es_ids.append(item['index']['_id'])
        if len(errors) != 0:
            print("List of faulty documents:", errors)
        self.consolidated_actions = []  # Reset List
        return list_of_es_ids

    def _flatten(self, d, parent_key='', sep='.'):
        """
        Flatten nested dictionary keys to dotted parameters because Elasticsearch. 
        """
        items = []
        for k, v in d.items():
            new_key = parent_key + sep + k if parent_key else k
            if isinstance(v, collections.MutableMapping):
                items.extend(self._flatten(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

    def create_collection(self, collection_name: str, schema: Dict, custom_schema: bool = False) -> Dict:
        """
        Create the index on ElasticSearch

        Args:
            collection_name (str): Index name of ES
            schema (dict): Mapping to be used to create ES index
            custom_schema (bool): If set to True, user may input schema that in accordance to ElasticSearch Mapping's format. The schema will not be parsed. 

        Returns:
            dict: response of error, or 200 if no errors caught

        """
        if not self._check_data_type(schema, dict):
            return {"response": "Type of 'schema' is not dict"}
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        if not self._check_data_type(custom_schema, bool):
            return {"response": "Type of 'custom_schema' is not bool"}
        if custom_schema:
            try:
                self.client.indices.create(
                    index=collection_name, mappings=schema)
            except Exception as e:
                return {"response": f"{e}"}
            return {"response": "200"}
        else:
            mapping_validity = self._check_valid_values(schema)
            if not mapping_validity:
                return {"response": "KeyError: data type not found in TYPE_MAP"}
            updated_mapping = self._traverse_map(schema)
            try:
                self.client.indices.create(
                    index=collection_name, mappings=updated_mapping)
            except Exception as e:
                return {"response": f"{e}"}
            return {"response": "200"}

    def delete_collection(self, collection_name: str) -> dict:
        """
        Create the index on ElasticSearch

        Args:
            collection_name (str): Index name of ES
            schema (dict): Mapping to be used to create ES index

        Returns:
            dict: response of error, or 200 if no errors caught

        """
        try:
            self.client.indices.delete(index=collection_name)
        except Exception as e:
            return {"response": f"{e}"}
        return {"response": "200"}

    def create_document(self, collection_name: str, documents: Union[list, dict], id_field: str = None) -> dict:
        """
        Upload document(s) in the specified index within ElasticSearch

        Args:
            collection_name (str): Index name of ES
            documents (dict, list): A dict of document objects to be ingested. A list of dict is accepted as well. 
            id_field (str, Optional): Specify the key amongst the document object to be the id field. If not specified, id will be generated by ES. 

        Returns:
            dict: response of error along with the faulty document, or code 200 along with the ids of ingested document if no errors caught

        """
        if not self._check_data_type(documents, list):
            if not self._check_data_type(documents, dict):
                return {"response": "Type of 'documents' is not dict or a list"}
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        if not id_field is None:
            if not self._check_data_type(id_field, str):
                return {"response": "Type of 'id_field' is not str"}

        # If single document, wrap it in a list so it can be an iterable as it would be when a list of document is submitted
        if type(documents) == dict:
            documents = [documents]

        # If id_field is specified, verify that all documents possess the id_field.
        if id_field != None:
            for doc in documents:
                if not id_field in doc.keys():
                    print(
                        "Fix document, or set 'id_field' to None. No documents uploaded.")
                    return {"response": "Fix document, or set 'id_field' to None. No documents uploaded.",
                            "error_doc": doc}
                try:
                    doc[id_field] = str(doc[id_field])
                except Exception as e:
                    return {"response": "id cannot be casted to String type. No documents uploaded.",
                            "error_doc": doc}
        all_id = []
        for doc in documents:
            doc_copy = dict(doc)
            action_dict = {}
            action_dict['_op_type'] = 'index'
            action_dict['_index'] = collection_name
            if id_field != None:
                action_dict['_id'] = doc_copy[id_field]
                doc_copy.pop(id_field)
            action_dict['_source'] = doc_copy
            self.consolidated_actions.append(action_dict)
            if len(self.consolidated_actions) == MAX_BULK_SIZE:
                all_id = all_id+self._flush()

        all_id = all_id+self._flush()

        return {"response": "200", "ids": all_id}

    def delete_document(self, collection_name: str, doc_id: str) -> dict:
        """
        Delete document from index based on the specified document id. 

        Args:
            collection_name (str): Index name of ES
            doc_id (str): id of doc to be deleted

        Returns:
            dict: response of error along with the faulty document, or code 200 along with elastic API response

        """
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        if not self._check_data_type(doc_id, str):
            return {"response": "Type of 'doc_id' is not str"}

        # Check for document's existence
        search_result = self.client.search(index=collection_name, query={
                                           "match": {"_id": doc_id}})
        result_count = search_result['hits']['total']['value']

        if result_count == 0:
            return {"response": f"Document '{doc_id}' not found!"}

        try:
            resp = self.client.delete(index=collection_name, id=doc_id)
        except Exception as e:
            return {"response": f"{e.__class__.__name__}. Document Deletion failed"}

        return {"response": "200", "api_resp": resp}

    def update_document(self, collection_name: str, doc_id: str, document: dict) -> dict:
        """
        Delete document from index based on the specified document id. 

        Args:
            collection_name (str): Index name of ES
            doc_id (str): id of doc to be updated
            document (dict): key and values of fields to be updated.

        Returns:
            dict: response of error along with the faulty document, or code 200 along with elastic API response

        """
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        if not self._check_data_type(doc_id, str):
            return {"response": "Type of 'doc_id' is not str"}
        if not self._check_data_type(document, dict):
            return {"response": "Type of 'document' is not dict"}

        # Check for document's existence
        search_result = self.client.search(index=collection_name, query={
                                           "match": {"_id": doc_id}})
        result_count = search_result['hits']['total']['value']

        if result_count == 0:
            return {"response": f"Document '{doc_id}' not found, create document first"}

        try:
            for key in document.keys():

                q = {
                    "script": {
                        "source": f"ctx._source.{key}=params.infer",
                        "params": {
                            "infer": document[key]
                        },
                        "lang": "painless"
                    },
                    "query": {
                        "match": {
                            "_id": doc_id
                        }
                    }
                }
                resp = self.client.update_by_query(
                    body=q, index=collection_name)
        except Exception as e:
            return {"response": f"{e.__class__.__name__}. Document Update failed"}

        return {"response": "200", "api_resp": resp}

    def read_document(self, collection_name: str, doc_id: str) -> dict:
        """
        Read document from index based on the specified document id. 

        Args:
            collection_name (str): Index name of ES
            doc_id (str): id of doc to be read

        Returns:
            dict: response of error along with the faulty document, or code 200 along with the retrieved document

        """
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        if not self._check_data_type(doc_id, str):
            return {"response": "Type of 'doc_id' is not str"}

        # Check for document's existence
        search_result = self.client.search(index=collection_name, query={
                                           "match": {"_id": doc_id}})
        result_count = search_result['hits']['total']['value']

        if result_count == 0:
            return {"response": f"Document '{doc_id}' not found!"}

        doc_body = search_result['hits']['hits']

        return {"response": "200", "api_resp": doc_body}

    def query_collection(self, collection_name: str, field_value_dict: dict) -> dict:
        """
        Read document from index based on the specific key-value dictionary query. 

        Args:
            collection_name (str): Index name of ES
            field_value_dict (dict): A dictionary with the field to be queried as the key, and the value to be queried as the value of the dictionary. 
                                    example: {"field1":"query1", "field2", "query2"}

        Returns:
            dict: response of error along with the faulty document, or code 200 along with the list of retrieved document

        """
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        if not self._check_data_type(field_value_dict, dict):
            return {"response": "Type of 'field_value_dict' is not dict"}

        # Check for document's existence
        reorg_dict = {"bool":{
            "should":[]
            }
        }
        for field in field_value_dict:
            reorg_dict['bool']['should'].append({"match":{field:field_value_dict[field]}})

        search_result = self.client.search(index=collection_name, query=reorg_dict)
        result_count = search_result['hits']['total']['value']

        if result_count == 0:
            return {"response": f"No documents found."}

        docs = search_result['hits']['hits']

        return {"response": "200", "api_resp": docs}

    def custom_query(self, collection_name: str, query: dict, size:int=10) -> dict:
        """
        Read document from index based on custom ES query syntax. 

        Args:
            collection_name (str): Index name of ES
            query (dict): Custom query for ES users who are familiar with the query format
            size (int): Number of results to return per query

        Returns:
            dict: response of error along with the faulty document, or code 200 along with the list of retrieved document

        """
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        if not self._check_data_type(query, dict):
            return {"response": "Type of 'field_value_dict' is not dict"}

        # Check for document's existence
        search_result = self.client.search(index=collection_name, query=query, size=size)
        result_count = search_result['hits']['total']['value']

        if result_count == 0:
            return {"response": f"No documents found."}

        docs = search_result['hits']['hits']

        return {"response": "200", "api_resp": docs}

    def get_all_documents(self, collection_name: str) -> dict:
        """
        Generator method to retrieve all documents within the index

        Args:
            collection_name (str): Index name of ES

        Returns:
            Generator Object: Iterable object containing all documents within index specified. 
        """
        if not self._check_data_type(collection_name, str):
            return {"response": "Type of 'collection_name' is not str"}
        docs_response = scan(self.client, index=collection_name, query={
                             "query": {"match_all": {}}})
        for item in docs_response:
            yield item

In [4]:
esManager = ESManager()

{'name': 'f9de9f21a6c3', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'lQ9maNGqThyqboUYSwPNJQ', 'version': {'number': '8.10.1', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'a94744f97522b2b7ee8b5dc13be7ee11082b8d6b', 'build_date': '2023-09-14T20:16:27.027355296Z', 'build_snapshot': False, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


### Preparing Data for ES

In [19]:
dataset = pd.read_csv('../src/flora_data/dataset.csv', sep=',', header=0)
# Update 'None' to actual None
dataset = dataset.where(pd.notnull(dataset), None)
dataset.head()


Unnamed: 0,Scientific Name,Common Name,Species ID,Link,Plant Type,Light Preference,Water Preference,Drought Tolerant?,Native to SG?,Fruit Bearing?,...,Attracted Animals,Native habitat,Mature Leaf Colour,Young Flush Leaf Colour,Leaf Area Index,Growth Rate,Trunk Texture,Trunk Colour,Leaf Texture,Canopy Radius
0,Aglaonema simplex (Blume) Blume,Malayan Sword,3740,https://www.nparks.gov.sg/florafaunaweb/flora/...,"Herbaceous Plant, Shrub","Full Shade, Semi Shade","Lots of Water, Moderate Water",False,True,False,...,-,"Terrestrial (Primary Rainforest, Secondary Rai...",Green,-,3.5 (Shrub & Groundcover - Monocot),Slow,,,Coarse,
1,Carludovica palmata,Panama Hat Palm,1783,https://www.nparks.gov.sg/florafaunaweb/flora/...,Shrub,"Full Sun, Semi Shade",Moderate Water,False,False,True,...,-,Terrestrial,Green,-,3.5 (Shrub & Groundcover - Monocot),Fast to Moderate,,,Coarse,
2,Acalypha wilkesiana 'Mosaica',Fire Fiji Plant,1593,https://www.nparks.gov.sg/florafaunaweb/flora/...,Shrub,"Full Sun, Semi Shade","Lots of Water, Moderate Water",False,False,False,...,-,Terrestrial,"Green, Purple, Red","Orange, Red, Green - Light Green",4.5 (Shrub & Groundcover - Dicot),Moderate,,,Coarse,
3,Ixora 'Dwarf Yellow',-,2132,https://www.nparks.gov.sg/florafaunaweb/flora/...,Shrub,Full Sun,Moderate Water,False,False,True,...,"Bird-Attracting, Butterfly Host Plant",Terrestrial,"Green, Yellow / Golden",-,4.5 (Shrub & Groundcover - Dicot),Moderate,,,Medium,
4,Horsfieldia irya (Gaertn.) Warb.,Pianggu,2964,https://www.nparks.gov.sg/florafaunaweb/flora/...,Tree,Full Sun,"Lots of Water, Moderate Water",False,True,True,...,Bird-Attracting (Fruits),"Terrestrial (Primary Rainforest, Coastal Fores...",Green,-,3.0 (Tree - Intermediate Canopy),Moderate,"Fissured, Cracked",red,,15.0


In [5]:
dataset_schema = {
    "mappings": {
        "properties": {
            "Scientific Name" : {"type": "str"},
            "Common Name": {"type": "str"},
            "Species ID": {"type": "int"},
            "Link": {"type": "str"},
            "Plant Type": {"type": "keyword"},
            "Light Preference": {"type": "keyword"},
            "Water Preference": {"type": "keyword"}, 
            "Drought Tolerant": {"type": "bool"},
            "Native to SG": {"type": "bool"},
            "Fruit Bearing": {"type": "bool"},
            "Fragrant Plant": {"type": "bool"},
            "Maximum Height (m)": {"type": "float"},
            "Flower Colour": {"type": "str"},
            "Hazard": {"type": "str"},
            "Attracted Animals": {"type": "str"},
            "Native habitat": {"type": "str"},
            "Mature Leaf Colour": {"type": "str"},
            "Young Flush Leaf Colour": {"type": "str"},
            "Leaf Area Index": {"type": "str"},
            "Growth Rate": {"type": "keyword"},
            "Trunk Colour": {"type": "str"},
            "Trunk Texture": {"type": "str"},
            "Leaf Texture": {"type": "keyword"},            
        }
    }
}

include_canopy_radius = True

if include_canopy_radius:
    dataset_schema["mappings"]["properties"]["Canopy Radius"] = {"type": "int"}


esManager.delete_collection('flora')
esManager.create_collection('flora', dataset_schema)

{'response': '200'}

In [6]:
def ingest_dataset(csv_filepath:str, esManager:ESManager, collection_name:str, include_canopy_radius:bool=False):
    """
    Function to ingest flora csv dataset file into elasticsearch

    Args:
        csv_filepath (str): filepath to csv file
        esManager (ESManager): ESManager instance
        collection_name (str): Collection Name to ingest data
        include_canopy_radius (bool, Optional): Dataset include canopy_radius? Defaults to False
    """

    dataset = pd.read_csv(csv_filepath, sep=',', header=0)
    # Update empty data and 'None' to N/A
    dataset = dataset.where(pd.notnull(dataset), 'N/A')
    # Update any edited boolean to true booleans
    dataset = dataset.applymap(lambda x: True if str(x).strip().upper() == 'TRUE' 
                           else False if str(x).strip().upper() == 'FALSE' 
                           else x)


    NA_columns = ['Common Name', 'Hazard', 'Attracted animals', 'Young Flush Leaf Colour']
    false_column = ['Fragrant Plant?'] 
    int_column = ["Species ID"]
    float_column = ["Maximum Height"]
    keyword_column = ["Plant Type", "Light Preference", "Water Preference", "Growth Rate", 'Leaf Texture']

    if include_canopy_radius:
        int_column.append("Canopy Radius")

    all_documents = []
    headers = dataset.columns

    for index, row in dataset.iterrows():
        document_data = {}

        for attribute in headers:
            data = row[attribute]
            
            if attribute in NA_columns and data == '-':
                data = "N/A"
            
            # False for '-' but if N/A (for trees, use null and remove frm DB)
            if attribute in false_column and data == '-':
                data = False

            if attribute in false_column and data == 'N/A':
                data = None
            
            if attribute in int_column and data != "N/A":
                data = int(data)
            
            if attribute in float_column and data != "N/A":
                data = float(data)
                attribute = "Maximum Height (m)"

            if attribute in keyword_column:
                data = [attr.strip() for attr in data.split(",")]
            
            if '?' in attribute:
                attribute = attribute[:-1]
            
            document_data[attribute] = data
        
       
        all_documents.append(document_data)

    print(f"Ingesting {len(all_documents)} documents")
    return esManager.create_document(collection_name, all_documents)


ingest_dataset('../src/flora_data/dataset_with_canopy_radius.csv', esManager, 'flora', include_canopy_radius)

  dataset = dataset.applymap(lambda x: True if str(x).strip().upper() == 'TRUE'


Ingesting 30 documents


{'response': '200',
 'ids': ['B8ohd5MBxotSUxqwpQx8',
  'CMohd5MBxotSUxqwpQx8',
  'Ccohd5MBxotSUxqwpQx8',
  'Csohd5MBxotSUxqwpQx8',
  'C8ohd5MBxotSUxqwpQx8',
  'DMohd5MBxotSUxqwpQx8',
  'Dcohd5MBxotSUxqwpQx8',
  'Dsohd5MBxotSUxqwpQx8',
  'D8ohd5MBxotSUxqwpQx8',
  'EMohd5MBxotSUxqwpQx8',
  'Ecohd5MBxotSUxqwpQx8',
  'Esohd5MBxotSUxqwpQx8',
  'E8ohd5MBxotSUxqwpQx8',
  'FMohd5MBxotSUxqwpQx8',
  'Fcohd5MBxotSUxqwpQx8',
  'Fsohd5MBxotSUxqwpQx8',
  'F8ohd5MBxotSUxqwpQx8',
  'GMohd5MBxotSUxqwpQx8',
  'Gcohd5MBxotSUxqwpQx8',
  'Gsohd5MBxotSUxqwpQx8',
  'G8ohd5MBxotSUxqwpQx8',
  'HMohd5MBxotSUxqwpQx8',
  'Hcohd5MBxotSUxqwpQx8',
  'Hsohd5MBxotSUxqwpQx8',
  'H8ohd5MBxotSUxqwpQx8',
  'IMohd5MBxotSUxqwpQx8',
  'Icohd5MBxotSUxqwpQx8',
  'Isohd5MBxotSUxqwpQx8',
  'I8ohd5MBxotSUxqwpQx8',
  'JMohd5MBxotSUxqwpQx8']}

In [None]:
all_data = esManager.get_all_documents('flora')
for i in all_data:
    print(i)

### Query Testing

In [25]:
result = esManager.query_collection('flora', {
    "Plant Type.keyword": "Tree",
    "Water Preference.keyword": "Moderate",
    "Trunk Colour.keyword": "red",
    "Attracted Animals.keyword": "butterfly"
})

print(len(result['api_resp']))
for data in result['api_resp']:
    print(data['_source'])
    print(data['_score'])

10
{'Scientific Name': 'Horsfieldia irya (Gaertn.) Warb.', 'Common Name': 'Pianggu', 'Species ID': 2964, 'Link': 'https://www.nparks.gov.sg/florafaunaweb/flora/2/9/2964', 'Plant Type': ['Tree'], 'Light Preference': ['Full Sun'], 'Water Preference': ['Lots of Water', 'Moderate Water'], 'Drought Tolerant': False, 'Native to SG': True, 'Fruit Bearing': True, 'Fragrant Plant': True, 'Maximum Height (m)': 30.0, 'Flower Colour': 'Orange, Yellow / Golden', 'Hazard': 'N/A', 'Attracted Animals': 'Bird-Attracting (Fruits)', 'Native habitat': 'Terrestrial (Primary Rainforest, Coastal Forest, Riverine, Secondary Rainforest, Freshwater Swamp Forest), Shoreline (Mangrove Forest)', 'Mature Leaf Colour': 'Green', 'Young Flush Leaf Colour': 'N/A', 'Leaf Area Index': '3.0 (Tree - Intermediate Canopy)', 'Growth Rate': ['Moderate'], 'Trunk Texture': 'Fissured, Cracked', 'Trunk Colour': 'red', 'Leaf Texture': ['N/A'], 'Canopy Radius': 15}
3.8889275
{'Scientific Name': 'Sonneratia caseolaris (L.) Engl.', 'C

In [12]:
query = {
    "bool": {
        "must": [
            {"term": {"Plant Type.keyword": "Tree"}},
            {"term": {"Plant Type.keyword": "Shrub"}},
            {"term": {"Water Preference.keyword": "Moderate Water"}},
        ],
        "should": [
            {"match": {"Trunk Colour": {"query": "red"}}}
        ]
    }
}

result = esManager.custom_query('flora', query)
print(len(result['api_resp']))
for data in result['api_resp']:
    print(data['_source'])
    print(data['_score'])

2
{'Scientific Name': 'Cratoxylum cochinchinense (Lour.) Blume', 'Common Name': 'Derum Selunchor', 'Species ID': 2829, 'Link': 'https://www.nparks.gov.sg/florafaunaweb/flora/2/8/2829', 'Plant Type': ['Shrub', 'Tree'], 'Light Preference': ['Full Sun', 'Semi Shade'], 'Water Preference': ['Moderate Water'], 'Drought Tolerant': False, 'Native to SG': True, 'Fruit Bearing': False, 'Fragrant Plant': True, 'Maximum Height (m)': 30.0, 'Flower Colour': 'Red, Orange, Pink', 'Hazard': 'N/A', 'Attracted Animals': 'Butterfly Host Plant (Leaves, Associated with: Eurema  hecabe contubernalis (Moore, 1886), Lexias pardalis), Bee-Attracting', 'Native habitat': 'Terrestrial (Secondary Rainforest, Primary Rainforest, Freshwater Swamp Forest)', 'Mature Leaf Colour': 'Green', 'Young Flush Leaf Colour': 'Red', 'Leaf Area Index': '3.0 (Tree - Intermediate Canopy)', 'Growth Rate': ['Moderate'], 'Trunk Texture': 'Peeling / Flaking / Papery, Smooth', 'Trunk Colour': 'Reddish-brown', 'Leaf Texture': ['Medium']}


In [13]:
query = {
    "bool": {
        "must": [
            {"terms": {"Plant Type.keyword": ["Tree", "Shrub"]}},
            {"term": {"Water Preference.keyword": "Moderate Water"}},
        ],
        "should": [
            {"match": {"Trunk Colour": {"query": "red"}}}
        ]
    }
}

result = esManager.custom_query('flora', query)
print(len(result['api_resp']))
for data in result['api_resp']:
    print(data['_source'])
    print(data['_score'])

10
{'Scientific Name': 'Horsfieldia irya (Gaertn.) Warb.', 'Common Name': 'Pianggu', 'Species ID': 2964, 'Link': 'https://www.nparks.gov.sg/florafaunaweb/flora/2/9/2964', 'Plant Type': ['Tree'], 'Light Preference': ['Full Sun'], 'Water Preference': ['Lots of Water', 'Moderate Water'], 'Drought Tolerant': False, 'Native to SG': True, 'Fruit Bearing': True, 'Fragrant Plant': True, 'Maximum Height (m)': 30.0, 'Flower Colour': 'Orange, Yellow / Golden', 'Hazard': 'N/A', 'Attracted Animals': 'Bird-Attracting (Fruits)', 'Native habitat': 'Terrestrial (Primary Rainforest, Coastal Forest, Riverine, Secondary Rainforest, Freshwater Swamp Forest), Shoreline (Mangrove Forest)', 'Mature Leaf Colour': 'Green', 'Young Flush Leaf Colour': 'N/A', 'Leaf Area Index': '3.0 (Tree - Intermediate Canopy)', 'Growth Rate': ['Moderate'], 'Trunk Texture': 'Fissured, Cracked', 'Trunk Colour': 'red', 'Leaf Texture': ['N/A']}
5.0261064
{'Scientific Name': 'Aglaonema simplex (Blume) Blume', 'Common Name': 'Malayan 

In [14]:
query = {
    "bool": {
        "must": [
            {"match": {"Attracted Animals": "butterfly caterpillar"}}
        ],
        "must_not": [
        ]
    }
}


result = esManager.custom_query('flora', query, 30)
print(result)
print(len(result['api_resp']))
for data in result['api_resp']:
    print(data['_source'])
    print(data['_score'])

{'response': '200', 'api_resp': [{'_index': 'flora', '_id': 'VRsNJpMBHQbZJ6GuP0YS', '_score': 1.7489207, '_source': {'Scientific Name': 'Sonneratia caseolaris (L.) Engl.', 'Common Name': 'Crabapple Mangrove', 'Species ID': 3343, 'Link': 'https://www.nparks.gov.sg/florafaunaweb/flora/3/3/3343', 'Plant Type': ['Tree'], 'Light Preference': ['Full Sun'], 'Water Preference': ['Lots of Water'], 'Drought Tolerant': False, 'Native to SG': True, 'Fruit Bearing': True, 'Fragrant Plant': True, 'Maximum Height (m)': 20.0, 'Flower Colour': 'Pink, Red, White', 'Hazard': 'N/A', 'Attracted Animals': 'Bird-Attracting (Flowers), Caterpillar Moth Food Plant (Leaves)', 'Native habitat': 'Shoreline (Mangrove Forest)', 'Mature Leaf Colour': 'Green', 'Young Flush Leaf Colour': 'N/A', 'Leaf Area Index': '3.0 (Tree - Intermediate Canopy)', 'Growth Rate': ['Fast'], 'Trunk Texture': 'Cracked, Plated', 'Trunk Colour': 'Gray', 'Leaf Texture': ['N/A']}}, {'_index': 'flora', '_id': 'WRsNJpMBHQbZJ6GuP0YT', '_score': 

In [20]:
query = {
    "bool": {
        "must": [
            {"terms": {"Attracted Animals": ["butterfly", "caterpillar"]}}
        ],
        "must_not": [
        ],
        "should": [
            {"terms": {"Attracted Animals": ["bird"]}}
        ]
    }
}


result = esManager.custom_query('flora', query, 30)
print(result)
print(len(result['api_resp']))
for data in result['api_resp']:
    print(data['_source'])
    print(data['_score'])

{'response': '200', 'api_resp': [{'_index': 'flora', '_id': '2STtD5MB3A2meWprJesD', '_score': 2.0, '_source': {'Scientific Name': "Ixora 'Dwarf Yellow'", 'Common Name': 'N/A', 'Species ID': 2132, 'Link': 'https://www.nparks.gov.sg/florafaunaweb/flora/2/1/2132', 'Plant Type': ['Shrub'], 'Light Preference': ['Full Sun'], 'Water Preference': ['Moderate Water'], 'Drought Tolerant': False, 'Native to SG': False, 'Fruit Bearing': True, 'Fragrant Plant': True, 'Maximum Height (m)': 1.0, 'Flower Colour': 'Yellow / Golden', 'Hazard': 'N/A', 'Attracted Animals': 'Bird-Attracting, Butterfly Host Plant', 'Native habitat': 'Terrestrial', 'Mature Leaf Colour': 'Green, Yellow / Golden', 'Young Flush Leaf Colour': 'N/A', 'Leaf Area Index': '4.5 (Shrub & Groundcover - Dicot)', 'Growth Rate': ['Moderate'], 'Trunk Texture': 'N/A', 'Trunk Colour': 'N/A', 'Leaf Texture': ['Medium']}}, {'_index': 'flora', '_id': '2yTtD5MB3A2meWprJesD', '_score': 2.0, '_source': {'Scientific Name': 'Sonneratia caseolaris (L.