In [1]:
import pandas as pd
import numpy as np
import json
from enum import Enum

In [2]:
class SearchBrowser:
    """The SearchBrowser is a convenient interface allowing interrogation of the results of an RDP Search request.
       The goal of the interface is to help the user to determine the names of available Search properties
       and the values associated with them.  The interface provides methods to interrogate the details of
       a search to accelerate the ability to build proper search expressions.

       To start:

       1. Create a browser
          browser = SearchBrowser(rdp)
       2. Execute a search
          browser.execute(<criteria>)
       3. Interrogate the browser for details
          See below for interrogation methods.

       Object Properties:

       df
           Lists the entire result table containing all properties, metadata, and their values, based on the 1st hit

       navigator
           If a navigator was specified in the request, this property presents a summary of the results.
           It is reset to an empty dict when the last search returned no navigator.

       hits
           # of hits based on the last search execution
    """

    class PropertyType(Enum):
        """Property types accepted by SearchBrowser.type(); matching is done on the member name."""
        Double = 1
        String = 2
        Date = 3
        Boolean = 4
        Integer = 5

    # Instantiate a SearchBrowser object, specifying the rdp session
    def __init__(self, rdp):
        """Store the rdp session used for every request and initialize empty results."""
        self.rdp = rdp
        self.df = {}
        self.meta = {}
        self.navigator = {}
        self.hits = 0

    # execute
    def execute(self, query=None, filter='', view=None, order_by='', navigator=''):
        """Apply a search expression to generate a hit containing values and attributes to browse.

           When no view is given, the session's SearchViews.SearchAll view is used.  The metadata
           for the view is retrieved through the session supplied to the constructor and joined
           to the properties of the first hit.

           Returns: tuple (# of hits, # of properties generated from the first hit)
           Raises:  NameError if the search request does not return HTTP status 200
        """
        if view is None:
            view = self.rdp.SearchViews.SearchAll

        # Retrieve metadata and prepare column details.  Always go through the
        # session held by this browser rather than a module-level 'rdp' name.
        self.meta = self.rdp.get_search_metadata(view=view)

        # Search and process debug output
        debug = self.__search_and_extract(query, filter, view, order_by, navigator)

        if self.meta.index.nlevels > 1:
            # Two-level metadata index: (property, nested property)
            self.meta.index.set_names(['Property', 'Nested'], inplace=True)
            self.meta.reset_index(inplace=True)
            # A row whose nested name repeats the property name is the top-level
            # entry; blank it so it lines up with the '' used for non-nested hits.
            self.meta.loc[(self.meta.Property == self.meta.Nested), 'Nested'] = ''
            self.df = debug.join(self.meta.set_index(['Property', 'Nested']), on=['Property', 'Nested'])
        else:
            # Flat metadata: the 'Nested' column carries no information
            debug.drop(columns=['Nested'], inplace=True)
            self.meta.index.set_names(['Property'], inplace=True)
            self.meta.reset_index(inplace=True)
            self.df = debug.join(self.meta.set_index(['Property']), on='Property')

        # Properties without matching metadata produce NaN cells; show them as blanks
        self.df.replace({np.nan: ''}, inplace=True)
        return (self.hits, len(self.df))

    # values
    def values(self, text):
        """Browse the values that match the text expression (case-insensitive).
           Eg: browser.values('united kingdom') - returns all values containing the expression 'united kingdom'
        """
        return self.df.loc[self.df.Value.str.contains(text, na=False, case=False)]

    # properties
    def properties(self, text):
        """Browse the properties that match the text expression (case-insensitive).
           Eg: browser.properties('ISIN')
        """
        return self.df.loc[self.df.Property.str.contains(text, na=False, case=False)]

    # nested
    def nested(self, text):
        """Browse the nested properties that match the text expression (case-insensitive).
           Eg: browser.nested('')

           Returns None when the current result table has no 'Nested' column.
        """
        if 'Nested' in self.df:
            return self.df.loc[self.df.Nested.str.contains(text, na=False, case=False)]
        return None

    # navigable
    def navigable(self, prop=None, value=None):
        """Browse the metadata that matches all properties that are navigable.
           Apply additional criteria that matches properties or values.
           Eg: browser.navigable()              - returns all navigable properties
               browser.navigable('Description') - returns all navigable properties containing 'Description'
               browser.navigable(value='euro')  - returns all navigable properties with a value containing 'euro'
               browser.navigable('RCS', 'euro') - returns all RCS-based navigable properties with a value containing 'euro'
        """
        return self.__interrogate(self.df[self.df['Navigable'] == True], prop, value)

    # exact
    def exact(self, prop=None, value=None):
        """Browse the metadata that matches all properties that provide an exact match filter expression.
           Apply additional criteria that matches properties or values.
           Eg: browser.exact()                - returns all exact properties
               browser.exact('Ticker')        - returns all exact properties containing 'Ticker'
               browser.exact(value='IBM')     - returns all exact properties with a value containing 'IBM'
               browser.exact('Ticker', 'IBM') - returns all Ticker-based exact properties with a value containing 'IBM'
        """
        return self.__interrogate(self.df[self.df['Exact'] == True], prop, value)

    # type
    def type(self, property_type):
        """Browse the types that match the specified property type.
           Eg: browser.type(SearchBrowser.PropertyType.Double) - returns all properties that have a double type

           Raises: NameError if property_type is not a SearchBrowser.PropertyType member
        """
        if not isinstance(property_type, SearchBrowser.PropertyType):
            raise NameError(f'**Invalid property type specified.  Type must be: {SearchBrowser.PropertyType}\n\tEg: SearchBrowser.type(SearchBrowser.PropertyType.Double)')
        return self.df.loc[self.df.Type.str.contains(property_type.name, na=False, case=False)]

    # __search_and_extract
    # Extracts the output from a _debugall request and organizes the results within a dataframe
    def __search_and_extract(self, query, filter, view, order_by, navigator):
        """Run the search for a single hit and flatten it into Property/Nested/Value rows.

           Updates self.hits and self.navigator as a side effect.
           Raises: NameError if the response status is not HTTP 200
        """
        # Search
        response = self.rdp.Search.search(
            view=view,
            query=query,
            filter=filter,
            top=1,
            select="_debugall",
            order_by=order_by,
            navigators=navigator
        )

        if response.status['http_status_code'] != 200:
            print(f'\nFailed to execute search:\n{json.dumps(response.status, indent=2)}\n')
            raise NameError('Failed to execute search')

        self.hits = response.data.total

        # If available, process Navigator output
        self.__extract_navigator(response)

        data = []
        if response.data.total > 0:
            for prop, val in response.data.raw['Hits'][0]['raw_source'].items():
                if isinstance(val, list) and len(val) > 0 and isinstance(val[0], dict):
                    # A list of dicts is a nested property: one row per nested key/value
                    for node in val:
                        for nested_prop, nested_val in node.items():
                            data.append([prop, nested_prop, nested_val])
                else:
                    data.append([prop, '', val])
        return pd.DataFrame(data, columns=['Property', 'Nested', 'Value'])

    # __extract_navigator
    # If present, extracts the navigator details
    def __extract_navigator(self, response):
        """Summarize the first navigator of the response into self.navigator.

           The resulting table has the navigator name and 'Count' as columns, plus a
           'Filter' column when at least one bucket entry carries a filter.  When the
           response has no navigator, self.navigator is reset to an empty dict so that
           results from a previous search are not mistaken for the current ones.
        """
        navigators = response.data.raw.get('Navigators') if "Navigators" in response.data.raw else None
        if not navigators:
            self.navigator = {}
            return

        # Only the first navigator of the response is summarized
        name, body = next(iter(navigators.items()))

        rows = []
        has_filter = False
        for bucket in body.values():
            for val in bucket:
                has_filter = has_filter or 'Filter' in val
                # Build every row with the same width; the filter slot is trimmed
                # below when no entry provides one.
                rows.append([val.get('Label', 'NA'), val.get('Count', 0), val.get('Filter')])

        header = [name, "Count"]
        if has_filter:
            header.append("Filter")
        else:
            rows = [row[:2] for row in rows]

        self.navigator = pd.DataFrame(rows, columns=header)

    # __interrogate
    # Interrogate the dataframe for properties and values
    def __interrogate(self, df, prop, value):
        """Filter df by property name and/or value text (case-insensitive); both optional."""
        if prop is not None:
            df = df.loc[df.Property.str.contains(prop, na=False, case=False)]
        if value is not None:
            df = df.loc[df.Value.str.contains(value, na=False, case=False)]
        return df