In [18]:
import os

In [19]:
patent_abstract = """
Multi-Modal Biometric Authentication System  
This patent introduces a groundbreaking Multi-Modal Biometric Authentication System (M2BAS) that integrates fingerprint, facial, and voice recognition for heightened security. Unlike conventional systems, M2BAS dynamically adjusts security levels based on contextual factors and user preferences, ensuring both robust protection and user convenience. Utilizing advanced machine learning algorithms, the system continuously analyzes environmental conditions and user behavior to determine the optimal authentication strength. Additionally, M2BAS incorporates cutting-edge encryption techniques for secure data transmission and storage. Its modular design facilitates seamless integration into existing infrastructures, making it a versatile and scalable solution across various applications. This innovation represents a significant leap forward in biometric security, promising to redefine authentication methods with its adaptive and user-centric approach."""


In [20]:
from openai import OpenAI
oai_client = OpenAI()

oai_client.embeddings.create(
        model="text-embedding-ada-002",
        input=patent_abstract
    )


CreateEmbeddingResponse(data=[Embedding(embedding=[-0.023760732263326645, 0.011235800571739674, -0.012097482569515705, -0.016500884667038918, -0.02312295138835907, 0.027288882061839104, -0.04046516492962837, -0.02990785427391529, -0.013162712566554546, -0.013128788210451603, 0.01130364928394556, 0.007008805405348539, -0.013576592318713665, 0.00033182417973876, 0.0026986952871084213, -0.020381849259138107, 0.023190800100564957, 0.02138601616024971, 0.0010779515141621232, -0.023041531443595886, 0.00617765448987484, 0.011378283612430096, -0.019228417426347733, -0.0028513555880635977, -0.016907978802919388, 0.015293172560632229, 0.012049988843500614, -0.03558003529906273, -0.020191872492432594, 0.0005470328615047038, 0.022118786349892616, -0.004535708110779524, -0.013454463332891464, -0.00549916410818696, -0.011982139199972153, 0.013522312976419926, -0.007646586280316114, -0.004013270605355501, 0.009451370686292648, -0.00630317535251379, 0.02184738963842392, -0.014858938753604889, 0.032431

In [21]:
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

embedding_function = OpenAIEmbeddingFunction(api_key=os.environ.get('OPENAI_API_KEY'),
                                             model_name="text-embedding-ada-002")


chroma_client = chromadb.Client()
vector_store = chroma_client.get_or_create_collection(name="Patents",
                                                      embedding_function=embedding_function)

In [22]:
vector_store.add("patent_info", documents=patent_abstract)

Insert of existing embedding ID: patent_info
Add of existing embedding ID: patent_info


In [23]:
from trulens_eval import Tru
from trulens_eval.tru_custom_app import instrument
tru = Tru()
#tru.reset_database() #Reset the database. Clears all tables.


In [24]:
class RAG_from_scratch:
    @instrument
    def retrieve(self, query: str) -> list:
        """
        Retrieve relevant text from vector store.
        """
        results = vector_store.query(
        query_texts=query,
        n_results=2
    )
        return results['documents'][0]

    @instrument
    def generate_completion(self, query: str, context_str: list) -> str:
        """
        Generate answer from context.
        """
        completion = oai_client.chat.completions.create(
        model="gpt-4",
        temperature=0,
        messages=
        [
            {"role": "user",
            "content": 
            f"The following abstract descripes a concept for a novel invention: \n"
            f"---------------------\n"
            f"{context_str}"
            f"\n---------------------\n"
            f"Given this information, please execute the following instructions: {query}"
            }
        ]
        ).choices[0].message.content
        return completion

    @instrument
    def query(self, query: str) -> str:
        context_str = self.retrieve(query)
        completion = self.generate_completion(query, context_str)
        return completion

rag = RAG_from_scratch()

In [25]:
from trulens_eval import Feedback, Select
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI as fOpenAI

import numpy as np

# Initialize provider class
fopenai = fOpenAI()

grounded = Groundedness(groundedness_provider=fopenai)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name = "Groundedness")
    .on(Select.RecordCalls.retrieve.rets.collect())
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = (
    Feedback(fopenai.relevance_with_cot_reasons, name = "Answer Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on_output()
)

# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(fopenai.qs_relevance_with_cot_reasons, name = "Context Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on(Select.RecordCalls.retrieve.rets.collect())
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.retrieve.rets.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.app.retrieve.args.query .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.app.retrieve.args.query .
✅ In Context Relevance, input statement will be set to __record__.app.retrieve.rets.collect() .


In [26]:
from trulens_eval import TruCustomApp
tru_rag = TruCustomApp(rag,
    app_id = 'RAG Key Words',
    feedbacks = [f_groundedness, f_qa_relevance, f_context_relevance])

Function <function RAG_from_scratch.generate_completion at 0x000002E449B84280> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.
Function <function RAG_from_scratch.query at 0x000002E449B84310> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.
Function <function RAG_from_scratch.retrieve at 0x000002E449B841F0> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.


In [27]:
with tru_rag as recording:
    rag.query("Name 5 key words based on this abstract, that I can use for the search in a patent database. Optimize the key words to get back more results. Result as python string.")

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


In [28]:
tru.get_leaderboard(app_ids=["RAG Key Words"])

Unnamed: 0_level_0,Context Relevance,Groundedness,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG Key Words,1.0,1.0,1.0,3.5,0.0


In [29]:

tru_rag = TruCustomApp(rag,
    app_id = 'RAG Classifications',
    feedbacks = [f_groundedness, f_qa_relevance, f_context_relevance])

Function <function RAG_from_scratch.generate_completion at 0x000002E449B84280> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.
Function <function RAG_from_scratch.query at 0x000002E449B84310> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.
Function <function RAG_from_scratch.retrieve at 0x000002E449B841F0> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.


In [30]:
with tru_rag as recording:
    rag.query("Name 5 CPC classifications based on this abstract, that I can use for the search in a patent database. \
Please give me a python string for the codes of the 5 most relevant \
CPC classifications to a possible patent.")

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


In [31]:
tru.get_leaderboard(app_ids=["RAG Classifications"])

Unnamed: 0_level_0,Context Relevance,Groundedness,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG Classifications,0.5,0.42,1.0,4.0,0.0


In [32]:

tru_rag = TruCustomApp(rag,
    app_id = 'RAG Patent Comparison',
    feedbacks = [f_groundedness, f_qa_relevance, f_context_relevance])

Function <function RAG_from_scratch.generate_completion at 0x000002E449B84280> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.
Function <function RAG_from_scratch.query at 0x000002E449B84310> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.
Function <function RAG_from_scratch.retrieve at 0x000002E449B841F0> was not found during instrumentation walk. Make sure it is accessible by traversing app <__main__.RAG_from_scratch object at 0x000002E44B89E320> or provide a bound method for it as TruCustomApp constructor argument `methods_to_instrument`.


In [33]:
with tru_rag as recording:
    rag.query(f"""
'The following texts are abstracts from patent specifications. Your task is to compare the novel invention from above to all the other abstracts from below. \n
It is important that you focus on comparing the concepts that the abstracts describe, not the way they are written. \n
Rank the remaining abstracts on how well they match with the Testing Abstract by giving them a rating from 0 to 10 points. \n        
0 meaning they have absolutely nothing in common and 10 meaning they basically describe the exact same idea.\n  
Your output should be a python dictionary with the title "comparison", each element hast the Abstract number as key and the rating as value.\n        
I want to convert your output string to an actual dictionary, so make sure the formatting is right.\n\n        
        
US8952781B2: "A biometrically authenticated access control in which a wireless authentication signal is provided from a primary instrumentality of access, only after a dual-stage biometric verification of the user\'s identity is performed. In one embodiment, an accessing device includes memory for storing a device identification code and an authentication code, along with first and second biometric templates corresponding to biometric samples from a user. In another embodiment, an accessing device includes memory for storing a device identification code and more than one authentication code, for separate users, along with first and second biometric templates corresponding to biometric samples from multiple users. In order to gain access to a secured resource, a user undergoes first and second biometric sampling to generate biometric data to be compared with the first and second biometric templates."\nUS9654468B2: "Systems and methods for secure remote biometric authentication are provided. A network-based biometric authentication platform stores biometric templates for individuals which have been securely enrolled with the authentication platform. A plurality of sensor platforms separately establishes secure communications with the biometric authentication platform. The sensor platform can perform a biometric scan of an individual and generate a biometric authentication template. The sensor platform then requests biometric authentication of the individual by the biometric authentication platform via the established secure communications. The biometric authentication platform compares the generated biometric template to one or more of the enrolled biometric templates stored in memory at the biometric authentication platform. The result of the authentication is then communicated to the requesting sensor platform via the established secure communications."\nUS8392965B2: "Techniques for multiple biometric smart card authentication are provided. At least two biometric readings are obtained from a requesting user. Both biometric readings are verified before access to resources of a smart card are made available to the requesting user."\nUS8141141B2: "This invention provides for progressive processing of biometric samples to facilitate verification of an authorized user. The initial processing is performed by a security token. Due to storage space and processing power limitations, excessive false rejections may occur. To overcome this shortfall, the biometric sample is routed to a stateless server, which has significantly greater processing power and data enhancement capabilities. The stateless server receives, processes and returns the biometric sample to the security token for another attempt at verification using the enhanced biometric sample. In a second embodiment of the invention, a second failure of the security token to verify the enhanced biometric sample sends either the enhanced or raw biometric sample to a stateful server. The stateful server again processes the biometric sample and performs a one to many search of a biometric database. The biometric database contains the master set of enrolled biometric templates associated with all authorized users. Signals generated by the stateful server are used by the security token to allow or deny access to a resource or function. In both embodiments of the invention, the heuristics remain with the security token."\nUS8694793B2: "Aspects and embodiments of the present disclosure provide devices and methods for biometric authentication of a user during access control transactions. In one aspect, an access control processor device, comprising a biometric input sensor configured to receive user biometric information; a biometric verification processor configured to authenticate the input user biometric information; and a communication element configured to activate when the biometric information entered into the biometric verification system is authenticated and maintain an inactive status for the communication element on the payment processor device when the biometric information entered into the biometric verification system is not authenticated."\nUS10698989B2: "Systems and methods verifying a user during authentication of an integrated device. In one embodiment, the system includes an integrated device and an authentication unit. The integrated device stores biometric data of a user and a plurality of codes and other data values comprising a device ID code uniquely identifying the integrated device and a secret decryption value in a tamper proof format, and when scan data is verified by comparing the scan data to the biometric data, wirelessly sends one or more codes and other data values including the device ID code. The authentication unit receives and sends the one or more codes and the other data values to an agent for authentication, and receives an access message from the agent indicating that the agent successfully authenticated the one or more codes and other data values and allows the user to access an application."\nUS11397800B2: "A removable card-enabled BPID Security Device integrates a removable card reader with a biometric authentication component to provide secured access to electronic systems. The device allows for an individual to insert a removable card into an aperture in the physical enclosure of the BPID Security Device, allowing the removable card and the BPID Security Device to electronically communicate with each other. The BPID Security Device is based on a custom application specific integrated circuit that incorporates removable card terminals, such that the BPID Security Device can communicate directly with an inserted removable card. In an alternative embodiment of the invention, the BPID Security Device is based on a commercial off-the-shelf microprocessor, and may communicate with a commercial off-the-shelf microprocessor removable card receiver using a serial, USB, or other type of communication protocol. The device allows for enrolling a user\'s credentials onto the BPID Security Device and for authenticating an individual using the BPID Security Device."\nCA2640915C: "An authentication server, of a user to be authenticated, using a portable object comprising at least one biometric sensor, the portable object being adapted to cooperate with a terminal, the method comprising a step of capturing, by a portable object, a biometric sample to be compared coming from the user to be authenticated. The portable object transmits to the authentication server the biometric sample, in a secure form, and the authentication server determines a signature to be authenticated using the biometric sample, and then compares it to a reference signature. As such, it is the authentication server that creates a signature to be authenticated, using a biometric sample transmitted by the portable object, in a secure manner, and which then carries out the comparison between the signature to be authenticated and a reference signature."\nUS11126635B2: "Systems, methods, and devices for a cyberphysical (IoT) software application development platform based upon a model driven architecture and derivative IoT SaaS applications are disclosed herein. The system may include concentrators to receive and forward time-series data from sensors or smart devices. The system may include message decoders to receive messages comprising the time-series data and storing the messages on message queues. The system may include a persistence component to store the time-series data in a key-value store and store the relational data in a relational database. The system may include a data services component to implement a type layer over data stores. The system may also include a processing component to access and process data in the data stores via the type layer, the processing component comprising a batch processing component and an iterative processing component."\nUS11231705B2: "Methods for data monitoring with changeable routing of input channels are disclosed. An example method includes a data collector communicatively coupled to a plurality of input channels and a data acquisition circuit to interpret the detection values, each corresponding to an input channel. Sensor data is acquired from a first route of input channels and stored together with specifications for the sensors that correspond to the input channels. The sensor data is evaluated with respect to an alarm threshold level and an alarm state set when the alarm threshold level is exceeded. A response circuit changes a routing of the input channels for data collection from a first routing to an alternate routing of input channels, wherein the alternate routing of input channels comprise the first input channel and a group of input channels related to the first input channel."\nUS11126171B2: "Systems and methods for data collection in an industrial environment are disclosed. A system may include a data collector to collect data from a subset of a plurality of input channels based on a selected data collection routine, and a data acquisition and analysis circuit for receiving the collected data and analyzing the collected data using an expert system analysis circuit to determine an occurrence of an anomalous condition for a machine component based on an analysis. The expert system analysis circuit may utilize a neural network. The data analysis circuit may determine an aggregate rate of data being collected and, if the aggregate rate exceeds a current bandwidth allocation rate associated with the network infrastructure, request an increase to the current bandwidth allocation rate from the network infrastructure."\nUS11451398B2: "A computer implemented method of validating use of a computing resource by a an executing requester software module from a plurality of discrete software modules, the method including validating a characteristic of the requester software module; generating a first transaction defining criteria for consumption of the computing resource by the requester software module, the first transaction being encrypted with a private key from a public key/private key pair and being added as part of a block of transactions to a blockchain data structure; generating a subsequent encrypted transaction corresponding to a request of the requester software module to consume the computing resource, the subsequent transaction referring to the first transaction, wherein the subsequent transaction is validated by a transaction miner computing component from a plurality of miners by authenticating the transaction using the public key and verifying compliance with the criteria defined in each transaction."\nUS11698818B2: "A computer implemented method of executing a plurality of discrete software modules each including a machine learning algorithm as an executable software component configurable to approximate a function relating a domain data set to a range data set; a data store; and a message handler as an executable software component arranged to receive input data and communicate output data for the module, wherein the message handler is adapted to determine domain parameters for the algorithm based on the input data and to generate the output data based on a result generated by the algorithm, each module having associated a metric of resource utilization by the module, the method including receiving a request for a machine learning task; and selecting a module from the plurality of modules for the task based on the metric associated with the module."\nUS11823017B2: "A computer implemented method of executing a plurality of discrete software modules each including a machine learning algorithm as an executable software component configurable to approximate a function relating a domain data set to a range data set; a data store; and a message handler as an executable software component arranged to receive input data and communicate output data for the module, wherein the message handler is adapted to determine domain parameters for the algorithm based on the input data and to generate the output data based on a result generated by the algorithm, the method including providing a communication channel between modules in order that at least part of output data for a first module constitutes at least part of input data for a second module so as to create a network of modules for combining machine learning algorithms to refine the approximation of the function."\nUS20190294999A1: "Systems and methods for selecting hyper parameters for machine learning algorithms based on past training results are provided. For example, groups of values of hyper parameters may be obtained. Further, in some examples, results of training the machine learning algorithm using different pluralities of training examples and/or the different group of values of hyper parameters may be obtained. Further, in some examples, the results and the groups of values of hyper parameters may be used to select at least one value of a hyper parameter for a prospective training of the machine learning algorithm."\nUS11562293B2: "A computer implemented method of executing a software module includes a machine learning algorithm as an executable software component configurable to approximate a function relating a domain data set to a range data set; a data store; and a message handler as an executable software component arranged to receive input data and communicate output data for the module, wherein the message handler is adapted to determine domain parameters for the algorithm based on the input data and to generate the output data based on a result generated by the algorithm, the method including generating a message as input data for the module, the message including instructions for execution by the module to effect a modification of the machine learning algorithm of the module."\nUS20200034665A1: "Apparatuses, systems, program products, and methods are disclosed for determining validity of machine learning algorithms for datasets. An apparatus includes a primary training module that is configured to train a first machine learning model for a first machine learning algorithm. An apparatus includes a primary validation module that is configured to validate a first machine learning model to generate an error data set. An apparatus includes a secondary training module that is configured to train a second machine learning model for a second machine learning algorithm using an error data set. A second machine learning algorithm may be configured to predict a suitability of a first machine learning model for analyzing an inference data set. An apparatus includes an action module that is configured to trigger an action in response to a predicted suitability of the first machine learning model not satisfying a predetermined suitability threshold."\nUS11568300B2: "A machine learning management apparatus identifies a maximum prediction performance score amongst a plurality of prediction performance scores corresponding to a plurality of models generated by executing each of a plurality of machine learning algorithms. As for a first machine learning algorithm having generated a model corresponding to the maximum prediction performance score, the machine learning management apparatus determines a first training dataset size to be used when the first machine learning algorithm is executed next time based on the maximum prediction performance score, first estimated prediction performance scores, and first estimated runtimes. As for a second machine learning algorithm different from the first machine learning algorithm, the machine learning management apparatus determines a second training dataset size to be used when the second machine learning algorithm is executed next time based on the maximum prediction performance score, second estimated prediction performance scores, and second estimated runtimes."\n'
""")

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


In [34]:
tru.get_leaderboard(app_ids=["RAG Patent Comparison"])

Unnamed: 0_level_0,Context Relevance,Groundedness,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RAG Patent Comparison,0.8,1.0,0.9,11.5,0.0
