In [10]:
import gradio as gr
import PyPDF2
from langchain.output_parsers import StructuredOutputParser
from langchain.output_parsers import ResponseSchema
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from Levenshtein import distance
import time
import os
import sys
from bs4 import BeautifulSoup as bs
import openai
import requests
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from pymongo import MongoClient
from pymongo.server_api import ServerApi
from reportlab.pdfgen import canvas
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document

In [11]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']
patent_api_key = os.environ['GOOGLE_PATENT_API_KEY']

#Define LLM Model
llm_model = "gpt-4"

#Initialize LLM Model Attributes
from openai import OpenAI

client = OpenAI()
def get_completion(prompt, model=llm_model):
    messages = [{"role": "user", "content": prompt}] #role: define the role of the llm; conent: how the llm should act
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0, #creativity range 0..1
    )
    return response.choices[0].message.content

Python-dotenv could not parse statement starting at line 18
Python-dotenv could not parse statement starting at line 20
Python-dotenv could not parse statement starting at line 22


In [12]:
content = """
Biometric Vehicle Access System
Abstract:
The Biometric Vehicle Access System (BVAS) is an innovative technology designed to revolutionize traditional vehicle security and access methods.
This system emplopatent_abstractys advanced biometric authentication, including fingerprint and facial recognition, to ensure secure and convenient entry and ignition processes.
BVAS enhances vehicle security by replacing traditional key-based and electronic fob systems with a seamless and personalized biometric verification process. The technology integrates biometric sensors into door handles, steering wheels, and ignition systems, allowing for quick and reliable user authentication. The BVAS not only provides an additional layer of security against unauthorized access but also enhances user convenience by eliminating the need for physical keys or key fobs. Users can effortlessly unlock, start, and operate their vehicles through a simple and rapid biometric scan. The system is designed with robust anti-spoofing measures to prevent unauthorized access attempts.
Furthermore, BVAS contributes to the growing trend of biometric integration in smart vehicles, aligning with the industry's commitment to innovation, user experience, and safety. As vehicles continue to evolve into interconnected and autonomous entities, BVAS sets a new standard for personalized and secure access, catering to the increasing demand for sophisticated yet user-friendly solutions in the automotive sector.
"""

In [13]:
def output_keywords(content, n, progress=gr.Progress()):
    progress(0, desc="Generating Key Words...")
    keyword_prompt = f"""
    The following abstract descripes a concept for a novel invention:\
    ```{content}```\
    Name {n} key words based on this abstract, that I can use for the search in a patent database. \
    Optimize the key words to get back more results. Result as python string.
    """

    response_keywords = get_completion(keyword_prompt)

    return response_keywords

In [14]:
output_keywords = output_keywords(content, 2)

In [15]:
def output_classes(content, n, progress=gr.Progress()):
    progress(0, desc="Generating Classifications...")
    classes_prompt = f"""
        The following abstract descripes a concept for a novel invention:\
        ```{content}```\
        Name {n} CPC classifications based on this abstract, that I can use for the search in a patent database. \
        Please give me a python string for the codes of the {n} most relevant \
        CPC classifications to a possible patent. 
        """
    
    response_classes = get_completion(classes_prompt)
    
    return response_classes

In [16]:
output_classes = output_classes(content, 2)

output_classes

'"G06K9/00", "B60R25/10"'

In [17]:
def patent_analysis(content, response_keywords, response_classes, progress=gr.Progress()):
        #cast the results (key words) from string to list
        keywords_list = []

        splitstring = response_keywords.split(", ") #split the key words
        for i in splitstring:
            keywords_list.append(i[1:-1]) #remove the quotation marks
        print(keywords_list)

        #cast the results (classifications) from string to list
        class_list = []

        new_string = response_classes.replace(",", "",9999) #remove commas
        splitstring = new_string.split() #split the classes
        for i in splitstring:
            class_list.append(i[1:-1]) #remove the quotation marks
        print(class_list)

        #initialization of base vars for the following loop
        progress(0.5, desc="Research for patents")
        patent_api_key = os.environ['GOOGLE_PATENT_API_KEY']
        count = 0
        patent_base_url = "https://patentimages.storage.googleapis.com/" #just to complete the url
        patent_data = {}

        #Loop for multiple Google Patents API calls with Key Words
        for i in keywords_list:
            openai_response = i #Search String for Google Patents
            url_base = "https://serpapi.com/search.html?engine=google_patents"
            query = openai_response.replace(" ", "+")
            url = url_base + "&q=" + query + "&api_key=" + patent_api_key

            # API call Google Patents
            response = requests.get(url)

            # Check if API call was successful
            if response.status_code == 200:
                data = response.json() #write json-answer in var
                for cluster in data["results"]["cluster"]:
                    for result in cluster["result"]:
                        id = result["patent"]["publication_number"]
                        if id not in patent_data.keys():
                            patent_data[id] = {
                                "pdf": result["patent"]["pdf"],
                            }
            else:
                print(f"Error with API request: Status code {response.status_code}")

        for cluster in data["results"]["cluster"]:
            for result in cluster["result"]:
                patent_id = result["patent"]["publication_number"]
                if patent_id not in patent_data.keys():
                    patent_data[patent_id] = {
                        "pdf": result["patent"]["pdf"],
                    }

        #Scraping complete patent data
        progress(0.6, desc="Collecting patent data")

        for patent_id in patent_data.keys():
        
            # generating Google Patent links for each ID

            url = "https://patents.google.com/patent/" + patent_id + "/en"

            response = requests.get(url)
            html_content = response.content
            soup = bs(html_content, 'html.parser')

            # Scraping Title

            title_span = soup.find('span', itemprop='title')

            if title_span is not None:
                title = title_span.get_text()

                # Removing weird ending of title
                to_remove = "\n"
                title = title.replace(to_remove, "").strip()
            else:
                title = False

            # Scraping Abstract

            abstract_div = soup.find('div', class_='abstract')

            if abstract_div is not None:
                abstract = abstract_div.get_text()
            else:
                abstract = False 

            # Scraping Description

            description_section = soup.find('section', itemprop='description')

            if description_section:

                # Removing H2 from section
                h2_tag = description_section.find('h2')
                if h2_tag:
                    h2_tag.decompose()
                    
                # Removing all 'notranslate' class items
                for notranslate_tag in description_section.find_all(class_='notranslate'):
                    notranslate_tag.decompose()
                    
                # Removing all <aside> elements
                for aside_tag in description_section.find_all('aside'):
                    aside_tag.decompose()

                # Extracting and joining the text
                description = "".join(description_section.stripped_strings)
                if description == "":
                    description = False

            else:
                description = False   

            # Scraping Claims

            description_section = soup.find('section', itemprop='claims')

            if description_section:
                # Removing H2 from section
                h2_tag = description_section.find('h2')
                if h2_tag:
                    h2_tag.decompose()
                    
                # Removing all 'notranslate' class items
                for notranslate_tag in description_section.find_all(class_='notranslate'):
                    notranslate_tag.decompose()
                    
                # Removing all <aside> elements
                for aside_tag in description_section.find_all('aside'):
                    aside_tag.decompose()

                # Extracting and joining the text
                claims = "".join(description_section.stripped_strings)
                if claims == "":
                    claims = False

            else:
                claims = False
                
            patent_data[patent_id].update({
                "title": title,
                "abstract": abstract,
                "description": description,
                "claims": claims
            })

        # Converting the patent data into a usable format to perform a vector search
        patent_list = []

        for patent_id, data in patent_data.items():
            page_content = f"{data['title']} {data['abstract']} {data['description']} {data['claims']}"
            metadata = {"patent_id": patent_id}
            patent_list.append(Document(page_content=page_content, metadata=metadata))


        #Login MongoDB with User and specific database
        uri = "mongodb+srv://timmey:faB8MFdyyb7zWvVr@llm-ttt.8kqrnka.mongodb.net/?retryWrites=true&w=majority"

        # Create a new client and connect to the server
        client = MongoClient(uri, server_api=ServerApi('1'))

        DB_NAME = "llm-ttt"
        COLLECTION_NAME = "pdfresults"
        ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_index"

        MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]       

        # insert the documents in MongoDB Atlas with their embedding
        vector_search = MongoDBAtlasVectorSearch.from_documents(
            documents=patent_list,
            embedding=OpenAIEmbeddings(disallowed_special=()),
            collection=MONGODB_COLLECTION,
            index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
        )

        #pause for the db to save
        time.sleep(5)

        # Perform a similarity search with Score between the embedding of the query and the embeddings of the documents
        progress(0.9, desc="Compare the patents")
        query = str(content)

        results = vector_search.similarity_search_with_score(
            query=query,
            k=20, #Output for the top n results
        )

        vector_scoring = {}

        for result in results:
            vector_scoring[result[0].metadata['patent_id']] = result[1]


        comparison_prompt = f"""The following texts are abstracts from patent specifications. Your task is to compare the "Testing Abstract" to all the others. 
        It is important that you focus on comparing the concepts that the abstracts describe, not the way they are written. 
        Rank the remaining abstracts on how well they match with the Testing Abstract by giving them a rating from 0 to 10 points. 
        0 meaning they have absolutely nothing in common and 10 meaning they basically describe the exact same idea.
        Your output should be a python dictionary with the title "comparison", each element hast the Abstract number as key and the rating as value.
        I want to convert your output string to an actual dictionary, so make sure the formatting is right.

        Testing Abstract: "{content}"
        """

        for patent_id in vector_scoring.keys():
            # Check if there is an abstract for the patent
            if patent_id in patent_data.keys():
                if patent_data[patent_id]["abstract"] is not False:
                    comparison_prompt = comparison_prompt + f'{patent_id}: "{patent_data[patent_id]["abstract"]}"\n'

        return comparison_prompt



In [18]:
comparison_result = patent_analysis(content, output_keywords, output_classes)

comparison_result

['Biometric Vehicle Access System', 'biometric authentication']
['G06K9/00', 'B60R25/10']
Error with API request: Status code 429
Error with API request: Status code 429


UnboundLocalError: cannot access local variable 'data' where it is not associated with a value

'The following texts are abstracts from patent specifications. Your task is to compare the "Testing Abstract" to all the others. \n        It is important that you focus on comparing the concepts that the abstracts describe, not the way they are written. \n        Rank the remaining abstracts on how well they match with the Testing Abstract by giving them a rating from 0 to 10 points. \n        0 meaning they have absolutely nothing in common and 10 meaning they basically describe the exact same idea.\n        Your output should be a python dictionary with the title "comparison", each element hast the Abstract number as key and the rating as value.\n        I want to convert your output string to an actual dictionary, so make sure the formatting is right.\n\n        Testing Abstract: "\nBiometric Vehicle Access System\nAbstract:\nThe Biometric Vehicle Access System (BVAS) is an innovative technology designed to revolutionize traditional vehicle security and access methods.\nThis system emplopatent_abstractys advanced biometric authentication, including fingerprint and facial recognition, to ensure secure and convenient entry and ignition processes.\nBVAS enhances vehicle security by replacing traditional key-based and electronic fob systems with a seamless and personalized biometric verification process. The technology integrates biometric sensors into door handles, steering wheels, and ignition systems, allowing for quick and reliable user authentication. The BVAS not only provides an additional layer of security against unauthorized access but also enhances user convenience by eliminating the need for physical keys or key fobs. Users can effortlessly unlock, start, and operate their vehicles through a simple and rapid biometric scan. The system is designed with robust anti-spoofing measures to prevent unauthorized access attempts.\nFurthermore, BVAS contributes to the growing trend of biometric integration in smart vehicles, aligning with the industry\'s commitment to innovation, user experience, and safety. As vehicles continue to evolve into interconnected and autonomous entities, BVAS sets a new standard for personalized and secure access, catering to the increasing demand for sophisticated yet user-friendly solutions in the automotive sector.\n"\n        US20100299002A1: "A system for authenticating individuals traveling to and from various destinations at various times. Personal identity and travel privilege verification are coordinated for several modes of transportation, including aircraft, boats, buses, cars and trains. Travel privileges are considered to be the ability to leave the current location, travel to the desired location, travel at specific times, and use specific forms of transportation. The system specifically provides operator privilege verification, allowing individuals to receive vehicle operator privileges. These privileges are evaluated upon the individual\'s application, and are periodically updated at the discretion of the controlling institution."\nUS20180201225A1: "A method for authorizing a driver to activate at least one system of a vehicle, includes a preliminary phase that includes an initialization step, an enrollment step scanning enrollment biometric data of the driver, an access step for the driver to enter the vehicle, and a storage step registering enrollment biometric data in memory. The method also includes a subsequent phase that includes an authentication step scanning authentication biometric data of the driver and comparing said authentication biometric data with the enrollment biometric data in the memory, then an activation step wherein, if the authentication biometric data match with the enrollment biometric data, in authorizing the driver to activate the at least one system of the vehicle, or a refusal step wherein, if the authentication biometric data does not match with the enrollment biometric data, in forbidding the driver to activate the at least one system of the vehicle."\nUS8952781B2: "A biometrically authenticated access control in which a wireless authentication signal is provided from a primary instrumentality of access, only after a dual-stage biometric verification of the user\'s identity is performed. In one embodiment, an accessing device includes memory for storing a device identification code and an authentication code, along with first and second biometric templates corresponding to biometric samples from a user. In another embodiment, an accessing device includes memory for storing a device identification code and more than one authentication code, for separate users, along with first and second biometric templates corresponding to biometric samples from multiple users. In order to gain access to a secured resource, a user undergoes first and second biometric sampling to generate biometric data to be compared with the first and second biometric templates."\nCN111231893B: "The invention relates to a method (17) for operating a shared vehicle (1) and to a corresponding shared vehicle (1). The invention provides that the biometric data of the holder of the driver\'s license (14) is read from the driver\'s license (14) carried by the user (5) by means of a data acquisition device (13). Furthermore, at least one biometric of the user (5) is detected by means of the identification device (13) during the stay of the user in the area of the shared vehicle (1). Next, to verify the read data, the data is compared with the detected biometric features. In the event of agreement between the read data and the detected biometric feature, i.e. when a positive test result is obtained, the shared vehicle (1) is activated for the user (5) for use."\nUS8694793B2: "Aspects and embodiments of the present disclosure provide devices and methods for biometric authentication of a user during access control transactions. In one aspect, an access control processor device, comprising a biometric input sensor configured to receive user biometric information; a biometric verification processor configured to authenticate the input user biometric information; and a communication element configured to activate when the biometric information entered into the biometric verification system is authenticated and maintain an inactive status for the communication element on the payment processor device when the biometric information entered into the biometric verification system is not authenticated."\nEP1705085B1: "A driver authenticating apparatus has a camera for capturing a facial image of a person in a driver seat, and a controller connected to the camera. The controller permits a driver to start an vehicle engine if the captured facial image matches a pre-registered facial image of an authorized driver. When restarting an engine with the driver present in the driver seat, the apparatus allows the driver to start the vehicle engine without an authenticating procedure."\nUS9654468B2: "Systems and methods for secure remote biometric authentication are provided. A network-based biometric authentication platform stores biometric templates for individuals which have been securely enrolled with the authentication platform. A plurality of sensor platforms separately establishes secure communications with the biometric authentication platform. The sensor platform can perform a biometric scan of an individual and generate a biometric authentication template. The sensor platform then requests biometric authentication of the individual by the biometric authentication platform via the established secure communications. The biometric authentication platform compares the generated biometric template to one or more of the enrolled biometric templates stored in memory at the biometric authentication platform. The result of the authentication is then communicated to the requesting sensor platform via the established secure communications."\nUS11397800B2: "A removable card-enabled BPID Security Device integrates a removable card reader with a biometric authentication component to provide secured access to electronic systems. The device allows for an individual to insert a removable card into an aperture in the physical enclosure of the BPID Security Device, allowing the removable card and the BPID Security Device to electronically communicate with each other. The BPID Security Device is based on a custom application specific integrated circuit that incorporates removable card terminals, such that the BPID Security Device can communicate directly with an inserted removable card. In an alternative embodiment of the invention, the BPID Security Device is based on a commercial off-the-shelf microprocessor, and may communicate with a commercial off-the-shelf microprocessor removable card receiver using a serial, USB, or other type of communication protocol. The device allows for enrolling a user\'s credentials onto the BPID Security Device and for authenticating an individual using the BPID Security Device."\nUS20160311400A1: "The invention relates to a method for authenticating a driver (2) in a motor vehicle (1), having a detection device (10) which is arranged in the motor vehicle (1) and has the purpose of detecting actual data (50) of the driver (2) which is transmitted during the authentication to a checking device (20) which is arranged in an external station (3) outside the motor vehicle (1), wherein the checking device (20) compares the actual data (50) with setpoint data (60), and when the actual data (50) corresponds to the setpoint data (60) an enable signal (70) is transmitted from the external station (3) to the motor vehicle (1), as a result of which a starting process of the motor vehicle (1) for the driver (2) is made possible."\nUS9376090B2: "The invention relates to a method for authenticating a driver (2) in a motor vehicle (1) by means of a recognition device (10) disposed in the motor vehicle (1) for collecting actual data (50) of the driver (2) which are transmitted during the authentication to a checking device (20) disposed in an external station (3) outside the motor vehicle (1), wherein the checking device (20) compares the actual data (50) with the target data (60) and in the event of conformity of the actual data (50) with the target data (60) an enabling signal (70) is sent from the external station (3) to the motor vehicle (1), thus enabling the driver (2) to start the motor vehicle (1)."\n'

In [None]:
from trulens_eval import TruCustomApp
tru_rag = TruCustomApp(rag,
    app_id = 'RAG Classifications',
    feedbacks = [f_groundedness, f_qa_relevance, f_context_relevance])

In [None]:
with tru_rag as recording:
    rag.query("Name 5 CPC classifications based on this abstract, that I can use for the search in a patent database. \
Please give me a python string for the codes of the 5 most relevant \
CPC classifications to a possible patent.")

In [None]:
tru.get_leaderboard(app_ids=["RAG Classifications"])

In [None]:
tru.run_dashboard()