#### Importing Dependencies

In [17]:
import base64
import json
import os
import uuid
from typing import Any

import chromadb
import gradio
import openai
import pytesseract
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.schema.document import Document
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.storage import InMemoryStore
from langchain_community.vectorstores import Chroma
from unstructured.partition.pdf import partition_pdf

#### Define input and output paths

In [18]:
# NOTE(review): machine-specific absolute path; nothing else in the visible
# cells reads `output_path` -- confirm whether it is still needed.
output_path = 'C:/Users/eeo21/VSCodeProjects/PatentRAGApplication/figures'

# Directory the notebook kernel was started in; the (currently commented-out)
# partition_pdf call looks for "<patent>.pdf" files here.
input_path = os.getcwd()

#### Define and Instantiate LLM

In [19]:
# API key
#
# The key is read from the OPENAI_API_KEY environment variable so that no
# secret is stored in the notebook. Any key that was ever saved in this file
# must be treated as leaked and revoked in the OpenAI dashboard.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment that starts "
        "the notebook kernel before running this cell."
    )
openai.api_key = OPENAI_API_KEY

# OpenAI models
#
# `chain_gpt_35` keeps its historical name (other cells call it) although it is
# bound to a GPT-4 model; it handles the text and table summaries.
chain_gpt_35 = ChatOpenAI(model="gpt-4-0125-preview", max_tokens=1024, api_key=OPENAI_API_KEY)
# Vision-capable model used for the image summaries.
chain_gpt_4_vision = ChatOpenAI(model="gpt-4-vision-preview", max_tokens=1024, api_key=OPENAI_API_KEY)

#### Function to encode images for use with GPT-4 Vision Model 

In [20]:
# Function to encode images
def encode_image(image_path):
    """Return the contents of the file at `image_path` as a base64 string.

    The file is opened in binary mode and read in full; the base64 bytes are
    decoded as UTF-8 so the result is a plain `str`.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

#### Functions to create summaries from PDF Text, Tables and Images 

In [50]:
# Function for text summaries
def summarize_text(text_element):
    """Request a detailed summary of one text chunk.

    `text_element` is interpolated into a single human message, which is sent
    through the module-level `chain_gpt_35` model; the `content` attribute of
    the reply is returned.
    """
    request = HumanMessage(
        content=f"Summarize in detail the following text:\n\n{text_element}\n\nSummary:"
    )
    reply = chain_gpt_35.invoke([request])
    return reply.content

# Function for table summaries
def summarize_table(table_element):
    """Request a detailed summary of one table.

    `table_element` is interpolated into a single human message, which is sent
    through the module-level `chain_gpt_35` model; the `content` attribute of
    the reply is returned.
    """
    request = HumanMessage(
        content=f"Summarize in detail the following table:\n\n{table_element}\n\nSummary:"
    )
    reply = chain_gpt_35.invoke([request])
    return reply.content

# Function for image summaries
def summarize_image(encoded_image):
    """Describe a base64-encoded patent figure with the vision model.

    Args:
        encoded_image: Base64 text of the image file, as produced by
            `encode_image`.

    Returns:
        The `content` of the reply from `chain_gpt_4_vision`.

    The ingestion loop encodes `.png` as well as `.jpg`/`.jpeg` files, so the
    media type in the data URL is chosen from the payload instead of always
    claiming JPEG. Anything that is not recognisably PNG keeps the previous
    `image/jpeg` label.
    """
    # Every PNG file starts with the same 8-byte signature, whose base64 form
    # begins with these ten characters.
    if encoded_image.startswith("iVBORw0KGg"):
        mime_type = "image/png"
    else:
        mime_type = "image/jpeg"

    prompt = [
        # Role instructions belong in a system message; an AI message would be
        # presented to the model as something it had previously said.
        SystemMessage(content="You are a bot that is good at analyzing images."),
        HumanMessage(content=[
            {"type": "text", "text": "Describe the contents of this image from a patent application"},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{encoded_image}"
                },
            },
        ])
    ]
    response = chain_gpt_4_vision.invoke(prompt)
    return response.content

#### Define retrieval algorithm and database

In [22]:
# Metadata key under which each summary records the ID of its original content.
id_key = "doc_id"

# Original contents are kept in the docstore; summaries are embedded into the
# "summaries" Chroma collection.
store = InMemoryStore()
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    collection_name="summaries",
)

# Initialize the retriever
retriever = MultiVectorRetriever(
    id_key=id_key,
    docstore=store,
    vectorstore=vectorstore,
)


#### Function to add documents to retrieval algorithm

In [23]:
# Function to add documents to the retriever
def add_documents_to_retriever(summaries, original_contents):
    """Index `summaries` for search and store their originals for retrieval.

    Each summary gets a fresh UUID. The summary is added to the retriever's
    vector store with that UUID in its metadata, and the matching entry of
    `original_contents` is written to the retriever's docstore under the same
    UUID.

    Args:
        summaries: Summary strings to embed.
        original_contents: The content each summary stands for, in the same
            order as `summaries`.

    Raises:
        ValueError: If the two inputs differ in length. `zip` would otherwise
            silently drop the unmatched tail and leave summaries pointing at
            IDs with no stored content.
    """
    summaries = list(summaries)
    original_contents = list(original_contents)
    if len(summaries) != len(original_contents):
        raise ValueError(
            f"Got {len(summaries)} summaries but {len(original_contents)} "
            "original contents; they must pair up one-to-one."
        )
    if not summaries:
        # Nothing to index. NOTE(review): Chroma has been seen to reject empty
        # batches -- confirm for the installed version.
        return

    # Use the retriever's own key so the metadata written here always matches
    # the key the retriever reads at query time.
    id_key = retriever.id_key
    doc_ids = [str(uuid.uuid4()) for _ in summaries]
    summary_docs = [
        Document(page_content=summary, metadata={id_key: doc_id})
        for summary, doc_id in zip(summaries, doc_ids)
    ]
    retriever.vectorstore.add_documents(summary_docs)
    retriever.docstore.mset(list(zip(doc_ids, original_contents)))

#### Adding PDF information into database

In [51]:
Patents = ['US10196086', 'US20180193694A1', 'us10745814', 'us2021229059']

# For every patent: encode its extracted figures, summarise each with the
# vision model, index the summaries in the retriever and dump them to
# "<patent>_images.json". The text/table pipeline is kept below, disabled.
for patent in Patents:

    # raw_pdf_elements = partition_pdf(
    #     filename=os.path.join(input_path, f"{patent}.pdf"),
    #     extract_images_in_pdf=True, # Get images 
    #     infer_table_structure=True, # Get tables 
    #     chunking_strategy="by_title", # Preserves sections (by headings and subheadings, as well as over pages where possible)
    #     max_characters=3000, # Define chunk size
    #     new_after_n_chars=1900, # Soft max chunk size 
    #     extract_image_block_types=['Image'],
    #     combine_text_under_n_chars=1000, # Min length of chunk size
    #     extract_image_block_output_dir=f'C:/Users/eeo21/VSCodeProjects/PatentRAGApplication/{patent}_images') #Directory to store images


    # Save table and text elements to list
    # text_elements = []
    # table_elements = []
    image_elements = []
    # File name of each entry in image_elements, so failures can be reported
    # by name.
    image_names = []

    # for element in raw_pdf_elements:
    #     if 'CompositeElement' in str(type(element)):
    #         text_elements.append(element)
    #     elif 'Table' in str(type(element)):
    #         table_elements.append(element)
    
    # Marker appended to every summary; the chatbot splits on '###_###' to
    # recover the patent number.
    patent_id = f'###_### {patent}' 

    # table_elements = [i.text for i in table_elements]
    # table_elements = [''.join([i, patent_id]) for i in table_elements]

    # text_elements = [i.text for i in text_elements]
    # text_elements = [''.join([i, patent_id]) for i in text_elements]

    # Save image elements to list, converting to format compatible with Vision transformer model

    # NOTE(review): machine-specific absolute path; consider deriving it from
    # a configurable base directory.
    image_dir = f'C:/Users/eeo21/VSCodeProjects/PatentRAGApplication/{patent}_images'
    # sorted() makes the processing order reproducible; os.listdir returns
    # entries in arbitrary order.
    for image_file in sorted(os.listdir(image_dir)):
        if image_file.endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(image_dir, image_file)
            encoded_image = encode_image(image_path)
            image_elements.append(encoded_image)
            image_names.append(image_file)

    # # Generate Table summaries
    # table_summaries = []
    
    # for i, te in enumerate(table_elements):
    #     summary = summarize_table(te)
    #     summary = ''.join([summary, patent_id])
    #     table_summaries.append(summary)
    #     print(f"{i + 1}th element of tables processed.")

    # # Generate Text element summaries
    # text_summaries = []
    # for i, te in enumerate(text_elements):
    #     summary = summarize_text(te)
    #     summary = ''.join([summary, patent_id])
    #     text_summaries.append(summary)
    #     print(f"{i + 1}th element of texts processed.")

    # Generate summaries of images
    image_summaries = []
    for i, (image_name, ie) in enumerate(zip(image_names, image_elements)):
        try:
            summary = summarize_image(ie)
            summary = ''.join([summary, patent_id])
            image_summaries.append(summary)
        except Exception as exc:
            # Catch Exception rather than everything so that interrupting the
            # kernel still stops the loop. Report the file name and the actual
            # error; printing `ie` would dump the entire base64 image.
            print(f'{image_name} could not be summarised ({type(exc).__name__}: {exc})')
        print(f"{i + 1}th element of images processed.")

    # Add text, table and image summaries to vector database
    # add_documents_to_retriever(text_summaries, text_elements)
    # add_documents_to_retriever(table_summaries, table_elements)
    # The image summaries double as the stored "original" content. Skip the
    # call when a patent produced no summaries at all.
    if image_summaries:
        add_documents_to_retriever(image_summaries, image_summaries) 

    # Save pdf elements to databases 

    # texts =list(zip(text_summaries, text_elements))
    # tables = list(zip(table_summaries, table_elements))
    images = list(zip(image_summaries, image_summaries))

    components = [images]
    
    # Maps each summary to a one-element list holding its original content.
    PDFdictionary = {}
    for component in components:
        for summary_text, original_content in component:
            PDFdictionary[summary_text] = [original_content]
    
    with open(f"{patent}_images.json", "w") as outfile: 
        json.dump(PDFdictionary, outfile)

1th element of images processed.
2th element of images processed.
3th element of images processed.
4th element of images processed.
5th element of images processed.
6th element of images processed.
1th element of images processed.
2th element of images processed.
3th element of images processed.
4th element of images processed.
5th element of images processed.
6th element of images processed.
7th element of images processed.
8th element of images processed.
9th element of images processed.
10th element of images processed.
11th element of images processed.
1th element of images processed.
2th element of images processed.
3th element of images processed.
4th element of images processed.
5th element of images processed.
6th element of images processed.
7th element of images processed.
8th element of images processed.
9th element of images processed.
10th element of images processed.
11th element of images processed.
12th element of images processed.
13th element of images processed.
14th

#### Define inference model and prompt

In [52]:
#Define prompt template

template = """Answer the question based only on the following context, which can include text, images and tables:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# 'Quality' selects the GPT-4 model; change to 'Fast' for a faster response,
# perhaps with lower quality.
Aim = 'Quality'

if Aim == 'Quality':
    model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview")
elif Aim == 'Fast':
    model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
else:
    # Without this branch a typo in Aim would leave `model` undefined, or
    # silently reuse whatever an earlier run of this cell had left behind.
    raise ValueError(f"Unknown Aim {Aim!r}; expected 'Quality' or 'Fast'.")

# The incoming question is passed through unchanged as "question" and also
# sent to the retriever, whose results fill the "context" slot of the prompt.
chain = ({"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser())

#### Define Gradio App

In [53]:
def chatbot(input):
    """Answer a question with the RAG chain and append the supporting context.

    Args:
        input: The user's question (the Gradio textbox value).

    Returns:
        The model's answer with '**' markers removed, followed by every
        distinct retrieved context and the patent number(s) those contexts
        were tagged with.

    Retrieved contexts are expected to be strings ending in
    '###_### <patent number>'. A context without that marker is shown as-is
    and contributes no patent number. If nothing is retrieved, the answer is
    still returned with placeholder text instead of raising.
    """
    question = f'{input}'
    response = chain.invoke(question)

    # NOTE(review): the chain already queried the retriever once; this second
    # call only exists to show the context to the user.
    contextlist = retriever.get_relevant_documents(question)

    #Remove any duplicated contexts
    contextlist = list(dict.fromkeys(contextlist))

    #Get ID of patent that each context was retrieved from
    contexts = []
    patent_ids = []
    for doc in contextlist:
        # The marker is appended at the end of each summary, so split on its
        # last occurrence.
        text, marker, patent_id = doc.rpartition('###_###')
        if not marker:
            text, patent_id = doc, ''
        contexts.append(text)
        patent_id = patent_id.strip()
        if patent_id and patent_id not in patent_ids:
            patent_ids.append(patent_id)

    if contexts:
        context = '\n\n'.join(contexts)
    else:
        context = 'No context was retrieved.'
    ID = ', '.join(patent_ids) if patent_ids else 'N/A'

    print(response)

    # Prettify response
    response = response.replace('**', '')
    response = '\n'.join([response, f"{'#'*120} \n"])
    response = '\n'.join([response, f"CONTEXT USED TO GENERATE RESPONSE:\n {context}"])
    response = '\n'.join([response, f"{'#'*120} \n"])
    response = '\n'.join([response, f"CONTEXT FOUND IN PATENT NO. {ID}"])
   
    return response

# Single-textbox UI: a multi-line question box in, the chatbot's reply out.
outputs = gradio.Textbox(label="Reply")
inputs = gradio.Textbox(label="Ask question based on facts from a patent", lines=7)

# share=True additionally serves the app on a public gradio.live URL.
gradio.Interface(
    title="Patent RAG Prototype",
    fn=chatbot,
    inputs=inputs,
    outputs=outputs,
    theme="compact",
).launch(share=True)


Sorry, we can't find the page you are looking for.


Running on local URL:  http://127.0.0.1:7866
Running on public URL: https://6bac85bbb645fd31fb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




The images in US patent US10196086 are not directly described in the provided text. The text references a different patent application, US20180193694A1, and describes images from this application instead. Therefore, based on the provided context, I cannot accurately describe the images in US patent US10196086.
The images in US patent US20180193694A1 depict various figures related to the patent application, including an oval-shaped object with a dotted pattern, possibly indicating holes, indentations, or raised points, and a rectangular area within the oval shape that might represent a different section or component. The images also include reference numbers like 410, 414, and 412, which are typically used to describe parts of the design in detail within the patent document. Additionally, there are figures that seem to include text and possibly other diagrams or illustrations related to the patent, but without specific details on their content, it's challenging to determine their exact 