In [67]:
import os

import yaml
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import BigQueryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

## Authentication

Load OpenAI API Key

In [3]:
# Read the secrets file that sits one directory above the notebook, so the key
# itself never appears in a cell.
with open('../api_keys.yaml', 'r') as key_file:
    keys = yaml.safe_load(key_file)

# Passed explicitly to the OpenAI embedding and LLM constructors further down.
openai_api_key = keys['openai']

Authenticate to BigQuery

In [4]:
# Path (relative to the notebook's working directory) of the service-account
# key file that is exported through GOOGLE_APPLICATION_CREDENTIALS.
SERVICE_ACCOUNT_KEY_PATH = '../law-project-service-account.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = SERVICE_ACCOUNT_KEY_PATH

In [44]:
# Patent to analyse, in the format stored in `spif_publication_number`.
# Change this one value to run the notebook against a different patent.
PATENT_NUMBER = "US8205344B2"

# Pulls the title, abstract and claims text of the selected patent from the
# public BigQuery patents dataset.
# NOTE(review): the comma joins against UNNEST(...) yield one row per
# combination of localized title/abstract/claims entries; the recorded output
# shows a single document for this patent - confirm for other patents.
# PATENT_NUMBER is interpolated directly into the SQL text, so it must stay a
# trusted, notebook-defined constant (never user input).
QUERY = (
    'SELECT spif_publication_number as patent_number, t.text as title,  a.text as abstract, c.text as claims '
    'FROM `patents-public-data.patents.publications`, UNNEST(title_localized) as t, UNNEST(abstract_localized) as a,  UNNEST(claims_localized) as c '
    f'WHERE spif_publication_number = "{PATENT_NUMBER}" '
    'LIMIT 100')

## BigQuery Doc Loading

In [45]:
# Wrap QUERY in a BigQuery document loader; nothing is fetched until load().
loader = BigQueryLoader(QUERY)

# Execute the query and collect the results as a list of Document objects
# (the recorded output below shows one Document whose page_content lists the
# selected columns as "name: value" lines).
data = loader.load()

In [46]:
# Dump the loaded documents to eyeball what the query returned.
print(data)

[Document(page_content='patent_number: US8205344B2\ntitle: Safety razor having pivotable blade unit\nabstract: A safety razor having a blade unit has at least one blade and a handle casing. A pivotal connection structure is disposed between the blade unit and the handle casing. A first member is connected to the blade unit and a second member is connected to the handle casing. A joint member connects the first member and the second member and facilitates movement of the first member relative to the second member about a hinge axis that is substantially perpendicular to the at least one blade.\nclaims: 1. A safety razor comprising:\n a blade unit having at least one blade having a cutting edge; \n a handle casing; \n a pivotal connection structure including: \n a first member connected to the blade unit; \n a second member connected to the handle casing; and \n a joint member comprising a plurality of separated joint elements which are disposed along the perpendicular pivot axis, and wh

## Chunking

### Split query results into chunks

In [47]:
# Split the loaded patent text into chunks of at most 1000 characters with no
# overlap between consecutive chunks, so each chunk can be embedded and
# retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)

# Show the first chunk and the total number of chunks as a quick sanity check.
print(texts[0])
print(len(texts))

page_content='patent_number: US8205344B2\ntitle: Safety razor having pivotable blade unit\nabstract: A safety razor having a blade unit has at least one blade and a handle casing. A pivotal connection structure is disposed between the blade unit and the handle casing. A first member is connected to the blade unit and a second member is connected to the handle casing. A joint member connects the first member and the second member and facilitates movement of the first member relative to the second member about a hinge axis that is substantially perpendicular to the at least one blade.\nclaims: 1. A safety razor comprising:\n a blade unit having at least one blade having a cutting edge; \n a handle casing; \n a pivotal connection structure including: \n a first member connected to the blade unit; \n a second member connected to the handle casing; and' metadata={}
7


Give each document chunk a source index so the LLM can cite it in the "SOURCES" part of its answer.

In [52]:
# Tag every chunk with its position in `texts`; this becomes the 'source'
# value attached to the chunk's metadata.
for chunk_index, chunk in enumerate(texts):
    chunk.metadata['source'] = chunk_index

# Display the first chunk's source to confirm the assignment.
texts[0].metadata['source']

0

In [64]:
# Character count of the first loaded document's full text.
len(data[0].page_content)

6431

In [66]:
# First character of the document text (page_content is indexed like a string).
data[0].page_content[0]

'p'

### Construct vector DB out of results

CAREFUL: Don't run the next cell too often; every run calls the OpenAI embeddings API, which costs money.

In [48]:
# Embed every chunk with the OpenAI embeddings client and build a FAISS
# index over the resulting vectors.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
faiss_index = FAISS.from_documents(texts, embeddings)

In [49]:
# Retrieval sanity check: fetch the six chunks most similar to a sample
# question and display them.
docs = faiss_index.similarity_search(
    "Summarize the independent claims.",
    k=6,
)
docs

[Document(page_content='9. The safety razor of  claim 7  wherein the blade unit includes a frame with a cam surface and the handle unit includes a spring-biased plunger with a rounded distal end that contacts the cam surface at a location spaced from the parallel pivot axis to impart a biasing force to the frame. \n     \n     \n       10. The safety razor of  claim 1  further comprising a shaving cartridge which is detachable from the handle unit, wherein the shaving cartridge includes the blade unit and the pivotal connection structure. \n     \n     \n       11. The safety razor of  claim 1  further comprising a shaving cartridge which is detachable from the handle unit, wherein the shaving cartridge includes the blade unit and the handle unit includes the pivotal connection structure. \n     \n     \n       12. The safety razor of  claim 1  wherein the separated joint elements include a triangular prism shaped base member and an elastic plate member extending from the base member.'

### Use search results against vector DB in LLM QA prompt

CAREFUL: Don't run the next cell too often; every run makes paid OpenAI API calls.

In [54]:
# Prompt for the "stuff" QA-with-sources chain: the retrieved chunks are
# substituted for {summaries} and the user's question for {question}.
template = """You are a patent lawyer. Given the following extracted parts of a patent and a question, create a final answer with references ("SOURCES"). 
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.

QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER:"""
PROMPT = PromptTemplate(input_variables=["summaries", "question"], template=template)

# Completion model with temperature 0, wired into the chain with the custom prompt.
llm = OpenAI(openai_api_key=openai_api_key, temperature=0)
chain = load_qa_with_sources_chain(llm, chain_type="stuff", prompt=PROMPT)

# Retrieve the six chunks closest to the question and let the chain answer
# from them; the returned dict is displayed as the cell output.
query = "How does claim 1 differ from claim 15?"
docs = faiss_index.similarity_search(query, k=6)
chain({"input_documents": docs, "question": query}, return_only_outputs=True)

{'output_text': '\nClaim 1 of the safety razor includes a blade unit having at least one blade with a cutting edge, a handle casing, and a pivotal connection structure including a first member connected to the blade unit and a second member connected to the handle casing. Claim 15 of the safety razor includes all of the elements of claim 1, as well as a joint member connecting the first and second members and facilitating movement of the first member relative to the second member about a hinge axis that is substantially perpendicular to the at least one blade. Additionally, claim 15 includes further elements such as the joint member being formed by an injection molding process of a thermo plastic material, at least one of the first and second members having a convexly curved face facing the other of the first and second members, a shaving cartridge which is detachable from the handle unit, and the shaving cartridge including the blade unit and the pivotal connection structure. \n\nSOUR

In [61]:
# Inspect the chunk at index 6 (the last of the 7 chunks counted earlier),
# including its assigned 'source' metadata.
texts[6]

Document(page_content='16. The safety razor according to  claim 15 , wherein the first and second members and the joint member are formed by an injection molding process of a thermo plastic material. \n     \n     \n       17. The safety razor according to  claim 15 , wherein at least one of the first and second members has a convexly curved face facing the other of the first and second members. \n     \n     \n       18. The safety razor according to  claim 15 , further comprising a shaving cartridge which is detachable from the handle unit, wherein the shaving cartridge includes the blade unit and the pivotal connection structure. \n     \n     \n       19. The safety razor according to  claim 15 , further comprising a shaving cartridge which is detachable from the handle unit, wherein the shaving cartridge includes the blade unit and the handle unit includes the pivotal connection structure.', metadata={'source': 6})

## Whole Patent as Input

In [74]:
# Whole-patent variant: the full document in `data` is passed to the chain
# instead of chunks retrieved from the vector index. The prompt additionally
# asks for a concise, plain-English answer.
template = """You are a patent lawyer. Given the following extracted parts of a patent and a question, create a final answer with references ("SOURCES"). Answer concisely in plain English so that a layman could understand.
Do not repeat anything in the patent in your answer. If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.

QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER:"""
PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])

# gpt-3.5-turbo is a chat model, so it is wrapped with ChatOpenAI rather than
# the completion-style OpenAI class.
chat_llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key, model_name='gpt-3.5-turbo')
chain = load_qa_with_sources_chain(chat_llm, chain_type="stuff", prompt=PROMPT)
query = "How does claim 1 differ from claim 15?"
data[0].metadata['source'] = 0  # Need to give our singular doc source info that the LLM can cite.
chain({"input_documents": data, "question": query}, return_only_outputs=True)



{'output_text': 'Claim 1 and claim 15 both describe a safety razor with a blade unit and a handle casing connected by a pivotal connection structure that allows movement of the blade unit relative to the handle casing about a hinge axis that is perpendicular to the cutting edge of the blade. However, claim 1 specifies that the joint member of the pivotal connection structure has a thinner wall section toward the hinge axis than toward at least one of the joint portions of the first and second members, while claim 15 does not include this feature. \n\nSOURCES: US8205344B2'}