In [1]:
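# Dependencies for this notebook. Uncomment to install; %pip (rather than !pip)
# installs into the environment of the running kernel.
# %pip install langchain
# %pip install langchain-openai
# %pip install langchain-community
# %pip install openai
# %pip install PyPDF2
# %pip install faiss-cpu
# %pip install tiktoken
# %pip install python-dotenv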


In [3]:
# Find the nearest .env file and load its variables into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by later cells — confirm.
from dotenv import find_dotenv, load_dotenv

dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [4]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS

In [5]:
# PDF to index; the relative path is resolved against the current working directory.
pdf_path = 'Article 17.pdf'
reader = PdfReader(pdf_path)

In [6]:
# Extract the text of every page and concatenate it into raw_text.
# Pages whose extract_text() result is empty or None are skipped, and pages are
# joined with no separator between them.
page_texts = (page.extract_text() for page in reader.pages)
raw_text = ''.join(page_text for page_text in page_texts if page_text)

In [7]:
# Preview the first 100 characters to sanity-check the extracted text.
raw_text[:100]

' \n ANNOTATIONS \nCross references.  — For duty of successor in business, see 7-1-61 NMSA 1978 et \nseq'

In [8]:
# Split the extracted text into smaller, overlapping chunks so that during
# information retrieval we don't hit the token size limits.
CHUNK_SIZE = 1000     # upper bound on chunk length, as measured by length_function
CHUNK_OVERLAP = 200   # length shared between consecutive chunks

text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

In [9]:
# Number of chunks produced by the splitter.
len(texts)

28

In [10]:
# Inspect the first chunk.
texts[0]

'ANNOTATIONS \nCross references.  — For duty of successor in business, see 7-1-61 NMSA 1978 et \nseq.  \nAm. Jur. 2d, A.L.R. and C.J.S. references.  — 45 Am. Jur. 2d Intoxicating Liquors §§ \n203 to 219.  \n48 C.J.S. Intoxicating Liquors §§ 199 to 212.  \n7-17-2. Definitions. \nAs used in the Liquor Excise Tax Act:  A. "alcoholic beverages" means distilled or  rectified spirits, potable alcohol, brandy, \nwhiskey, rum, gin, aromatic bitters or an y similar beverage, including blended or \nfermented beverages, dilutions or mixtures of  one or more of the foregoing containing \nmore than one-half of one percent alcohol by volume, but "alcoholic beverages" does not include medicinal bitters; \nB. "beer" means an alcoholic beverage obta ined by the fermentation of any infusion \nor decoction of barley, malt and hops or other cereals in water and includes porter, \nbeer, ale and stout; \nC. "cider" means an alcoholic beverage made from the normal alcoholic'

In [11]:
# Inspect the second chunk; its opening lines repeat the end of the first chunk
# because of the configured chunk overlap.
texts[1]

'or decoction of barley, malt and hops or other cereals in water and includes porter, \nbeer, ale and stout; \nC. "cider" means an alcoholic beverage made from the normal alcoholic \nfermentation of the juice of sound, ripe apples or pears that contains not less than one-half of one percent of alcohol by volume and not more than eight and one-half percent of alcohol by volume; \nD. "department" means the taxation and revenue department, the secretary of \ntaxation and revenue or any employee of the department exercising authority lawfully delegated to that employee by the secretary; \nE. "fortified wine" means wine containin g more than fourteen percent alcohol by \nvolume when bottled or packaged by the manufacturer, but "fortified wine" does not include: \n(1) wine that is sealed or capped by cork closure and aged two years or more; \n(2) wine that contains more than four teen percent alcohol by volume solely as'

In [13]:
# Create the OpenAI embeddings object that is passed to FAISS.from_texts below.
# NOTE(review): this import rebinds the OpenAIEmbeddings name that an earlier cell
# imported from langchain.embeddings.openai; consider keeping only this import and
# moving it to the import cell at the top of the notebook.
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [14]:
# Build a FAISS vector store from the text chunks, using `embeddings` to vectorise them.
# NOTE(review): presumably this is where the OpenAI embedding API is actually called — confirm.
docsearch = FAISS.from_texts(texts, embeddings)

In [16]:
# Display the vector store object; the output is only its default repr.
docsearch

<langchain_community.vectorstores.faiss.FAISS at 0x262a1728390>

In [19]:
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI

In [20]:
# Question-answering chain over an OpenAI completion model created with default settings.
# Per the LangChain docs, the "stuff" chain type places all input documents into a single prompt.
llm = OpenAI()
chain = load_qa_chain(llm, chain_type="stuff")

In [21]:
# Retrieve the chunks most similar to the question, then have the QA chain answer from them.
query = "what is this article about?"
docs = docsearch.similarity_search(query)
# Chain.run is deprecated; invoke takes the inputs as a dict and returns a dict
# whose "output_text" entry holds the answer string.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

  warn_deprecated(


' This article is about the amendments and changes made to the liquor excise tax rates for small wine growers and craft distillers in the state of New Mexico. It also discusses the dormant Commerce Clause and how it relates to state regulation of interstate commerce.'

In [22]:
# Ask for a specific definition; only the chunks most similar to the question are sent to the model.
query = "what does alcoholic beverage mean according to article 17?"
docs = docsearch.similarity_search(query)
# Chain.run is deprecated; invoke takes the inputs as a dict and returns a dict
# whose "output_text" entry holds the answer string.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

' According to Article 17, "alcoholic beverages" means distilled or rectified spirits, potable alcohol, brandy, whiskey, rum, gin, aromatic bitters or any similar beverage, including blended or fermented beverages, dilutions or mixtures of one or more of the foregoing containing more than one-half of one percent alcohol by volume, but does not include medicinal bitters.'

In [26]:
# Ask which amendments are mentioned; the answer can only draw on the retrieved chunks.
query = "what are the ammendments in article 17?"
docs = docsearch.similarity_search(query)
# Chain.run is deprecated; invoke takes the inputs as a dict and returns a dict
# whose "output_text" entry holds the answer string.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

' The 1997, 2000, 1996, 1995, 1994, 1993, 1991, 1982, 1983, 1984, and 2008 amendments.'

In [27]:
# Ask the chain to compare two amendments using the retrieved chunks as context.
query = "Comapre and contrast amendments 2000 and 2008?"
docs = docsearch.similarity_search(query)
# Chain.run is deprecated; invoke takes the inputs as a dict and returns a dict
# whose "output_text" entry holds the answer string.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

' The 2000 amendment, effective July 1, 2000, substituted "five hundred sixty thousand liters" for "three hundred seventy-five thousand liters" in Subsection E. The 2008 amendment, effective July 1, 2008, increased the minimum production of wine from five hundred sixty thousand liters to nine hundred fifty thousand liters and added Subsections B and C. Both amendments deal with the production of wine and the amount of wine that can be produced. However, the 2008 amendment also added new subsections, while the 2000 amendment only substituted one amount for another.'