In [1]:
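# Uncomment to install the dependencies into the kernel's environment.
# %pip install langchain
# %pip install langchain-openai
# %pip install openai
# %pip install PyPDF2
# %pip install faiss-cpu
# %pip install tiktoken
# %pip install python-dotenv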


In [2]:
# Load environment variables from .env file
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())

True

In [3]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS

In [4]:
# Open the PDF to analyse; the relative path is resolved against the current working directory.
reader = PdfReader('Article 17.pdf')

In [5]:
# Concatenate the extracted text of every page into raw_text.
# Pages for which extract_text() returns nothing (None or '') are skipped,
# and the pieces are joined in one pass instead of growing a string with +=.
raw_text = ''.join(filter(None, (page.extract_text() for page in reader.pages)))

In [6]:
raw_text[:100]

' \n ANNOTATIONS \nCross references.  — For duty of successor in business, see 7-1-61 NMSA 1978 et \nseq'

In [7]:
# We need to split the text that we read into smaller chunks so that during information retrieval we don't hit the token size limits.

# Split on newlines into chunks of at most 1000 characters (measured with len),
# with 200 characters of overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
texts = text_splitter.split_text(raw_text)

In [8]:
len(texts)

28

In [9]:
texts[0]

'ANNOTATIONS \nCross references.  — For duty of successor in business, see 7-1-61 NMSA 1978 et \nseq.  \nAm. Jur. 2d, A.L.R. and C.J.S. references.  — 45 Am. Jur. 2d Intoxicating Liquors §§ \n203 to 219.  \n48 C.J.S. Intoxicating Liquors §§ 199 to 212.  \n7-17-2. Definitions. \nAs used in the Liquor Excise Tax Act:  A. "alcoholic beverages" means distilled or  rectified spirits, potable alcohol, brandy, \nwhiskey, rum, gin, aromatic bitters or an y similar beverage, including blended or \nfermented beverages, dilutions or mixtures of  one or more of the foregoing containing \nmore than one-half of one percent alcohol by volume, but "alcoholic beverages" does not include medicinal bitters; \nB. "beer" means an alcoholic beverage obta ined by the fermentation of any infusion \nor decoction of barley, malt and hops or other cereals in water and includes porter, \nbeer, ale and stout; \nC. "cider" means an alcoholic beverage made from the normal alcoholic'

In [10]:
texts[1]

'or decoction of barley, malt and hops or other cereals in water and includes porter, \nbeer, ale and stout; \nC. "cider" means an alcoholic beverage made from the normal alcoholic \nfermentation of the juice of sound, ripe apples or pears that contains not less than one-half of one percent of alcohol by volume and not more than eight and one-half percent of alcohol by volume; \nD. "department" means the taxation and revenue department, the secretary of \ntaxation and revenue or any employee of the department exercising authority lawfully delegated to that employee by the secretary; \nE. "fortified wine" means wine containin g more than fourteen percent alcohol by \nvolume when bottled or packaged by the manufacturer, but "fortified wine" does not include: \n(1) wine that is sealed or capped by cork closure and aged two years or more; \n(2) wine that contains more than four teen percent alcohol by volume solely as'

In [11]:
# Create the OpenAI embeddings object that is handed to the vector store in the next cell.
# NOTE(review): this import rebinds the OpenAIEmbeddings name imported earlier from
# langchain.embeddings.openai; the langchain_openai version is the one used from here on.
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [12]:
docsearch = FAISS.from_texts(texts, embeddings)

In [13]:
docsearch

<langchain_community.vectorstores.faiss.FAISS at 0x24c2b599d10>

In [14]:
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI

In [15]:
chain = load_qa_chain(OpenAI(), chain_type="stuff")

In [16]:
# Retrieve the chunks most similar to the question from the vector store,
# then pass those chunks together with the question to the QA chain.
query = "what is this article about?"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)

  warn_deprecated(


' This article discusses the potential implications and violations of the dormant Commerce Clause in regards to alcohol tax rates and regulations in the state of New Mexico. It also mentions various amendments that have been made to these regulations over the years.'

In [17]:
# Definition lookup: fetch the chunks closest to the question and let the QA chain answer from them.
query = "what does alcoholic beverage mean according to article 17?"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)

' Alcoholic beverage means distilled or rectified spirits, potable alcohol, brandy, whiskey, rum, gin, aromatic bitters or any similar beverage, including blended or fermented beverages, dilutions or mixtures of one or more of the foregoing containing more than one-half of one percent alcohol by volume, but does not include medicinal bitters.'

In [18]:
# List the amendments mentioned in the article.
# The query string is passed verbatim to both the similarity search and the LLM,
# so the misspelling "ammendments" is corrected.
query = "what are the amendments in article 17?"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)

' The 2000, 1997, 1996, 1995, 1994, 1993, 1991, 1983, and 1984 amendments.'

In [19]:
# Compare two specific amendments.
# The query string is passed verbatim to both the similarity search and the LLM,
# so the misspelling "Comapre" is corrected.
query = "Compare and contrast amendments 2000 and 2008?"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)

' The 2000 amendment increased the minimum production of wine from five hundred sixty thousand liters to nine hundred fifty thousand liters and changed the excise tax on beer produced by a microbrewer. The 2008 amendment also increased the minimum production of wine, but to a smaller amount of barrels, and added additional tax rates for different production levels of wine. Both amendments also made changes to the language and structure of the section.'

In [1]:
#!/usr/bin/env python
# coding: utf-8

# Required Libraries
import os
from dotenv import find_dotenv, load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# Load environment variables for API keys
load_dotenv(find_dotenv())

# Single Function to Load, Split, Embed, and Query a PDF
def analyze_pdf(pdf_path, questions):
    """
    Analyze a PDF file and return answers to a list of questions.

    The PDF text is extracted page by page, split into overlapping chunks,
    indexed in a FAISS vector store, and each question is answered by a
    "stuff" QA chain over the chunks returned by a similarity search.

    :param pdf_path: Path to the PDF file
    :param questions: List (or other iterable) of questions to ask about the
        PDF. A single string is treated as one question instead of being
        iterated character by character; ``None`` is treated as no questions.
    :return: Dictionary of question-answer pairs. Empty when no questions
        are given, in which case the PDF is not opened at all.
    :raises ValueError: If no text could be extracted from the PDF
    """
    # Normalise the questions up front so the loop below always sees a list.
    if questions is None:
        questions = []
    elif isinstance(questions, str):
        # Iterating a bare string would ask one "question" per character.
        questions = [questions]
    else:
        questions = list(questions)

    # Nothing to ask: skip PDF parsing and all embedding / LLM calls.
    if not questions:
        return {}

    # Read the PDF file; pages whose extract_text() returns nothing are skipped.
    reader = PdfReader(pdf_path)
    raw_text = ''.join(filter(None, (page.extract_text() for page in reader.pages)))

    # Split the text into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)

    # Fail early with a clear message rather than handing an empty list of
    # chunks to the vector store.
    if not texts:
        raise ValueError(f"No extractable text found in PDF: {pdf_path!r}")

    # Create embeddings and a vector store
    embeddings = OpenAIEmbeddings()
    docsearch = FAISS.from_texts(texts, embeddings)

    # Load the QA chain with OpenAI LLM
    chain = load_qa_chain(OpenAI(), chain_type="stuff")

    # For each question, perform a similarity search and run the QA chain
    answers = {}
    for question in questions:
        docs = docsearch.similarity_search(question)
        answers[question] = chain.run(input_documents=docs, question=question)

    return answers

# Demo entry point: ask a fixed set of questions about the sample PDF.
if __name__ == "__main__":
    pdf_path = "Article 17.pdf"
    questions = [
        "What is this article about?",
        "What does alcoholic beverage mean according to article 17?",
        "What are the amendments in article 17?",
        "Compare and contrast amendments 2000 and 2008?",
    ]

    results = analyze_pdf(pdf_path, questions)

    # Emit every question followed by its answer, one pair per block.
    for question in results:
        answer = results[question]
        print(f"Question: {question}\nAnswer: {answer}\n")

  warn_deprecated(
  warn_deprecated(
  warn_deprecated(


Question: What is this article about?
Answer:  This article discusses the dormant Commerce Clause and how it applies to proposed legislation that seeks to raise the basic alcohol tax rates imposed by NMSA 1978, § 7-17-5. It also mentions the different amendments made to this section over the years and how they relate to small winers, winegrowers, and craft distillers both in-state and out-of-state.

Question: What does alcoholic beverage mean according to article 17?
Answer:  According to article 17, "alcoholic beverages" means distilled or rectified spirits, potable alcohol, brandy, whiskey, rum, gin, aromatic bitters or any similar beverage, including blended or fermented beverages, dilutions or mixtures of one or more of the foregoing containing more than one-half of one percent alcohol by volume, but does not include medicinal bitters.

Question: What are the amendments in article 17?
Answer:  The amendments in article 17 include changes to the imposition and rate of the liquor exc