# RAG Tariff Regulations Trial

In [1]:
from langchain_community.document_loaders import TextLoader, WebBaseLoader, PyPDFLoader
import bs4
import os

USER_AGENT environment variable not set, consider setting it to identify your requests.


### Load Data

In [2]:
# Read the local demo text file through LangChain's TextLoader.
# The file name spelling is kept exactly as written because it has to match the file on disk.
text_path = "data/tarrifs_demo_text_file.txt"
loader = TextLoader(text_path)
text_documents = loader.load()

In [3]:
# Web pages to ingest in addition to the local files.
URLs = ["https://taxfoundation.org/blog/trump-tariffs-prices-long-term-effects/"]

In [4]:
# Only parse the parts of each page carrying the "entry-content" class;
# the strainer is handed to BeautifulSoup via bs_kwargs.
entry_content_only = bs4.SoupStrainer(class_=("entry-content",))
web_loader = WebBaseLoader(URLs, bs_kwargs={"parse_only": entry_content_only})

In [5]:
# Load the pages listed in URLs with the web loader configured above.
text_documents1 = web_loader.load()

In [6]:
text_documents1

[Document(metadata={'source': 'https://taxfoundation.org/blog/trump-tariffs-prices-long-term-effects/', 'language': 'No language found.'}, page_content='\n\nPresident Trump acknowledges his tariffs will “short term [cause] some little pain” but claims they’ll be “worth the price that must be paid.” Trump is right that his tariffs will cause “a little disturbance,” but unfortunately, he’s wrong that with time tariffs will bring wealth and jobs creation.\nHistory shows tariffs lead to lasting economic harm, such as lower production and incomes. Data from 151 countries from 1963 through 2014 shows higher tariffs reduce output and productivity, increase unemployment, and worsen inequality. Studies of US tariffs in 2018-2019 confirm they failed to boost employment and instead harmed manufacturing due to rising input costs and foreign retaliation.\nWhen the United States imposes a tariffTariffs are taxes imposed by one country on goods imported from another country. Tariffs are trade barrier

In [7]:
# Collect every PDF in data/ as a "data/<name>" path.
# os.listdir() returns entries in arbitrary order and later cells pick the
# PDFs by position (pdffiles[0], pdffiles[1]), so the names are sorted to
# keep that positional lookup stable across runs and platforms.
pdffiles = [
    "data/" + filename
    for filename in sorted(os.listdir("data"))
    if filename.endswith(".pdf")
]

In [8]:
pdffiles

['data/CFR-2024-title15-vol1-part30.pdf',
 'data/FTR_with_August_2023_FR_Incorporated_PUBLIC.pdf']

In [9]:
# One PyPDFLoader per discovered PDF, selected by position in pdffiles.
pdf_loader, pdf_loader2 = (
    PyPDFLoader(pdffiles[0]),
    PyPDFLoader(pdffiles[1]),
)

In [10]:
# Load both PDFs; the first loader's result goes to text_documents3,
# the second loader's to text_documents4.
text_documents3, text_documents4 = pdf_loader.load(), pdf_loader2.load()

In [11]:
# Merge every loaded source into one flat list, in load order:
# text file, web page, then the two PDFs.
all_docs = [
    *text_documents,
    *text_documents1,
    *text_documents3,
    *text_documents4,
]

In [12]:
len(all_docs)

114

### Transform Data

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [14]:
# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=200)
# so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(all_docs)

### Vector Embeddings

In [17]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

In [None]:
# Embed every chunk with the Ollama "deepseek-r1" model and index the vectors in FAISS.
# NOTE(review): the same model name is also used for the chat LLM in this notebook;
# a dedicated embedding model might retrieve better — confirm before changing.
embedding_model = OllamaEmbeddings(model="deepseek-r1")
db = FAISS.from_documents(documents, embedding_model)

In [52]:
db

<langchain_community.vectorstores.faiss.FAISS at 0x221fff28410>

In [53]:
from langchain_community.llms import Ollama

In [54]:
# LLM used to generate answers, served through Ollama.
llm = Ollama(model="deepseek-r1")

In [55]:
from langchain_core.prompts import ChatPromptTemplate

In [56]:
# Prompt for the answer-generation step: {context} is filled with the
# retrieved documents and {input} with the user's question.
# The context section is delimited by a matching <context>...</context> pair
# so the end of the retrieved text is unambiguous to the model.
prompt = ChatPromptTemplate.from_template(
    """
    Answer the question based on the provided context.
    Think carefully before answering and provide a clear and concise response.
    <context>
    {context}
    </context>
    Question: {input}
    """
    )

In [57]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [58]:
# Chain that places the supplied documents into the prompt and calls the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [59]:
# Expose the FAISS store as a retriever (default settings) for the retrieval chain.
retriever = db.as_retriever()

In [60]:
from langchain.chains import create_retrieval_chain

In [61]:
# End-to-end RAG chain: the retriever supplies documents, document_chain answers from them.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [63]:
# Persist the FAISS index to the "vector_storage" folder.
db.save_local(folder_path="vector_storage")

In [64]:
# Sample question used for both the raw similarity search and the full chain.
query = "Tell me about Foreign Trade Regulations?"

In [65]:
# Inspect the vector store's raw hits for the query, without involving the LLM.
db.similarity_search(query)

[Document(id='286e478a-5978-4f87-b2f6-e45a6f5cc283', metadata={'producer': 'iText® Core 7.2.3 (production version) ©2000-2022 iText Group NV, Government Publishing Office', 'creator': 'govinfo, U. S. Government Publishing Office', 'creationdate': '2024-07-15T19:34:38+00:00', 'moddate': '2024-07-15T15:43:03-04:00', 'source': 'data/CFR-2024-title15-vol1-part30.pdf', 'total_pages': 47, 'page': 10, 'page_label': '11'}, page_content='emption may be filed postdeparture \nonly when the appropriate licensing \nagency has granted the USPPI author-\nization. See Subpart B of this part. \n(2) Specific data elements required \nfor EEI filing are contained in § 30.6. \n(3) The AES downtime procedures \nprovide uniform instructions for proc-\nessing export transactions when the \ngovernment’s AES or AESDirect is un-\navailable for transmission. (See \n§ 30.4(b)(1) and (4)). \n(4) Instructions for particular types \nof transactions and exemptions from \nthese requirements are found in Sub-\nparts C a

In [66]:
# Run the full retrieval chain; the question is passed under the "input" key.
chain_input = {"input": query}
response = retrieval_chain.invoke(chain_input)

In [67]:
response

{'input': 'Tell me about Foreign Trade Regulations?',
 'context': [Document(id='286e478a-5978-4f87-b2f6-e45a6f5cc283', metadata={'producer': 'iText® Core 7.2.3 (production version) ©2000-2022 iText Group NV, Government Publishing Office', 'creator': 'govinfo, U. S. Government Publishing Office', 'creationdate': '2024-07-15T19:34:38+00:00', 'moddate': '2024-07-15T15:43:03-04:00', 'source': 'data/CFR-2024-title15-vol1-part30.pdf', 'total_pages': 47, 'page': 10, 'page_label': '11'}, page_content='emption may be filed postdeparture \nonly when the appropriate licensing \nagency has granted the USPPI author-\nization. See Subpart B of this part. \n(2) Specific data elements required \nfor EEI filing are contained in § 30.6. \n(3) The AES downtime procedures \nprovide uniform instructions for proc-\nessing export transactions when the \ngovernment’s AES or AESDirect is un-\navailable for transmission. (See \n§ 30.4(b)(1) and (4)). \n(4) Instructions for particular types \nof transactions and