### RAG 1: Local File System Loader with ChromaDB

In [33]:
import os
import openai
from dotenv import load_dotenv

# Populate the process environment from the local .env file.
load_dotenv()

# Look each setting up by name; a variable that is not set comes back as None.

# --- OpenAI ---
openai_api_key = os.environ.get("OPENAI_API_KEY")

# --- Pinecone ---
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
pinecone_env = os.environ.get("PINECONE_ENV")

# --- AWS ---
aws_access_key = os.environ.get("AWS_ACCESS_KEY_ID")
aws_secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
aws_region = os.environ.get("AWS_DEFAULT_REGION")


#### Load Documents from the Local File System

In [36]:
from langchain.document_loaders import DirectoryLoader, PyPDFLoader


# Create a loader that reads every file matching "*.pdf" in the current
# directory. PyPDFLoader produces one Document per PDF page, so the count
# printed below is a page count, not a file count.
loader = DirectoryLoader('./', glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# Fail fast with a clear message: trying to index an empty list later on
# would likely fail with a much less helpful error.
if not documents:
    raise FileNotFoundError("No PDF pages were loaded from './' (glob '*.pdf').")

print(f"Loaded {len(documents)} documents from local file system.")


Loaded 17 documents from local file system.


#### Create a Vector Store Using ChromaDB

In [37]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma


# Create an embeddings object (using OpenAI embeddings)
embeddings = OpenAIEmbeddings()

# Give every loaded page a stable id built from its source file, page number
# and position in the list. Without explicit ids the store assigns random
# ones, so re-running this cell in the same kernel would insert a second copy
# of every page into "local_docs" and the retriever would return duplicates.
# With stable ids a re-run overwrites the existing entries instead.
document_ids = [
    f"{doc.metadata.get('source', 'doc')}-p{doc.metadata.get('page', 0)}-{position}"
    for position, doc in enumerate(documents)
]

# Initialize the Chroma vector store with the loaded documents
vector_store = Chroma.from_documents(
    documents,
    embeddings,
    ids=document_ids,
    collection_name="local_docs",
)

print("Documents indexed in ChromaDB.")


Documents indexed in ChromaDB.


#### Build the RAG Pipeline with LangChain’s Retrieval Chain

In [38]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.prompts import PromptTemplate
from langchain_openai.chat_models import ChatOpenAI


# Prompt for the document-combining chain.
# create_retrieval_chain supplies the user's query under the "input" key and
# the retrieved documents are injected as {context}, so those are the two
# placeholders used here. Extra keys in the invoke payload are simply ignored
# by the prompt.
template = """Answer the question based on the following context.
If you can't find the answer in the context, use your own knowledge.

{context}

Question: {input}
"""

# from_template infers the input variables ("context", "input") from the
# placeholders in the template string.
prompt = PromptTemplate.from_template(template)

# temperature=0 keeps answers as repeatable as the model allows.
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


# Expose the vector store as a retriever that returns the 3 most similar
# documents for each query.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})


In [39]:
# Step 1: chain that places the retrieved documents into the prompt and
# sends the filled-in prompt to the chat model.
combine_docs_chain = create_stuff_documents_chain(model, prompt)

# Step 2: wrap it with the retriever so a single call performs retrieval
# followed by answer generation.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [40]:
# Define the query once so retrieval and answer generation see exactly the
# same text. create_retrieval_chain feeds the retriever from "input";
# "question" is also supplied so a prompt that uses a {question} placeholder
# receives the identical string.
query = "Which Country is Trump Invading?"
retrieval_chain.invoke({"input": query, "question": query})

{'input': 'Which Country is Trump Invading?',
 'question': 'Which Country is Trump Invading??',
 'context': [Document(metadata={'creationdate': '2025-02-09T07:49:21+00:00', 'creator': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36', 'moddate': '2025-02-09T07:49:21+00:00', 'page': 1, 'page_label': '2', 'producer': 'Skia/PDF m132', 'source': 'data.pdf', 'title': 'Trump’s invasion of Greenland would be ‘the shortest war in the world’ – POLITICO', 'total_pages': 17}, page_content='This week, incoming U.S. President Donald Trump sent shockwaves across Europe\nwhen he refused to rule out using military force to annex the world’s largest island, an\nautonomous territory of 57,000 people that is part of the Kingdom of Denmark.\xa0\nTrump, who also ﬂoated the idea of the U.S. taking over Canada and the Panama Canal,\nhas long had his eye on Greenland — a strategically located island rich in minerals and\noil.\nAdvertisement\nAffo