In [2]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from pathlib import Path
from langchain_openai import ChatOpenAI,OpenAIEmbeddings
import os
from langchain.document_loaders import  PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv

# Populate the process environment from a local .env file, then read the
# settings the rest of the notebook relies on. Each lookup yields None when
# the variable is absent.
load_dotenv()
api_key = os.environ.get("NOMIC_API_KEY")  # NOTE(review): not referenced by any visible cell; presumably consumed by NomicEmbeddings via the environment
url = os.environ.get("base_url")           # passed to ChatOpenAI as base_url
doc_path = os.environ.get("pdf_coop")      # path handed to PyPDFLoader

In [3]:
# Load the PDF referenced by `doc_path` and cut it into overlapping chunks
# that are small enough to embed and retrieve individually.
#
# `doc_path` comes from the `pdf_coop` environment variable and is None when
# that variable is not configured, so fail early with an actionable message
# instead of an obscure error from inside the loader.
if not doc_path:
    raise ValueError(
        "Environment variable 'pdf_coop' is not set; "
        "it must contain the path of the PDF to load."
    )

loader = PyPDFLoader(doc_path)
documents = loader.load()

# chunk_size / chunk_overlap use the splitter's default length function
# (character count): chunks of up to 1000 with 200 shared between neighbours,
# so text cut at a chunk boundary still appears intact in one of the chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)

In [12]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_nomic import NomicEmbeddings

# Embedding model shared by the index (for sizing) and the vector store
# (for embedding documents and queries).
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")

# The FAISS index must be created with the vector dimensionality, so probe it
# by embedding a throwaway string and measuring the resulting vector.
embedding_dim = len(embeddings.embed_query(" "))
index = faiss.IndexFlatL2(embedding_dim)

# Start from an empty store: no documents and no index-to-id mapping yet.
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [13]:
# Embed and index the chunks produced by the text splitter (`splits`).
# The name `docs` is not defined anywhere in this notebook, so passing it
# raised NameError on a fresh kernel. The call returns the ids assigned to
# the stored chunks, which the cell displays.
vector_store.add_documents(documents=splits)

['6e0ad395-1ecf-49e3-8a38-bc1ddd036047',
 '656c0b2f-78c5-4272-9467-4237fe670d09',
 '7dc56e1d-345d-4976-982e-78b8026a14a2',
 'be245b31-5b32-45ca-8136-9fff2c5a502d',
 '32aad846-baed-4782-8162-64e01c1957e0',
 'e20a2112-0d0b-4a00-912c-20392bbdda39',
 '87715179-ab13-4942-8cf3-d7bac8393de8',
 '2a692a89-7853-4b54-8974-b5280055114d',
 '1d7325e1-9bd0-4b9b-8d8f-abe360f11a9b',
 '74b0b09d-2c3f-4cc1-9e5c-a0724a338367',
 '59a66cdd-2ac5-4937-bdce-4c28ed80b6e0',
 '059c45e2-a095-47d6-a779-806829af5dfb',
 '7c5c96ed-46f1-4b0a-b2dc-10947be09b16',
 '3dfa48a0-8ed1-4266-b424-a5d43e11020b',
 '5438b23b-acb4-4780-9f3e-cd7547058a2e',
 '8074d467-99f8-46ca-b903-9725ee55b317',
 'd8d3c335-c0c8-4a57-af8a-2da3322b2e70',
 'e2666be7-cdb9-4c41-841d-312c3be084b5',
 'f28af9e1-df85-4680-bc6a-d983870270a4',
 '73727bdb-4009-4a12-9d79-29697551ffab',
 '5101b5c6-57fa-4bcd-8728-1a260f7c7373',
 '5d277b3e-6893-4f6a-9c77-6e498d516852',
 'f7c05a64-aa95-4029-a98c-691d17fb7faa',
 '87ec088b-b5c0-4072-95fc-1ee7496c3b24',
 '4ac704fc-6f68-

In [14]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# --- Prompt ------------------------------------------------------------------
# System instructions for the model. `{context}` is the slot for the retrieved
# chunks; `{input}` in the human message is the key supplied to
# `rag_chain.invoke(...)`.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# --- Model -------------------------------------------------------------------
# Chat model reached through an OpenAI-compatible endpoint at `url`.
# NOTE(review): "lm-studio" looks like a placeholder key for a local server
# rather than a real credential; confirm before pointing at a hosted API.
llm = ChatOpenAI(
    model="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
    base_url=url,
    api_key="lm-studio",
)

# --- Retrieval ---------------------------------------------------------------
# Expose the vector store as a retriever using its default search settings.
retriever = vector_store.as_retriever()

# --- Chains ------------------------------------------------------------------
# First build the chain that passes retrieved documents through the prompt to
# the model, then wrap it with the retriever so a single call does both steps.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [15]:
# Ask the RAG chain a question and display only the generated answer text.
question = "which company does sheryl Baxter work for?"
answer = rag_chain.invoke({"input": question})
answer["answer"]

'Sheryl Baxter works for Rasmussen Group.'

In [16]:
# Follow-up question about the same person (Sheryl Baxter); display only the
# generated answer text.
question = "what is  subscription date sheryl Baxter?"
answer = rag_chain.invoke({"input": question})
answer["answer"]

"Sheryl Baxter's subscription date is 2020-08-24. She has another entry with a different last name, Meyers, but the subscription date for Sheryl Baxter is this one."