### RAG system that answers questions both from the uploaded PDF and from external knowledge

In [None]:

import os

import requests
from groq import Groq  # Correct import for Groq
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS



Exiting...


In [69]:
# Load and process the document
file_path = r"wp-oreilly-deep-learning.pdf"

# Choose the loader from the file extension. The comparison runs on a
# lower-cased copy of the path so that names such as "REPORT.PDF" or
# "notes.Docx" are accepted instead of being rejected as unsupported.
_path_lower = file_path.lower()
if _path_lower.endswith(".pdf"):
    loader = PyPDFLoader(file_path)
elif _path_lower.endswith(".docx"):
    loader = Docx2txtLoader(file_path)
else:
    # Only PDF and DOCX inputs have a loader wired up in this notebook.
    raise ValueError("File format not supported.")


In [70]:
# Sanity check: echo the path the loader above was built from.
file_path

'wp-oreilly-deep-learning.pdf'

In [71]:
# Sanity check: confirm the path matches the ".pdf" branch of the loader selection.
file_path.endswith(".pdf")

True

In [72]:
# Read the file through the loader chosen above; `data` is what the text
# splitter consumes in the following cells.
data = loader.load()

In [73]:
# Splitter used to cut the loaded document into chunks before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # upper bound on chunk length (presumably characters, the library default -- TODO confirm)
    chunk_overlap=200,  # amount of text neighbouring chunks share
    add_start_index=True,  # presumably records each chunk's start offset in its metadata -- verify in the langchain docs
)

In [74]:
# Split the loaded documents into chunks; these chunks are what get embedded
# into the vector store in the next cell.
all_splits = text_splitter.split_documents(data)

In [75]:
# Create embeddings and vector store
# The same embedding object is used to build the index and to reload it below.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# Embed every chunk and build the FAISS index from the result.
vector_store = FAISS.from_documents(all_splits, embeddings)
# Persist the index under the local path "faiss_index".
vector_store.save_local("faiss_index")
# Reload what was just written. allow_dangerous_deserialization=True opts in to
# deserializing the saved index; that is acceptable here only because the files
# were produced by the line above. Do not point this call at an index obtained
# from an untrusted source.
vector_store = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)


### Initialize `ConversationBufferMemory` to store the conversation history



In [76]:
# Initialize memory
# The conversation loop saves every turn into this object and hybrid_qa_chain
# reads `memory.buffer` when it fills the prompt's history slot.
# return_messages=True presumably keeps the history as message objects rather
# than one flat string -- confirm against the langchain memory docs.
memory = ConversationBufferMemory(return_messages=True)

In [77]:
# `response` is first assigned by the conversation loop near the end of the
# notebook. On a fresh kernel (Restart & Run All) it does not exist yet when
# this cell runs, so only print it when a previous run has left it defined
# rather than aborting the whole notebook with a NameError.
if "response" in globals():
    print(response)

The text describes how logistic regression works. In logistic regression, a logistic function is used to determine the probability of an outcome based on input values and weights. The logistic function, also known as the sigmoid function, maps any input in the range of negative to positive infinity to an output in the range of 0.0 to 1.0, allowing the output to be interpreted as a probability.

The process involves the following steps:

1. **Defining the logistic function**: The logistic function is defined as f(x) = 1 / (1 + e^(-θx)), where θ is the weight and x is the input value.
2. **Calculating the output**: The output of the logistic function is calculated by plugging in the input values and weights into the function.
3. **Interpreting the output**: The output of the logistic function is interpreted as a probability, with values close to 1 indicating a high probability of the outcome and values close to 0 indicating a low probability.
4. **Training the model**: The model is train

In [78]:
# Initialize Groq client
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret is stored in the notebook or its saved outputs. Any key that was
# previously committed in this cell should be treated as leaked and rotated.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    # Fail here with a clear message instead of later with an opaque auth error.
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running this cell.")
groq_client = Groq(api_key=_groq_api_key)

In [79]:


# Define the prompt template
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Chat History:
{history}

Human: {input}
AI:"""

prompt = PromptTemplate.from_template(template)


In [80]:
# Answer a query with the Groq chat model, using retrieved PDF passages as context
def hybrid_qa_chain(query, retriever, memory, model="llama-3.3-70b-versatile"):
    """Answer ``query`` with the Groq chat model, grounded in the PDF when possible.

    The retriever is asked for passages related to ``query`` and whatever it
    returns is joined into the ``context`` slot of the module-level ``prompt``
    template. When nothing is retrieved the model is still called, with a
    context line saying so, which means the function always returns the
    model's text instead of silently falling through and returning ``None``.

    Args:
        query: The user's question.
        retriever: Object exposing ``get_relevant_documents(query)``; each
            returned item must have a ``page_content`` attribute.
        memory: Conversation memory whose ``buffer`` attribute is rendered
            into the prompt's history slot.
        model: Groq model identifier. Defaults to the model this notebook
            originally hard-coded.

    Returns:
        The text content of the first completion choice.
    """
    # Retrieve from PDF first
    pdf_results = retriever.get_relevant_documents(query)
    if pdf_results:
        context = "\n".join(doc.page_content for doc in pdf_results)
    else:
        # No matching passages: say so explicitly rather than skipping the call.
        context = "No relevant passages were found in the uploaded document."

    # The template uses both {question} and {input}; both receive the query.
    system_prompt = prompt.format(
        context=context,
        question=query,
        history=memory.buffer,
        input=query,
    )
    response = groq_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )
    return response.choices[0].message.content
    

In [82]:
# Example conversation loop
# The retriever does not change between turns, so build it once up front
# instead of on every iteration.
retriever = vector_store.as_retriever()

while True:
    # Strip surrounding whitespace so " exit " still ends the session.
    user_query = input("You: ").strip()
    if user_query.lower() in ["exit", "quit"]:
        print("Exiting...")
        break
    if not user_query:
        # Nothing was typed: do not spend an API call on an empty question.
        continue

    # Generate a response using the hybrid QA chain
    response = hybrid_qa_chain(user_query, retriever, memory)
    print(f"AI: {response}")

    # Save the conversation to memory. A missing answer is not recorded, so a
    # None value never ends up in the chat history.
    if response is not None:
        memory.save_context({"input": user_query}, {"output": response})

AI: It looks like you're saying hello again. Hello! Is there something I can help you with or would you like to chat? By the way, I think I recall you mentioning your name might be Sid earlier. Is that correct?
AI: Hello Sid from Merta. It's nice to meet you. Is there something I can help you with or would you like to chat? By the way, I see you've already mentioned your name and location a few times in our conversation. What brings you here today?
AI: Your name is Sid, and you are from Merta.
Exiting...
