In [1]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq 
# Load variables from a local .env file (if present) into the process
# environment so the API keys below can be read without hardcoding them.
load_dotenv()

# Each key is None when the corresponding variable is not set.
api_key = os.getenv("GROQ_API_KEY")
cohere_api_key = os.getenv("COHERE_API_KEY")
serpapi_api_key = os.getenv("SERP_API_KEY")

# Groq-hosted chat model used by later cells.
llm = ChatGroq(
    model="llama-3.3-70b-specdec",
    temperature=0.8,
    api_key=api_key,
)

In [6]:
# Path to a single paper PDF.
# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
pdf_file_path = "2304.00501v6.pdf" 
# Directory that the DirectoryLoader scans for PDF files to index.
PAPERS_DIR = "Papers"

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import DirectoryLoader
from datetime import datetime
from langchain.prompts import ChatPromptTemplate
# Load every PDF found (recursively) under PAPERS_DIR.
loader = DirectoryLoader(PAPERS_DIR, glob="**/*.pdf")
documents = loader.load()

# Split the documents into 1000-character chunks that overlap by 500
# characters, so text cut at a chunk boundary also appears in the neighbour.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
chunks = text_splitter.split_documents(documents)

# Sentence-transformer model used to embed each chunk.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed the chunks and store them in a Chroma collection persisted to "chromdb".
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again — confirm, and clear "chromdb" between runs if so.
db = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory="chromdb")

# The number of results must be passed through `search_kwargs`; a bare `k=`
# keyword is not a documented `as_retriever` argument and appears not to take
# effect, which would leave the retriever on its default result count.
# Capped at 10, or fewer when the store holds fewer documents.
retriever = db.as_retriever(search_kwargs={"k": min(len(db), 10)})
def generate_rfq_with_details(llm,query):
    """Answer ``query`` using documents fetched by the module-level ``retriever``.

    Builds a "stuff documents" chain around ``llm`` with a prompt that
    restricts the answer to the retrieved context, wraps it in a retrieval
    chain, and invokes that chain with the query.

    Args:
        llm: Chat model handed to ``create_stuff_documents_chain``.
        query: Question text, passed to the chain as ``input``.

    Returns:
        The ``"answer"`` entry of the chain result on success. If any
        exception is raised while building or running the chain, the string
        ``"An error occurred: <message>"`` is returned instead of raising, so
        callers must inspect the text to detect a failure.
    """
    try:
        # The context block is closed with </context> so the model sees a
        # well-formed delimiter pair around the retrieved text.
        prompt = ChatPromptTemplate.from_template("""
        Answer the following question based only on the provided context.
        Think step by step before providing a detailed answer.
        <context>
        {context}
        </context>
        Question: {input}
        """)
        document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
        # `retriever` is the module-level retriever created in the indexing cell.
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        result = retrieval_chain.invoke({"input": query})
        return result["answer"]
    except Exception as e:
        # Deliberate best-effort: report the failure as text rather than raising.
        return f"An error occurred: {str(e)}"



In [8]:
# Example query against the indexed papers; the returned answer string is the cell output.
generate_rfq_with_details(llm,"What is Attention ?")

'Based on the provided context, Attention refers to a mechanism used in the Transformer model, specifically in the encoder-decoder attention layers and self-attention layers. It allows the model to focus on different parts of the input sequence when generating output, by attending to specific positions in the input sequence.\n\nIn the context, Attention is described as a way for the model to:\n\n1. Follow long-distance dependencies in the input sequence (Figure 3)\n2. Resolve anaphora (Figure 4)\n3. Attend to different positions in the input sequence (Section 3.2.3)\n\nThe Attention mechanism is implemented using multi-head attention, where different attention heads learn to perform different tasks, such as attending to specific words or phrases in the input sequence.\n\nIn general, Attention can be understood as a way for the model to selectively focus on certain parts of the input sequence, rather than considering the entire sequence equally, in order to generate more accurate and re