In [59]:
import os
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Pinecone
import pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Load environment variables from the config.env file.
load_dotenv('config.env')

# Retrieve API keys from environment variables.
groq_api_key = os.getenv('groq_api_key')
google_api_key = os.getenv('GEMINI_API_KEY')

# os.environ cannot store None, so writing os.getenv(...) back into it is a no-op when
# the key exists and an opaque TypeError when it does not. Check explicitly and fail
# with a message that says what is missing.
if not google_api_key:
    raise RuntimeError("GEMINI_API_KEY is not set; define it in config.env or the environment.")

# Initialize the ChatGroq model used to answer questions.
llm = ChatGroq(groq_api_key=groq_api_key, model_name='gemma-7b-it')

# Prompt for the question-answering chain: {context} receives the retrieved chunks,
# {input} receives the user's question.
prompt = ChatPromptTemplate.from_template(""" 
Answer the question based on the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Questions: {input}
""")

def vector_embedding():
    """Load the PDFs under ./attention, split them into chunks and index them in Pinecone.

    Each chunk is stored under an ID derived from its source, page and text, so running
    this function again overwrites the same records instead of adding a second copy of
    every chunk to the index.

    Returns:
        The PineconeVectorStore bound to the 'testproject2' index.
    """
    import uuid  # local import: this cell's import block does not bring uuid in

    embeddings = GoogleGenerativeAIEmbeddings(google_api_key=google_api_key, model="models/text-embedding-004")
    loader = PyPDFDirectoryLoader("./attention")
    docs = loader.load()
    index_name = 'testproject2'
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    final_documents = text_splitter.split_documents(docs)

    # Deterministic IDs: the same chunk always maps to the same Pinecone record.
    ids = [
        str(uuid.uuid5(
            uuid.NAMESPACE_URL,
            f"{doc.metadata.get('source', '')}|{doc.metadata.get('page', '')}|{doc.page_content}",
        ))
        for doc in final_documents
    ]

    vectors = PineconeVectorStore.from_documents(
        documents=final_documents,
        embedding=embeddings,
        index_name=index_name,
        ids=ids,
    )

    return vectors

# Example usage of the vector embedding function and querying the model.
# In a notebook __name__ is "__main__", so this block runs whenever the cell runs and
# every name assigned here (vectors, response, ...) becomes a kernel global.
if __name__ == "__main__":
    vectors = vector_embedding()
    
    # Sample question to ask from the documents
    prompt1 = "What is multihead attention"
    
    # Build the answer chain: the retriever fetches chunks from the vector store and the
    # document chain fills them into the {context} slot of `prompt` for the LLM.
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    
    # Invoke the retrieval chain with a sample input
    response = retrieval_chain.invoke({'input': prompt1})
    
    # Print the full response, then the retrieved context and the answer separately
    print(response)
    print("\nContext:",response["context"])
    print("\n Main Answer:", response['answer'])

    # Show the text of each retrieved chunk, separated by a ruler line
    print("\nDocument Similarity Search:")
    for i,doc in enumerate(response["context"]):
        print(doc.page_content)
        print("--------------------------------------------")
    
      
    

{'input': 'What is multihead attention', 'context': [Document(id='303e09f2-e5c2-4160-a8ad-1407c36a87ed', metadata={'page': 4.0, 'source': 'attention\\attention.pdf'}, page_content='MultiHead( Q,K,V ) = Concat(head 1,...,head h)WO\nwhere head i= Attention( QWQ\ni,KWK\ni,VWV\ni)\nWhere the projections are parameter matrices WQ\ni∈Rdmodel×dk,WK\ni∈Rdmodel×dk,WV\ni∈Rdmodel×dv\nandWO∈Rhdv×dmodel.\nIn this work we employ h= 8 parallel attention layers, or heads. For each of these we use\ndk=dv=dmodel/h= 64 . Due to the reduced dimension of each head, the total computational cost\nis similar to that of single-head attention with full dimensionality.\n3.2.3 Applications of Attention in our Model\nThe Transformer uses multi-head attention in three different ways:\n•In "encoder-decoder attention" layers, the queries come from the previous decoder layer,\nand the memory keys and values come from the output of the encoder. This allows every\nposition in the decoder to attend over all positions in 

In [93]:
import os
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Pinecone
import pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Load environment variables from the config.env file.
load_dotenv('config.env')

# Retrieve API keys from environment variables.
groq_api_key = os.getenv('groq_api_key')
google_api_key = os.getenv('GEMINI_API_KEY')

# os.environ cannot store None, so writing os.getenv(...) back into it is a no-op when
# the key exists and an opaque TypeError when it does not. Check explicitly and fail
# with a message that says what is missing.
if not google_api_key:
    raise RuntimeError("GEMINI_API_KEY is not set; define it in config.env or the environment.")

# Initialize the ChatGroq model used for answering and for topic labelling.
llm = ChatGroq(groq_api_key=groq_api_key, model_name='gemma-7b-it')

# Prompt for the question-answering chain: {context} receives the retrieved chunks,
# {input} receives the user's question.
document_prompt = ChatPromptTemplate.from_template(""" 
Answer the question based on the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Questions: {input}
""")

# Prompt for assigning a topic to one text chunk ({chunk}).
topic_prompt = ChatPromptTemplate.from_template(""" 
Assign a topic to the following text chunk:
<chunk>
{chunk}
</chunk>
""")

def vector_embedding():
    """Load the PDFs under ./attention, split them into chunks and index them in Pinecone.

    Each chunk is stored under an ID derived from its source, page and text, so running
    this function again overwrites the same records instead of adding a second copy of
    every chunk to the index.

    Returns:
        A tuple (vectors, final_documents): the PineconeVectorStore bound to the
        'testproject2' index and the list of chunk Documents that were indexed.
    """
    import uuid  # local import: this cell's import block does not bring uuid in

    embeddings = GoogleGenerativeAIEmbeddings(google_api_key=google_api_key, model="models/text-embedding-004")
    loader = PyPDFDirectoryLoader("./attention")
    docs = loader.load()
    index_name = 'testproject2'
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    # Split documents into chunks
    final_documents = text_splitter.split_documents(docs)

    # Deterministic IDs: the same chunk always maps to the same Pinecone record.
    ids = [
        str(uuid.uuid5(
            uuid.NAMESPACE_URL,
            f"{doc.metadata.get('source', '')}|{doc.metadata.get('page', '')}|{doc.page_content}",
        ))
        for doc in final_documents
    ]

    # Initialize Pinecone Vector Store and add documents
    vectors = PineconeVectorStore.from_documents(
        documents=final_documents,
        embedding=embeddings,
        index_name=index_name,
        ids=ids,
    )

    return vectors, final_documents  # Return both vectors and original documents

def assign_topics_to_chunks(chunks):
    """Ask the LLM for a topic label for each text chunk.

    Args:
        chunks: Iterable of chunk texts.

    Returns:
        A list with the LLM's reply text for each chunk, in input order.
    """
    topics = []
    for chunk in chunks:
        # format_messages() keeps the template as a chat message. format() would flatten
        # it to a single string prefixed with "Human: ", and that prefix would then be
        # sent to the model as part of the prompt text.
        messages = topic_prompt.format_messages(chunk=chunk)

        # The reply is a message object; its text is in .content
        response = llm.invoke(messages)
        topics.append(response.content)

    return topics

# Example usage of the vector embedding function and querying the model.
# In a notebook __name__ is "__main__", so this block runs whenever the cell runs and
# every name assigned here becomes a kernel global.
if __name__ == "__main__":
    vectors, final_documents = vector_embedding()  # Get both vectors and original documents
    
    # Sample question to ask from the documents
    prompt1 = "What is multihead attention"
    
    # Build the answer chain: the retriever fetches chunks from the vector store and the
    # document chain fills them into the {context} slot of `document_prompt`.
    document_chain = create_stuff_documents_chain(llm, document_prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    
    # Invoke the retrieval chain with a sample input
    response = retrieval_chain.invoke({'input': prompt1})
    
    # Print only the generated answer
    print(response['answer'])

    # Assign topics to each chunk of documents using their page content.
    # This makes one LLM call per chunk (see assign_topics_to_chunks).
    chunks = [doc.page_content for doc in final_documents]  # Use final_documents instead of vectors.documents
    topics = assign_topics_to_chunks(chunks)
    
    # Print assigned topics for each chunk (1-based numbering)
    for i, topic in enumerate(topics):
        print(f"Chunk {i+1}: Topic - {topic}")

Multihead attention is a technique that uses multiple parallel attention heads to capture different aspects of the input. It involves linearly projecting the queries, keys, and values multiple times with different learned linear projections, resulting in parallel attention over different representation subspaces. The outputs from each head are concatenated and projected once again to produce the final values.
Chunk 1: Topic - **Topic:** Transformer Network Architecture for Machine Translation
Chunk 2: Topic - **Topic:** Advancement in Machine Translation using Recurrent Neural Networks
Chunk 3: Topic - **Topic:** Development of Transformer Models for Language Modeling and Machine Translation
Chunk 4: Topic - **Topic:** Research improvements through tensor2tensor framework development
Chunk 5: Topic - **Topic:** Sequential Computation in Recurrent Models
Chunk 6: Topic - **Topic:** Transformer Model and Attention Mechanism in Sequence Modeling
Chunk 7: Topic - **Topic:** Attention Mecha

In [98]:
import os
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Pinecone
import pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Load environment variables from the config.env file.
load_dotenv('config.env')

# Retrieve API keys from environment variables.
groq_api_key = os.getenv('groq_api_key')
google_api_key = os.getenv('GEMINI_API_KEY')

# os.environ cannot store None, so writing os.getenv(...) back into it is a no-op when
# the key exists and an opaque TypeError when it does not. Check explicitly and fail
# with a message that says what is missing.
if not google_api_key:
    raise RuntimeError("GEMINI_API_KEY is not set; define it in config.env or the environment.")

# Initialize the ChatGroq model used for answering and for topic labelling.
llm = ChatGroq(groq_api_key=groq_api_key, model_name='gemma-7b-it')

# Prompt for the question-answering chain: {context} receives the retrieved chunks,
# {input} receives the user's question.
document_prompt = ChatPromptTemplate.from_template(""" 
Answer the question based on the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Questions: {input}
""")

# Prompt for assigning a short topic to one text chunk ({chunk}).
topic_prompt = ChatPromptTemplate.from_template(""" 
Topic should be of 3 to 4 words only. Assign a topic to the following text chunk:
<chunk>
{chunk}
</chunk>
""")

def vector_embedding():
    """Load the PDFs under ./attention, split them into chunks and index them in Pinecone.

    Each chunk is stored under an ID derived from its source, page and text, so running
    this function again overwrites the same records instead of adding a second copy of
    every chunk to the index.

    Returns:
        A tuple (vectors, final_documents): the PineconeVectorStore bound to the
        'testproject2' index and the list of chunk Documents that were indexed.
    """
    import uuid  # local import: this cell's import block does not bring uuid in

    embeddings = GoogleGenerativeAIEmbeddings(google_api_key=google_api_key, model="models/text-embedding-004")
    loader = PyPDFDirectoryLoader("./attention")
    docs = loader.load()
    index_name = 'testproject2'
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    # Split documents into chunks
    final_documents = text_splitter.split_documents(docs)

    # Deterministic IDs: the same chunk always maps to the same Pinecone record.
    ids = [
        str(uuid.uuid5(
            uuid.NAMESPACE_URL,
            f"{doc.metadata.get('source', '')}|{doc.metadata.get('page', '')}|{doc.page_content}",
        ))
        for doc in final_documents
    ]

    # Initialize Pinecone Vector Store and add documents
    vectors = PineconeVectorStore.from_documents(
        documents=final_documents,
        embedding=embeddings,
        index_name=index_name,
        ids=ids,
    )

    return vectors, final_documents  # Return both vectors and original documents

def assign_topics_to_chunks(chunks, documents):
    """Ask the LLM for a topic per chunk and record it on the matching document.

    Args:
        chunks: Chunk texts; chunks[i] is expected to be the text of documents[i].
        documents: Document objects whose metadata['topic'] is set in place.

    Returns:
        A list with the LLM's reply text for each chunk, in input order.
    """
    topics = []
    for i, chunk in enumerate(chunks):
        # format_messages() keeps the template as a chat message. format() would flatten
        # it to a single string prefixed with "Human: ", and that prefix would then be
        # sent to the model as part of the prompt text.
        messages = topic_prompt.format_messages(chunk=chunk)

        # The reply is a message object; its text is in .content
        response = llm.invoke(messages)
        topic = response.content

        # Update the metadata of the corresponding document with the assigned topic
        documents[i].metadata['topic'] = topic
        topics.append(topic)

    # Spot-check output. Only attempted when a second document with a topic exists, so
    # inputs with fewer than two chunks no longer raise IndexError/KeyError here.
    if len(documents) > 1 and 'topic' in documents[1].metadata:
        print("\n2nd chunk topic:-",documents[1].metadata['topic'])
    return topics

# Example usage of the vector embedding function and querying the model.
# In a notebook __name__ is "__main__", so this block runs whenever the cell runs and
# every name assigned here becomes a kernel global.
if __name__ == "__main__":
    vectors, final_documents = vector_embedding()  # Get both vectors and original documents
    
    # Sample question to ask from the documents
    prompt1 = "What is multihead attention"
    
    # Build the answer chain: the retriever fetches chunks from the vector store and the
    # document chain fills them into the {context} slot of `document_prompt`.
    document_chain = create_stuff_documents_chain(llm, document_prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    
    # Invoke the retrieval chain with a sample input
    response = retrieval_chain.invoke({'input': prompt1})
    
    # Print only the generated answer
    print(response['answer'])

    # Assign topics to each chunk of documents using their page content.
    # The topics are written into the in-memory documents' metadata only; nothing in
    # this cell sends them to the vector store.
    chunks = [doc.page_content for doc in final_documents]  # Use final_documents instead of vectors.documents
    topics = assign_topics_to_chunks(chunks, final_documents)  # Pass final_documents for metadata update
    
    # Print assigned topics for each chunk (1-based numbering)
    for i, topic in enumerate(topics):
        print(f"Chunk {i+1}: Topic - {topic}")

Multihead attention is a technique used in the Transformer model that involves combining the outputs of multiple attention heads to enhance the representation of the input. Each attention head focuses on a different aspect of the input, and the combined outputs provide a richer representation.

2nd chunk topic:- **Model Performance**

The text chunk discusses the performance of a new machine translation model in comparison to existing models.
Chunk 1: Topic - **Attention-based models**
Chunk 2: Topic - **Model Performance**

The text chunk discusses the performance of a new machine translation model in comparison to existing models.
Chunk 3: Topic - **Model Development**

The text chunk discusses the development of various models related to language processing.
Chunk 4: Topic - **Tensor2tensor development**
Chunk 5: Topic - **Sequence Modeling**

The topic focuses on the challenges and techniques related to modeling sequential data.
Chunk 6: Topic - **Topic:** Attention-based models
Ch

In [103]:
import os
import uuid  # Import uuid for generating unique IDs
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Pinecone
import pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Load environment variables from the config.env file.
load_dotenv('config.env')

# Retrieve API keys from environment variables.
groq_api_key = os.getenv('groq_api_key')
google_api_key = os.getenv('GEMINI_API_KEY')

# os.environ cannot store None, so writing os.getenv(...) back into it is a no-op when
# the key exists and an opaque TypeError when it does not. Check explicitly and fail
# with a message that says what is missing.
if not google_api_key:
    raise RuntimeError("GEMINI_API_KEY is not set; define it in config.env or the environment.")

# Initialize the ChatGroq model used for answering and for topic labelling.
llm = ChatGroq(groq_api_key=groq_api_key, model_name='gemma-7b-it')

# Prompt for the question-answering chain: {context} receives the retrieved chunks,
# {input} receives the user's question.
document_prompt = ChatPromptTemplate.from_template(""" 
Answer the question based on the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Questions: {input}
""")

# Prompt for assigning a short, bare topic name to one text chunk ({chunk}).
topic_prompt = ChatPromptTemplate.from_template(""" 
Topic should be of 3 to 4 words only. Topic name should contain only the name. Not '**Topic**' or something similar to this. Assign a topic to the following text chunk:
<chunk>
{chunk}
</chunk>
""")

def vector_embedding():
    """Load the PDFs under ./attention, split them into chunks and index them in Pinecone.

    Every chunk gets a metadata['id'] which is also used as its Pinecone record ID, so a
    later add_texts(..., ids=...) call with those IDs overwrites the same records. The ID
    is derived from the chunk's source, page and text, which makes re-runs overwrite
    rather than duplicate.

    Returns:
        A tuple (vectors, final_documents): the PineconeVectorStore bound to the
        'testproject2' index and the list of chunk Documents that were indexed.
    """
    embeddings = GoogleGenerativeAIEmbeddings(google_api_key=google_api_key, model="models/text-embedding-004")
    loader = PyPDFDirectoryLoader("./attention")
    docs = loader.load()
    index_name = 'testproject2'
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    # Split documents into chunks
    final_documents = text_splitter.split_documents(docs)

    # Give each chunk a stable ID unless it already carries one
    for doc in final_documents:
        if 'id' not in doc.metadata:
            chunk_key = f"{doc.metadata.get('source', '')}|{doc.metadata.get('page', '')}|{doc.page_content}"
            doc.metadata['id'] = str(uuid.uuid5(uuid.NAMESPACE_URL, chunk_key))

    # Pass the IDs explicitly: without `ids`, the store assigns its own random record
    # IDs and the metadata['id'] values would not refer to any stored record.
    vectors = PineconeVectorStore.from_documents(
        documents=final_documents,
        embedding=embeddings,
        index_name=index_name,
        ids=[doc.metadata['id'] for doc in final_documents],
    )

    return vectors, final_documents  # Return both vectors and original documents

def upsert_metadata(vectors, documents):
    """Upsert each document, with its full metadata, under its metadata['id'].

    Args:
        vectors: Vector store exposing add_texts(texts=..., metadatas=..., ids=...).
        documents: Documents with page_content and a metadata dict containing 'id'
            (and, once assigned, 'topic').

    Raises:
        KeyError: If a document has no 'id' in its metadata.
    """
    if not documents:
        return  # nothing to write

    ids = [doc.metadata['id'] for doc in documents]

    # Send the complete metadata rather than only the topic: writing a record under an
    # existing ID replaces its stored metadata, so a topic-only dict would drop fields
    # such as source and page. The dict is copied so the store cannot mutate the
    # documents' own metadata.
    metadatas = [{**doc.metadata, "topic": doc.metadata.get('topic', '')} for doc in documents]

    vectors.add_texts(
        texts=[doc.page_content for doc in documents],
        metadatas=metadatas,
        ids=ids,
    )

def assign_topics_to_chunks(chunks, documents):
    """Ask the LLM for a topic per chunk and record it on the matching document.

    Args:
        chunks: Chunk texts; chunks[i] is expected to be the text of documents[i].
        documents: Document objects whose metadata['topic'] is set in place.

    Returns:
        A list with the whitespace-stripped LLM reply for each chunk, in input order.
    """
    topics = []
    for i, chunk in enumerate(chunks):
        # format_messages() keeps the template as a chat message. format() would flatten
        # it to a single string prefixed with "Human: ", and that prefix would then be
        # sent to the model as part of the prompt text.
        messages = topic_prompt.format_messages(chunk=chunk)

        # The reply is a message object; its text is in .content
        response = llm.invoke(messages)
        topic = response.content.strip()  # Ensure no leading/trailing whitespace

        # Update the metadata of the corresponding document with the assigned topic
        documents[i].metadata['topic'] = topic

        topics.append(topic)

    # Spot-check output. Only attempted when a second document with a topic exists, so
    # inputs with fewer than two chunks no longer raise IndexError/KeyError here.
    if len(documents) > 1 and 'topic' in documents[1].metadata:
        print("\n2nd chunk topic:-",documents[1].metadata['topic'])
    return topics

# Example usage of the vector embedding function and querying the model.
# In a notebook __name__ is "__main__", so this block runs whenever the cell runs and
# every name assigned here becomes a kernel global.
if __name__ == "__main__":
    vectors, final_documents = vector_embedding()  # Get both vectors and original documents
    
    # Sample question to ask from the documents
    prompt1 = "What is multihead attention"
    
    # Build the answer chain: the retriever fetches chunks from the vector store and the
    # document chain fills them into the {context} slot of `document_prompt`.
    document_chain = create_stuff_documents_chain(llm, document_prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    
    # Invoke the retrieval chain with a sample input.
    # Note the query runs before any topics are assigned or upserted below.
    response = retrieval_chain.invoke({'input': prompt1})
    
    # Print only the generated answer
    print(response['answer'])

    # Assign topics to each chunk of documents using their page content
    chunks = [doc.page_content for doc in final_documents]  # Use final_documents instead of vectors.documents
    topics = assign_topics_to_chunks(chunks, final_documents)  # Pass final_documents for metadata update
    
    # Upsert assigned topics into Pinecone metadata (re-sends every chunk's text via add_texts)
    upsert_metadata(vectors, final_documents)

    # Print assigned topics for each chunk (1-based numbering)
    for i, topic in enumerate(topics):
        print(f"Chunk {i+1}: Topic - {topic}")

The provided text does not contain any information regarding "multihead attention", so I am unable to provide an answer to the question from the given context.

2nd chunk topic:- RNN
Chunk 1: Topic - Transformer
Chunk 2: Topic - RNN
Chunk 3: Topic - **Collaborators**
Chunk 4: Topic - Tensor2tensor
Chunk 5: Topic - Sequential computation
Chunk 6: Topic - Transformer
Chunk 7: Topic - Attention
Chunk 8: Topic - Transformer
Chunk 9: Topic - Transformer
Chunk 10: Topic - Layers
Chunk 11: Topic - **Architecture**
Chunk 12: Topic - Attention
Chunk 13: Topic - Attention
Chunk 14: Topic - Attention
Chunk 15: Topic - Attention
Chunk 16: Topic - Attention
Chunk 17: Topic - Encoder-decoder
Chunk 18: Topic - Feed-forward networks
Chunk 19: Topic - Positional Encodings
Chunk 20: Topic - Positional Encodings
Chunk 21: Topic - Self-Attention
Chunk 22: Topic - Dependency length
Chunk 23: Topic - Path length
Chunk 24: Topic - Neighborhood
Chunk 25: Topic - Convolution
Chunk 26: Topic - Training
Chunk 27

In [106]:
import os
import uuid  # Import uuid for generating unique IDs
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Pinecone
import pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Load environment variables from the config.env file.
load_dotenv('config.env')

# Retrieve API keys from environment variables.
groq_api_key = os.getenv('groq_api_key')
google_api_key = os.getenv('GEMINI_API_KEY')

# os.environ cannot store None, so writing os.getenv(...) back into it is a no-op when
# the key exists and an opaque TypeError when it does not. Check explicitly and fail
# with a message that says what is missing.
if not google_api_key:
    raise RuntimeError("GEMINI_API_KEY is not set; define it in config.env or the environment.")

# Initialize the ChatGroq model used for answering and for topic labelling.
llm = ChatGroq(groq_api_key=groq_api_key, model_name='gemma-7b-it')

# Prompt for the question-answering chain: {context} receives the retrieved chunks,
# {input} receives the user's question.
document_prompt = ChatPromptTemplate.from_template(""" 
Answer the question based on the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Questions: {input}
""")

# Prompt for assigning a short, bare topic name to one text chunk ({chunk}).
topic_prompt = ChatPromptTemplate.from_template(""" 
Topic should be of 3 to 4 words only. Topic name should contain only the name. Not '**Topic**' or something similar to this. Assign a topic to the following text chunk:
<chunk>
{chunk}
</chunk>
""")

def vector_embedding():
    """Load the PDFs under ./attention, split them into chunks and index them in Pinecone.

    Every chunk gets a metadata['id'] which is also used as its Pinecone record ID, so a
    later add_texts(..., ids=...) call with those IDs overwrites the same records. The ID
    is derived from the chunk's source, page and text, which makes re-runs overwrite
    rather than duplicate.

    Returns:
        A tuple (vectors, final_documents): the PineconeVectorStore bound to the
        'testproject2' index and the list of chunk Documents that were indexed.
    """
    embeddings = GoogleGenerativeAIEmbeddings(google_api_key=google_api_key, model="models/text-embedding-004")
    loader = PyPDFDirectoryLoader("./attention")
    docs = loader.load()
    index_name = 'testproject2'
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    # Split documents into chunks
    final_documents = text_splitter.split_documents(docs)

    # Give each chunk a stable ID unless it already carries one
    for doc in final_documents:
        if 'id' not in doc.metadata:
            chunk_key = f"{doc.metadata.get('source', '')}|{doc.metadata.get('page', '')}|{doc.page_content}"
            doc.metadata['id'] = str(uuid.uuid5(uuid.NAMESPACE_URL, chunk_key))

    # Pass the IDs explicitly: without `ids`, the store assigns its own random record
    # IDs and the metadata['id'] values would not refer to any stored record.
    vectors = PineconeVectorStore.from_documents(
        documents=final_documents,
        embedding=embeddings,
        index_name=index_name,
        ids=[doc.metadata['id'] for doc in final_documents],
    )

    return vectors, final_documents  # Return both vectors and original documents

def upsert_metadata(vectors, documents):
    """Upsert each document, with its full metadata, under its metadata['id'].

    Args:
        vectors: Vector store exposing add_texts(texts=..., metadatas=..., ids=...).
        documents: Documents with page_content and a metadata dict containing 'id'
            (and, once assigned, 'topic').

    Raises:
        KeyError: If a document has no 'id' in its metadata.
    """
    if not documents:
        return  # nothing to write

    ids = [doc.metadata['id'] for doc in documents]

    # Send the complete metadata rather than only the topic: writing a record under an
    # existing ID replaces its stored metadata, so a topic-only dict would drop fields
    # such as source and page. The dict is copied so the store cannot mutate the
    # documents' own metadata.
    metadatas = [{**doc.metadata, "topic": doc.metadata.get('topic', '')} for doc in documents]

    vectors.add_texts(
        texts=[doc.page_content for doc in documents],
        metadatas=metadatas,
        ids=ids,
    )

def assign_topics_to_chunks(chunks, documents):
    """Ask the LLM for a topic per chunk and record it on the matching document.

    Args:
        chunks: Chunk texts; chunks[i] is expected to be the text of documents[i].
        documents: Document objects whose metadata['topic'] is set in place.

    Returns:
        A list with the whitespace-stripped LLM reply for each chunk, in input order.
    """
    topics = []
    for i, chunk in enumerate(chunks):
        # format_messages() keeps the template as a chat message. format() would flatten
        # it to a single string prefixed with "Human: ", and that prefix would then be
        # sent to the model as part of the prompt text.
        messages = topic_prompt.format_messages(chunk=chunk)

        # The reply is a message object; its text is in .content
        response = llm.invoke(messages)
        topic = response.content.strip()  # Ensure no leading/trailing whitespace

        # Update the metadata of the corresponding document with the assigned topic
        documents[i].metadata['topic'] = topic

        topics.append(topic)

    # Spot-check output. Only attempted when a second document with a topic exists, so
    # inputs with fewer than two chunks no longer raise IndexError/KeyError here.
    if len(documents) > 1 and 'topic' in documents[1].metadata:
        print("\n2nd chunk topic:-",documents[1].metadata['topic'])
    return topics

def assign_main_topic(topics):
    """Ask the LLM for one overarching topic that covers the given per-chunk topics.

    Args:
        topics: Topic strings; they are joined with ", " and appended to the instruction.

    Returns:
        The LLM's reply text with surrounding whitespace removed.
    """
    instruction = "Main topic should give only the name, not '**Main Topic**',or something similar to that. Classify the main topic based on the following topics: "
    topic_listing = ", ".join(topics)
    reply = llm.invoke(instruction + topic_listing)
    return reply.content.strip()

def upsert_main_topic(vectors, documents, main_topic):
    """Set main_topic on every document and upsert the documents into the vector store.

    Args:
        vectors: Vector store exposing add_texts(texts=..., metadatas=..., ids=...).
        documents: Documents with page_content and a metadata dict containing 'id';
            metadata['main_topic'] is set in place.
        main_topic: The topic string to record on every document.

    Raises:
        KeyError: If a document has no 'id' in its metadata.
    """
    for doc in documents:
        doc.metadata['main_topic'] = main_topic  # Assign main topic to document metadata

    if not documents:
        return  # nothing to write

    ids = [doc.metadata['id'] for doc in documents]

    # Send the complete metadata rather than only the two topic fields: writing a record
    # under an existing ID replaces its stored metadata, so a partial dict would drop
    # fields such as source and page. The dict is copied so the store cannot mutate the
    # documents' own metadata.
    metadatas = [
        {
            **doc.metadata,
            "topic": doc.metadata.get('topic', ''),
            "main_topic": doc.metadata.get('main_topic', ''),
        }
        for doc in documents
    ]

    vectors.add_texts(
        texts=[doc.page_content for doc in documents],
        metadatas=metadatas,
        ids=ids,
    )

# Example usage of the vector embedding function and querying the model.
# In a notebook __name__ is "__main__", so this block runs whenever the cell runs and
# every name assigned here becomes a kernel global.
if __name__ == "__main__":
    vectors, final_documents = vector_embedding()  # Get both vectors and original documents

    # Sample question to ask from the documents
    prompt1 = "What is multihead attention"

    # Build the answer chain: the retriever fetches chunks from the vector store and the
    # document chain fills them into the {context} slot of `document_prompt`.
    document_chain = create_stuff_documents_chain(llm, document_prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # Invoke the retrieval chain with a sample input
    response = retrieval_chain.invoke({'input': prompt1})

    # Print only the generated answer
    print(response['answer'])

    # Assign topics to each chunk of documents using their page content
    chunks = [doc.page_content for doc in final_documents]
    topics = assign_topics_to_chunks(chunks, final_documents)  # Pass final_documents for metadata update

    # Classify a broader 'main topic' based on assigned topics
    main_topic = assign_main_topic(topics)

    # Write topic and main topic to Pinecone in a single pass. upsert_main_topic sends
    # both fields under each document's ID, so calling upsert_metadata first would only
    # embed and upload every chunk a second time for records that are then overwritten.
    upsert_main_topic(vectors, final_documents, main_topic)

    # Print assigned topics for each chunk along with the main topic
    for i, topic in enumerate(topics):
        print(f"Chunk {i+1}: Topic - {topic}")

    print(f"Main Topic: {main_topic}")

Multihead attention is a technique used in machine learning and deep learning models to improve attention between sequences of data points. It involves using multiple parallel attention heads to capture different aspects of the input. In the context of the provided text, it is used in the Transformer model to enhance the ability of the model to capture long-range dependencies in sequence-to-sequence tasks.

2nd chunk topic:- RNN
Chunk 1: Topic - Transformer
Chunk 2: Topic - RNN
Chunk 3: Topic - **People**
Chunk 4: Topic - Tensor2tensor
Chunk 5: Topic - Sequential computation
Chunk 6: Topic - Transformer
Chunk 7: Topic - Attention
Chunk 8: Topic - Transformer
Chunk 9: Topic - Architecture
Chunk 10: Topic - Layers
Chunk 11: Topic - Architecture
Chunk 12: Topic - Attention
Chunk 13: Topic - Attention
Chunk 14: Topic - Attention
Chunk 15: Topic - Attention
Chunk 16: Topic - MultiHeadAttention
Chunk 17: Topic - Encoder-Decoder
Chunk 18: Topic - Feed-Forward Network
Chunk 19: Topic - Positio

In [77]:
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer, util
from langchain.schema import Document
from langchain.vectorstores import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain.chains import create_retrieval_chain
from langchain.prompts import ChatPromptTemplate

# Load environment variables (API keys read later via os.getenv) from config.env
load_dotenv('config.env')

# Initialize the sentence transformer model for topic identification.
# It is used by identify_main_topics to embed both the candidate topics and page text.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Pre-defined topics (for embedding comparison): each page is labelled with whichever
# of these strings is most similar to the page text.
predefined_topics = [
    "Transformer Architecture",
    "Attention Mechanisms",
    "Neural Networks",
    "Machine Translation",
    "Deep Learning",
    "Natural Language Processing"
]

def extract_text_from_pdf(pdf_path):
    """Return the extracted text of each page of the PDF at pdf_path, in page order."""
    pages_text = []
    for pdf_page in PdfReader(pdf_path).pages:
        pages_text.append(pdf_page.extract_text())
    return pages_text

def identify_main_topics(text_by_page):
    """Label each page with the predefined topic most similar to its text.

    Args:
        text_by_page: Per-page text, in page order; falsy entries count as "no text".

    Returns:
        A list of (page_number, topic) tuples with 1-based page numbers. Pages without
        text get the label "No text found".
    """
    # Embed the candidate topics once, up front
    topic_vectors = model.encode(predefined_topics, convert_to_tensor=True)

    labelled_pages = []
    for page_number, page_text in enumerate(text_by_page, start=1):
        if not page_text:
            # Text extraction produced nothing for this page
            labelled_pages.append((page_number, "No text found"))
            continue

        # Embed the page and pick the topic with the highest cosine similarity
        page_vector = model.encode(page_text, convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(page_vector, topic_vectors)[0]
        best_match = similarities.argmax().item()
        labelled_pages.append((page_number, predefined_topics[best_match]))

    return labelled_pages

def create_documents_with_metadata(text_by_page, topics):
    """Wrap each page's text in a Document tagged with its page number and main topic.

    Pages with no extracted text are skipped: they have nothing to embed or retrieve,
    and a None page_content is not a valid Document.

    Args:
        text_by_page: Per-page text, in page order.
        topics: Sequence aligned with text_by_page whose items are
            (page_number, topic) tuples; only the topic element is used.

    Returns:
        A list of Documents with metadata {'page': 1-based page number,
        'main_topic': topic}, one per page that has text.
    """
    documents = []
    for i, text in enumerate(text_by_page):
        if not text:
            continue  # page yielded no text
        doc_metadata = {
            'page': i + 1,  # keep the original page number even when pages are skipped
            'main_topic': topics[i][1],
        }
        documents.append(Document(page_content=text, metadata=doc_metadata))
    return documents

def vector_embedding(documents):
    """Embed the given documents and store them in the 'testproject4' Pinecone index.

    Args:
        documents: Documents to embed and index.

    Returns:
        The PineconeVectorStore bound to the index.
    """
    # This cell's import block does not import GoogleGenerativeAIEmbeddings, so the name
    # would only resolve if an earlier cell had been run in the same kernel. Import it
    # here so the cell also works on a fresh kernel.
    from langchain_google_genai import GoogleGenerativeAIEmbeddings

    embeddings = GoogleGenerativeAIEmbeddings(google_api_key=os.getenv('GEMINI_API_KEY'), model="models/text-embedding-004")

    index_name = 'testproject4'

    vectors = PineconeVectorStore.from_documents(
        documents=documents,
        embedding=embeddings,
        index_name=index_name
    )

    return vectors

# Initialize ChatGroq model for querying; the key is read from the 'groq_api_key'
# environment variable (None if it is not set).
groq_api_key = os.getenv('groq_api_key')
llm = ChatGroq(groq_api_key=groq_api_key, model_name='gemma-7b-it')

# Prompt for the question-answering chain: {context} receives the retrieved documents,
# {input} receives the user's question.
prompt_template = ChatPromptTemplate.from_template(""" 
Answer the question based on the provided context only.
Please provide the most accurate response based on the question.                                                  
<context>
{context}
</context>
Questions: {input}
""")

# Main execution.
# In a notebook __name__ is "__main__", so this block runs whenever the cell runs and
# every name assigned here (notably `response`) becomes a kernel global.
if __name__ == "__main__":
    # NOTE(review): absolute, machine-specific path; the cell fails elsewhere unless edited.
    pdf_path = 'C:/python prac/langchain/attention.pdf'  
    text_by_page = extract_text_from_pdf(pdf_path)
    
    # Identify main topics for each page using pre-trained models
    topics = identify_main_topics(text_by_page)
    documents_with_metadata = create_documents_with_metadata(text_by_page, topics)
    
    # Create vector embeddings and store them in Pinecone
    vectors = vector_embedding(documents_with_metadata)

    print("Documents processed and stored with metadata.")

    # Sample question to ask from the documents
    prompt1 = "What is multihead attention?"
    
    # Build the answer chain: the retriever fetches documents from the vector store and
    # the document chain fills them into the {context} slot of `prompt_template`.
    document_chain = create_stuff_documents_chain(llm, prompt_template)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    
    # Invoke the retrieval chain with a sample input
    response = retrieval_chain.invoke({'input': prompt1})
    
    # Print the full response, then the retrieved context and the answer separately
    print(response)
    print("\nContext:", response["context"])
    print("\nMain Answer:", response['answer'])

Documents processed and stored with metadata.
{'input': 'What is multihead attention?', 'context': [Document(id='f8ae13e9-f5af-4690-8d1c-277aa0fb3dc6', metadata={'main_topic': 'Attention Mechanisms', 'page': 4.0}, page_content='Scaled Dot-Product Attention\n Multi-Head Attention\nFigure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several\nattention layers running in parallel.\nquery with all keys, divide each bypdk, and apply a softmax function to obtain the weights on the\nvalues.\nIn practice, we compute the attention function on a set of queries simultaneously, packed together\ninto a matrix Q. The keys and values are also packed together into matrices KandV. We compute\nthe matrix of outputs as:\nAttention(Q;K;V ) = softmax(QKT\npdk)V (1)\nThe two most commonly used attention functions are additive attention [ 2], and dot-product (multi-\nplicative) attention. Dot-product attention is identical to our algorithm, except for the scaling factor\nof

In [78]:
# Print the text of every retrieved document, separated by a ruler line.
# Relies on `response` left in the kernel by the retrieval-chain cell above, so that
# cell must have been run first.
print("\nDocument Similarity Search:")
for i, doc in enumerate(response["context"]):
    print(doc.page_content)
    print("--------------------------------------------")


Document Similarity Search:
Scaled Dot-Product Attention
 Multi-Head Attention
Figure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several
attention layers running in parallel.
query with all keys, divide each bypdk, and apply a softmax function to obtain the weights on the
values.
In practice, we compute the attention function on a set of queries simultaneously, packed together
into a matrix Q. The keys and values are also packed together into matrices KandV. We compute
the matrix of outputs as:
Attention(Q;K;V ) = softmax(QKT
pdk)V (1)
The two most commonly used attention functions are additive attention [ 2], and dot-product (multi-
plicative) attention. Dot-product attention is identical to our algorithm, except for the scaling factor
of1pdk. Additive attention computes the compatibility function using a feed-forward network with
a single hidden layer. While the two are similar in theoretical complexity, dot-product attention is
much faster and 

In [85]:
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain_experimental.text_splitter import SemanticChunker
from langchain.schema import Document
from langchain.vectorstores import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain.chains import create_retrieval_chain
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings  # Ensure this is imported

# Load the variables defined in config.env into the process environment.
# The os.getenv('groq_api_key') and os.getenv('GEMINI_API_KEY') lookups
# further down in this cell read them.
load_dotenv('config.env')

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        The extracted text of all pages in document order, with a newline
        appended after each page. A page that yields no text contributes
        only that newline.
    """
    reader = PdfReader(pdf_path)
    # NOTE(review): depending on the PyPDF2 version, extract_text() may give
    # None or "" for pages without a text layer; `or ""` keeps the
    # concatenation from raising TypeError in the None case.
    # join() also avoids rebuilding the string on every page.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

def _split_to_max_length(text, max_length):
    """Cut ``text`` into pieces of at most ``max_length`` characters, preferring whitespace breaks."""
    pieces = []
    remaining = text
    while len(remaining) > max_length:
        window = remaining[:max_length]
        # Break at the last space/newline inside the window when there is one;
        # otherwise fall back to a hard cut at the limit.
        cut = max(window.rfind(" "), window.rfind("\n"))
        if cut <= 0:
            cut = max_length
        piece = remaining[:cut].rstrip()
        if piece:
            pieces.append(piece)
        remaining = remaining[cut:].lstrip()
    if remaining:
        pieces.append(remaining)
    return pieces


def chunk_text_semantically(text, embedding_function, max_chunk_length=512):
    """Split ``text`` into semantic chunks no longer than ``max_chunk_length``.

    The text is first split by ``SemanticChunker``. Chunks that already fit
    within the limit are returned unchanged. Chunks that exceed it are cut
    into smaller pieces instead of being discarded, so no part of the source
    text is lost before indexing.

    Args:
        text: The full text to split.
        embedding_function: Embeddings object handed to ``SemanticChunker``.
        max_chunk_length: Upper bound on the length of each returned chunk,
            measured in characters (``len`` of the string), not tokens.

    Returns:
        A list of strings, each at most ``max_chunk_length`` characters long.

    Raises:
        ValueError: If ``max_chunk_length`` is smaller than 1.
    """
    if max_chunk_length < 1:
        raise ValueError("max_chunk_length must be at least 1")

    chunker = SemanticChunker(embedding_function)

    # create_documents expects a list of texts, hence the one-element list.
    documents = chunker.create_documents([text])

    semantic_chunks = []
    for document in documents:
        content = document.page_content
        if len(content) <= max_chunk_length:
            semantic_chunks.append(content)
        else:
            # Previously these chunks were dropped, which removed their
            # content from the index entirely.
            semantic_chunks.extend(_split_to_max_length(content, max_chunk_length))

    return semantic_chunks

def create_documents_with_metadata(chunks):
    """Wrap each text chunk in a ``Document`` tagged with its 1-based position.

    Args:
        chunks: Iterable of chunk strings.

    Returns:
        A list of ``Document`` objects in the same order as ``chunks``; each
        one carries ``{'chunk': n}`` as metadata, with ``n`` counting from 1.
    """
    return [
        Document(page_content=text, metadata={'chunk': position})
        for position, text in enumerate(chunks, start=1)
    ]

def vector_embedding(documents, index_name='testproject2', embeddings=None):
    """Embed ``documents`` and store them in a Pinecone index.

    Args:
        documents: Documents to embed and store.
        index_name: Name of the Pinecone index to write to. Defaults to
            ``'testproject2'``, the value that was previously hard-coded.
        embeddings: Optional embeddings object to reuse. When omitted, a
            ``GoogleGenerativeAIEmbeddings`` instance is created with the
            ``GEMINI_API_KEY`` environment variable and the
            ``models/text-embedding-004`` model.

    Returns:
        The vector store returned by ``PineconeVectorStore.from_documents``.
    """
    if embeddings is None:
        # Build the default client only when the caller did not supply one,
        # so an existing embeddings object can be shared.
        embeddings = GoogleGenerativeAIEmbeddings(
            google_api_key=os.getenv('GEMINI_API_KEY'),
            model="models/text-embedding-004",
        )

    return PineconeVectorStore.from_documents(
        documents=documents,
        embedding=embeddings,
        index_name=index_name,
    )

# Chat model used to answer queries; its API key is read from the
# 'groq_api_key' environment variable.
groq_api_key = os.getenv('groq_api_key')
llm = ChatGroq(model_name='gemma-7b-it', groq_api_key=groq_api_key)

# Prompt that tells the model to answer from the supplied context only.
# {context} and {input} are the template's two placeholders.
prompt_template = ChatPromptTemplate.from_template(
    " \n"
    "Answer the question based on the provided context only.\n"
    "Please provide the most accurate response based on the question.\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "Questions: {input}\n"
)

# Main execution
if __name__ == "__main__":
    # The PDF location can be overridden with the ATTENTION_PDF_PATH
    # environment variable, so the notebook is not tied to one machine's
    # directory layout. The original path remains the default.
    pdf_path = os.getenv('ATTENTION_PDF_PATH', 'C:/python prac/langchain/attention.pdf')
    full_text = extract_text_from_pdf(pdf_path)

    # Embedding model that drives the semantic chunker.
    embedding_function = GoogleGenerativeAIEmbeddings(google_api_key=os.getenv('GEMINI_API_KEY'), model="models/text-embedding-004")

    # Chunk the full text semantically. max_chunk_length is compared against
    # len() of each chunk, so the 512 limit is in characters, not tokens.
    semantic_chunks = chunk_text_semantically(full_text, embedding_function, max_chunk_length=512)

    # Attach a 1-based chunk number to every chunk as metadata.
    documents_with_metadata = create_documents_with_metadata(semantic_chunks)

    # Create vector embeddings and store them in Pinecone.
    vectors = vector_embedding(documents_with_metadata)

    print("Documents processed and stored with metadata.")

    

Documents processed and stored with metadata.


In [83]:
 # Sample question to ask from the documents
prompt1 = 'What is multihead attention?'

# Combine the LLM and prompt into a document chain, then put a retriever
# over the vector store in front of it.
document_chain = create_stuff_documents_chain(llm, prompt_template)
retriever = vectors.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Run the sample question through the retrieval chain.
response = retrieval_chain.invoke({"input": prompt1})

# Show the generated answer.
print(response["answer"])

The provided text does not contain any information regarding multihead attention, so I am unable to answer this question from the given context.


In [None]:
# Print the page content of each document stored under response["context"],
# one after another, so the answer can be checked against its sources.
print("\nDocument Similarity Search:")
for doc in response["context"]:  # the index from enumerate() was never used
    print(doc.page_content)
    print("--------------------------------------------")