In [1]:
#pip install faiss-cpu

In [1]:
from langchain.document_loaders import DirectoryLoader,PyMuPDFLoader
from langchain_chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS

import os

# 1. Load the documents

In [2]:
# Folder scanned for PDFs. Set the RAG_DATA_DIR environment variable to run the
# notebook on another machine; the default keeps the original location.
DATA_DIR = os.environ.get(
    "RAG_DATA_DIR",
    r"C:\Users\HP\Documents\data science projects\rag bassed projects",
)

# Loader for every file in DATA_DIR matching *.pdf, parsed with PyMuPDFLoader.
loader = DirectoryLoader(
    path=DATA_DIR,
    glob="*.pdf",
    loader_cls=PyMuPDFLoader,
)


In [3]:
# Run the loader: reads the matched PDFs into a list of Document objects.
docs = loader.load()

# 2. Chunk the data

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of up to 10000 characters,
# with 1000 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=1000,
)
chunks = text_splitter.split_documents(docs)

# 3. Initialize the model for embeddings

In [29]:
# Read the Google API key from a local file (kept out of the notebook itself).
# The handle gets its own name so `key` only ever refers to the key string, and
# surrounding whitespace (e.g. a trailing newline) is stripped so it is not
# sent as part of the key.
with open('key.txt', 'r') as key_file:
    key = key_file.read().strip()

In [30]:
# Embedding client used to vectorise the chunks for the vector store.
embeddings = GoogleGenerativeAIEmbeddings(
    model='models/text-embedding-004',
    google_api_key=key,
)

# 4. Initialize the vector store

In [8]:
# Disabled Chroma setup: wrapped in a bare string literal so it is not executed.
# The cell only echoes the string as its output; the FAISS index is used instead.
"""db = Chroma(collection_name='git_stores',embedding_function=embeddings,persist_directory='./git_vector_stores')
db.add_documents(chunks)"""

"db = Chroma(collection_name='git_stores',embedding_function=embeddings,persist_directory='./git_vector_stores')\ndb.add_documents(chunks)"

In [9]:
# Inspect the loaded documents (displays the full list).
docs

[Document(metadata={'source': 'C:\\Users\\HP\\Documents\\data science projects\\rag bassed projects\\1706.03762v7.pdf', 'file_path': 'C:\\Users\\HP\\Documents\\data science projects\\rag bassed projects\\1706.03762v7.pdf', 'page': 0, 'total_pages': 15, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.25', 'creationDate': 'D:20240410211143Z', 'modDate': 'D:20240410211143Z', 'trapped': ''}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗†\nUniversity of Toronto\naidan@cs.toro

In [31]:
# Embed every chunk into a FAISS index, then persist the index
# to the local folder 'faiss_index1'.
vector_stores = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)
vector_stores.save_local(folder_path='faiss_index1')

# 5. Initialize the retriever object

In [32]:
# Retriever view over the FAISS store; used later to fetch documents for the
# built-in QA chain. (The spelling `reteriever` is kept because later cells
# reference this exact name.)
reteriever = vector_stores.as_retriever()


# 6. Create the model object

In [33]:
# Chat model that generates the answers in both chains below.
model = ChatGoogleGenerativeAI(
    model='models/gemini-1.0-pro-latest',
    api_key=key,
)

In [34]:
# Echo the model object to check its configuration.
model

ChatGoogleGenerativeAI(model='models/gemini-1.0-pro-latest', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x00000199FECF9250>, default_metadata=())

# 7. Define the prompt template

In [35]:
# Raw prompt text shared by both chains. It has two placeholders:
# {context} for the retrieved document text and {question} for the user's query.
# NOTE(review): the wording contains typos ("Your are helpfull", "reply in polite");
# it is left untouched here because this text is sent to the model verbatim.
prompt = """
Your are helpfull ai assistant.
Your name is qa Bot.
Answer the question based on the context : {context}.
Answer the question {question}.
summarize the given context in point wise
If any thing asked other than context reply in polite """


In [36]:
# Turn the raw prompt string (with its {context} and {question} placeholders)
# into a chat prompt template usable inside the chains.
prompt_template = ChatPromptTemplate.from_template(prompt)


In [37]:
from langchain.chains import LLMChain

In [38]:
# Context provider for the custom RAG chain
def retriever(query):
    """Return the text retrieved for ``query`` as a single string.

    A retriever is created from the module-level ``vector_stores`` on every
    call and invoked with ``query``; the ``page_content`` of each returned
    document is then joined, separated by blank lines.
    """
    matched_docs = vector_stores.as_retriever().invoke(query)
    page_texts = (doc.page_content for doc in matched_docs)
    return '\n\n'.join(page_texts)

In [39]:
from langchain.chains.question_answering import load_qa_chain
from langchain_core.runnables import RunnablePassthrough


# Custom RAG chain

In [40]:
# Output parser placed at the end of the custom RAG chain.
parser = StrOutputParser()

In [41]:
# Custom RAG chain: the mapping supplies the retrieved context and the untouched
# question to the prompt, whose output flows through the model and the parser.
chain = (
    {'context': retriever, 'question': RunnablePassthrough()}
    | prompt_template
    | model
    | parser
)

In [42]:
# Run the custom chain end to end on a sample question and print the answer.
print(chain.invoke('summarize transformers architecture'))

**Transformer Architecture Summary**

- **Encoder-Decoder Structure:**
    - Encoder maps input sequence to a sequence of continuous representations.
    - Decoder generates an output sequence one symbol at a time.

- **Attention Mechanism:**
    - Multi-head attention allows different attention heads to focus on different representation subspaces.
    - Encoder-decoder attention enables decoder positions to attend to all input sequence positions.
    - Encoder and decoder self-attention allows positions to attend to all other positions within each layer.

- **Position-wise Feed-Forward Networks:**
    - Applied to each position separately and identically.
    - Consist of two linear transformations with a ReLU activation.

- **Embeddings and Softmax:**
    - Learned embeddings convert input/output tokens to vectors.
    - Linear transformation and softmax function convert decoder output to predicted next-token probabilities.

- **Other Features:**
    - Residual connections and layer 

In [43]:
# Built-in "stuff" question-answering chain, reusing the same model and prompt template
qa_chain = load_qa_chain(
    llm=model,
    chain_type='stuff',
    prompt=prompt_template,
)

In [44]:
# Fetch the documents retrieved for the question; they are passed to the
# built-in QA chain as its input documents.
# NOTE(review): this rebinds `docs`, which earlier held the full result of
# loader.load(). Re-running the chunking cell after this one would split only
# these retrieved documents; consider a distinct name.
docs = reteriever.invoke('summarize about transformers architecture')

In [45]:
# Inspect the retrieved documents.
docs

[Document(metadata={'source': 'C:\\Users\\HP\\Documents\\data science projects\\rag bassed projects\\1706.03762v7.pdf', 'file_path': 'C:\\Users\\HP\\Documents\\data science projects\\rag bassed projects\\1706.03762v7.pdf', 'page': 2, 'total_pages': 15, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.25', 'creationDate': 'D:20240410211143Z', 'modDate': 'D:20240410211143Z', 'trapped': ''}, page_content='Figure 1: The Transformer - model architecture.\nThe Transformer follows this overall architecture using stacked self-attention and point-wise, fully\nconnected layers for both the encoder and decoder, shown in the left and right halves of Figure 1,\nrespectively.\n3.1\nEncoder and Decoder Stacks\nEncoder:\nThe encoder is composed of a stack of N = 6 identical layers. Each layer has two\nsub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position-\nwise fully 

In [46]:
# Run the built-in QA chain on the retrieved documents and print only the answer text.
qa_inputs = {
    'input_documents': docs,
    'question': 'summarize about transformers architecture in detail',
}
qa_result = qa_chain.invoke(qa_inputs, return_only_outputs=True)
print(qa_result['output_text'])

**Transformer Architecture**

* **Encoder:**
    * Stack of 6 identical layers
    * Each layer has two sub-layers:
        * Multi-head self-attention mechanism
        * Position-wise fully connected feed-forward network
    * Residual connections and layer normalization

* **Decoder:**
    * Similar to encoder, with 6 identical layers
    * Adds a third sub-layer:
        * Multi-head attention over the output of the encoder stack
    * Prevents leftward information flow to maintain auto-regressive property

* **Attention:**
    * Multi-head attention:
        * Allows model to attend to information from different representation subspaces
        * Uses 8 parallel attention layers, each with reduced dimension (64)
    * Applications in Transformer:
        * Encoder-decoder attention: Decoder attends to encoder output
        * Self-attention: Encoder and decoder attend to their own outputs
        * Masked attention in decoder prevents leftward information flow

* **Position-wise F