In [1]:
# Read the Gemini API key from a local text file.
# "utf-8-sig" decodes plain UTF-8/ASCII and also drops a leading byte-order
# mark, which some editors add and which would otherwise end up inside the key.
with open("gemini_api_key.txt", encoding="utf-8-sig") as f:
    key = f.read().strip()

# Fail here with a clear message instead of later with an opaque auth error.
if not key:
    raise ValueError("gemini_api_key.txt is empty; expected a Gemini API key")

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model; it is the generation step of the RAG chain built further down.
chat_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    google_api_key=key,
)
# The printed representation shows the key masked as SecretStr('**********').
print(chat_model)

model='models/gemini-1.5-pro-latest' google_api_key=SecretStr('**********') client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001B9D31C17F0> default_metadata=()


In [4]:
from langchain_community.document_loaders import PyPDFLoader

# One loader per source PDF; paths are relative to the working directory.
loaders = [
    PyPDFLoader("Finance.pdf"),
    PyPDFLoader("IT.pdf"),
    PyPDFLoader("Juridique.pdf"),
]

# Collect one Document per PDF page.
# `load()` replaces the deprecated `load_and_split()`: the latter runs an
# implicit default text splitter first, while this notebook already chunks
# the pages explicitly with its own splitter in a later cell.
all_pages = []
for loader in loaders:
    all_pages.extend(loader.load())

In [6]:
# Display every loaded Document (metadata plus extracted page text).
# NOTE(review): this renders the whole list; a slice such as all_pages[:2]
# would keep the saved output short.
all_pages

[Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-04-29T08:26:50+00:00', 'author': '', 'keywords': '', 'moddate': '2025-04-29T08:26:50+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Finance.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1'}, page_content='April 29, 2025\n1. Company Overview\n1.1 Introduction to TOPG Informatics\nTOPG Informatics is a forward-thinking IT services company founded in 2022 with a\nmission to democratize access to high-quality digital solutions. Based in Morocco, the\ncompany operates across several fields including software development, IT consulting,\ncybersecurity, network architecture, data engineering, and cloud services. With a skilled\nworkforce and a strong commitment to innovation, TOPG has rapidly gained a solid\nreputation among SMEs, governmental institutions,

In [7]:
# Inspect the first Document: PDF metadata and the text extracted for that page.
all_pages[0]

Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-04-29T08:26:50+00:00', 'author': '', 'keywords': '', 'moddate': '2025-04-29T08:26:50+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Finance.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1'}, page_content='April 29, 2025\n1. Company Overview\n1.1 Introduction to TOPG Informatics\nTOPG Informatics is a forward-thinking IT services company founded in 2022 with a\nmission to democratize access to high-quality digital solutions. Based in Morocco, the\ncompany operates across several fields including software development, IT consulting,\ncybersecurity, network architecture, data engineering, and cloud services. With a skilled\nworkforce and a strong commitment to innovation, TOPG has rapidly gained a solid\nreputation among SMEs, governmental institutions, 

In [10]:
# Total number of Documents collected from the three PDFs.
len(all_pages)

14

In [11]:
from langchain_text_splitters import CharacterTextSplitter

# Splitter that cuts on newlines, targeting 500-character chunks with a
# 100-character overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
    separator="\n",
)

# Split every loaded Document; the result is again a list of Documents.
chunks = text_splitter.split_documents(all_pages)

# Quick sanity check: how many chunks, and what type they are.
print(len(chunks))
print(type(chunks[0]))

70
<class 'langchain_core.documents.base.Document'>


In [12]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model shared by the index build and the reopened vector store.
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=key,
)

In [13]:
from langchain_community.vectorstores import Chroma

# Embed every chunk and store the vectors in an on-disk Chroma collection.
# There is no explicit `db.persist()` call: since Chroma 0.4 a store created
# with `persist_directory` is written automatically, and the manual method is
# deprecated (calling it only produced a deprecation warning).
# NOTE(review): re-running this cell against an existing ./chroma_db_
# presumably adds the same chunks a second time — confirm, and clear the
# directory before rebuilding the index.
db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_")

  db.persist()


In [14]:
# Reopen the collection stored under ./chroma_db_, passing the same embedding
# model that was used when the chunks were indexed.
db_connection = Chroma(
    embedding_function=embedding_model,
    persist_directory="./chroma_db_",
)

  db_connection = Chroma(persist_directory="./chroma_db_", embedding_function=embedding_model)


In [15]:
# Wrap the vector store as a retriever that returns 5 results per query (k=5).
retriever = db_connection.as_retriever(
    search_kwargs=dict(k=5),
)

print(type(retriever))

<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>


In [16]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
# Fixed system instruction: answer from the supplied context.
system_message = SystemMessage(content="""You are a Helpful AI Bot.
                  Given a context and question from user,
                  you should answer based on the given context.""")

# Human turn with two placeholders, {context} and {question}, filled in by the chain.
human_prompt = HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context: {context}
    Question: {question}
    Answer: """)

# Two-message chat prompt: system instruction followed by the templated human turn.
chat_template = ChatPromptTemplate.from_messages([system_message, human_prompt])

In [17]:
from langchain_core.output_parsers import StrOutputParser

# Final chain step: parses the chat model's output so the chain yields a plain string.
output_parser = StrOutputParser()

In [18]:
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them;
        an empty string when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


# Retrieval branch: fetch matching chunks, then flatten them into one string.
retrieve_context = retriever | format_docs

# Prompt inputs: "context" comes from retrieval, "question" is the raw input passed through.
chain_inputs = {"context": retrieve_context, "question": RunnablePassthrough()}

# Full pipeline: build inputs -> fill the prompt -> call the model -> parse to text.
rag_chain = chain_inputs | chat_template | chat_model | output_parser

In [22]:
# Run one question through the RAG chain and show the answer string.
question = """Which programming languages are mentioned in the document as part of TOPG Informatics' technology stack?"""
response = rag_chain.invoke(question)

response

'Python, Java, JavaScript, C++, and PHP.'

In [50]:
from IPython.display import Markdown as md

# Render the latest answer as Markdown instead of a quoted Python string.
# NOTE(review): the output saved under this cell does not match the answer
# recorded for the preceding query — it looks stale; re-run to refresh it.
md(response)

Leave No Context Behind introduces Infini-attention, a new attention mechanism for Transformer-based Large Language Models (LLMs).  Infini-attention allows LLMs to handle infinitely long input sequences with limited memory and computation by combining compressive memory, masked local attention, and long-term linear attention within a single Transformer block.  This approach enables continual pre-training and fine-tuning on extended contexts.  The effectiveness of Infini-attention is demonstrated on long-context language modeling, passkey retrieval with 1M sequence length, and book summarization with 500K length using 1B and 8B parameter LLMs.

In [55]:
# Second query through the same chain; the question text (including its
# trailing newline and padding) is passed exactly as written.
# NOTE(review): the answer saved under this cell talks about a Transformer
# paper rather than the TOPG PDFs loaded earlier — possibly stale output or
# leftover data in ./chroma_db_; re-run on a fresh kernel and store to confirm.
question = """Please Explain MCP
                            """
response = rag_chain.invoke(question)

response

'The provided text describes Memory-augmented Compressive Transformer (MCT), not MCP.  It explains that MCT augments the standard multi-head attention (MHA) mechanism with parallel "compressive memories."  Unlike the standard Transformer\'s key-value (KV) memory, which grows with input length, compressive memory maintains a constant size for efficiency.  The MCT combines the strengths of both MHA (for context-dependent computation) and compressive memory (for efficient information storage and retrieval).'