In [None]:
import subprocess
import time
import os

def fetch_ollama():
    !curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz
    !mkdir -p /usr/local/bin
    !tar -C /usr/local -xzf ollama-linux-amd64.tgz
    !chmod +x /usr/local/bin/ollama

def install_ollama():
    if not os.path.isfile('/usr/local/bin/ollama'):
        fetch_ollama()
    process = subprocess.Popen(
        ['/usr/local/bin/ollama', 'serve'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env={**os.environ, 'OLLAMA_HOST': '0.0.0.0:11434'}
    )

    # Esperar a que el servidor se inicie
    time.sleep(5)

def check_ollama():
    import requests
    try:
        response = requests.get("http://localhost:11434/ping")
        print("Ollama est déjà installé.")
    except requests.ConnectionError:
        print("Ollama n'est pas installé. Installation en cours...")
        install_ollama()
        print("Ollama a été installé avec succès.")

# Vérifie si le code est exécuté sur Google Colab
if 'COLAB_GPU' in os.environ:
    # Commandes à exécuter uniquement sur Google Colab
    if os.path.isdir('tp-rag'):
        %cd tp-rag
    if os.path.isdir('.git'):
        # Already in the git repository, just pull
        # Pull updates; only check/install if no updates
        !git pull | grep -q 'Already up to date.' || pip install -r requirements.txt
    else:
        # Clone the repository
        !git clone https://github.com/Florian-Audouard/tp-rag
        %cd tp-rag
        !pip install -r requirements.txt
    check_ollama()
    !/usr/local/bin/ollama pull qwen3:8b

else:
    # Commandes à exécuter si ce n'est pas sur Google Colab
    print("Pas sur Google Colab, ces commandes ne seront pas exécutées.")

Ollama est déjà installé.


In [76]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


EMBESSINGS_MODEL_NAME = "intfloat/multilingual-e5-base"
DATA_FOLDER = "data/"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = CHUNK_SIZE // 5

ModuleNotFoundError: No module named 'langchain_ollama'

In [50]:
embeddings = HuggingFaceEmbeddings(model_name=EMBESSINGS_MODEL_NAME)
vector_store_splits = Chroma(
    collection_name="split_data_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_split_db",  # Where to save data locally, remove if not necessary
)
vector_store_full = Chroma(
    collection_name="full_data_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_full_db",  # Where to save data locally, remove if not necessary
)

In [51]:
loader = DirectoryLoader(DATA_FOLDER)
documents = loader.load()
print(f"Number of documents loaded: {len(documents)}")

Number of documents loaded: 63


In [52]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True
)
all_splits = text_splitter.split_documents(documents)
print(f"Number of paragraphs created: {len(all_splits)}")

Number of paragraphs created: 8847


In [53]:
_ = vector_store_full.add_documents(documents=documents)

In [54]:
# Add documents in batches to avoid exceeding max batch size
BATCH_SIZE = 5000
for i in range(0, len(all_splits), BATCH_SIZE):
    batch = all_splits[i : i + BATCH_SIZE]
    vector_store_splits.add_documents(documents=batch)
    print(f"Added batch {i//BATCH_SIZE + 1}: {len(batch)} documents")
print(f"All {len(all_splits)} documents added to the vector store.")

Added batch 1: 5000 documents
Added batch 2: 3847 documents
All 8847 documents added to the vector store.


In [56]:
def generate_query(vector_store, query, k=3, score=False):
    if score:
        return vector_store.similarity_search_with_score(query, k=k)
    return vector_store.similarity_search(query, k=k)


print()
generate_query(vector_store_splits, "what is Video-Panda ?", k=1)




[Document(id='5753a59f-f862-4b2a-8eb6-0b2f8796f1b6', metadata={'start_index': 52738, 'source': 'data/autres_articles/2412.18609v1.pdf'}, page_content='F. Broader Impact\n\nWe introduce Video-Panda, an encoder-free Video Lan- guage Model for video understanding. Our model addresses key ethical and practical challenges in large-scale AI de- ployment. While many VLMs raise concerns about data bias, privacy, and computational costs, Video-Panda miti- gates these issues through two key design choices: training exclusively on publicly available datasets and eliminating the need for a pretrained encoder. This approach not only reduces ethical concerns but also significantly lowers com- putational requirements and deployment costs, making the model more accessible and environmentally sustainable.')]

In [None]:
llm = llm = ChatOllama(
    model="qwen3:8b",
    temperature=0,
)

res1 = llm.invoke("Hello, world!").content

print("Response from", MODEL_NAME_SIMPLE + ":", res1)

Response from llama-3.1-8b-instant: Hello, world. It's nice to meet you. Is there something I can help you with or would you like to chat?
Response from groq/compound-mini: Hello! How can I help you today?


In [58]:
SYSTEM_PROMPT = """You are a helpful AI assistant that helps people find information. Use the provided DOCUMENTS to answer the question at the end. If you don't know the answer, just say you don't know, don't try to make up an answer."""
USER_PROMPT = """DOCUMENTS:
{context}
QUESTION: {question}
Answer:"""


def generate_answer(
    agent, question, get_session_history=lambda x: InMemoryChatMessageHistory()
):
    results = generate_query(vector_store_splits, question, k=3)
    context = ""
    for i, documents in enumerate(results):
        context += f"DOCUMENT {i}" + ":\n"
        context += documents.page_content + "\n\n"

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", SYSTEM_PROMPT),
            MessagesPlaceholder(variable_name="history"),
            ("human", USER_PROMPT),
        ]
    )
    chain = prompt | agent
    chain_with_memory = RunnableWithMessageHistory(
        chain,
        get_session_history,
        input_messages_key="question",
        history_messages_key="history",
    )

    response = chain_with_memory.invoke(
        {"context": context, "question": question},
        config={"configurable": {"session_id": "user-1"}},
    )
    return response.content


generate_answer(llm, "What is Video-Panda?")

'Video-Panda is an encoder-free Video Language Model for video understanding.'

In [59]:
store = {}


def get_session_history(session_id: str):
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]


ans1 = generate_answer(llm, "What is Video-Panda?", get_session_history)
ans2 = generate_answer(llm, "Tell me what we discussed earlier?", get_session_history)
print("Answer 1:", ans1)
print("Answer 2:", ans2)

Answer 1: Video-Panda is an encoder-free Video Language Model for video understanding.
Answer 2: We discussed Video-Panda, but then you provided some documents and asked me to answer a question based on those documents. However, there is no question in the documents you provided.


In [None]:
SYSTEM_PROMPT_SUMMARY = """You are a helpful AI assistant that helps people summarize documents. Use the provided DOCUMENT to create a concise summary."""

USER_PROMPT_SUMMARY = """DOCUMENT:{document}"""


def create_sumarry(document, debug=False):
    document = generate_query(vector_store_full, document, k=1)[0]
    if debug:
        print("Document to summarize:", document.metadata["source"])
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", SYSTEM_PROMPT_SUMMARY),
            ("human", USER_PROMPT_SUMMARY),
        ]
    )
    chain = prompt | llm
    summary = chain.invoke({"document": document.page_content})
    return summary.content


summary = create_sumarry("Video-Panda", debug=True)

Document to summarize: data/autres_articles/2412.18609v1.pdf


APIStatusError: Error code: 413 - {'error': {'message': 'Request Entity Too Large', 'type': 'invalid_request_error', 'code': 'request_too_large'}}