In [1]:
import os

# V√©rification de l'environnement d'ex√©cution
IS_COLAB = 'COLAB_GPU' in os.environ

if IS_COLAB:
    print("Environnement d√©tect√© : Google Colab")
    print("Clonage du d√©p√¥t GitHub...")
    !git clone https://github.com/Apierriecube/tp-rag-student-version
    %cd tp-rag-student-version
else:
    print("Environnement d√©tect√© : Local")

# Installation des d√©pendances depuis requirements.txt
print("\nInstallation des d√©pendances...")
print("Cela peut prendre quelques minutes...\n")

!pip install -q -r requirements.txt

print("\n" + "="*80)
print("Installation termin√©e !")
print("="*80)
print("\nLes d√©pendances RAG sont install√©es avec les bonnes versions.")
print("Vous pouvez maintenant ex√©cuter les cellules suivantes.\n")


Environnement d√©tect√© : Local

Installation des d√©pendances...
Cela peut prendre quelques minutes...



Environnement d√©tect√© : Local

Installation des d√©pendances...
Cela peut prendre quelques minutes...



^C
[31mERROR: Operation cancelled by user[0m[31m
[0m
Installation termin√©e !

Les d√©pendances RAG sont install√©es avec les bonnes versions.
Vous pouvez maintenant ex√©cuter les cellules suivantes.



# TP - Retrieval Augmented Generation (RAG)

## √âtape 1 - Indexation des documents

In [2]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_ollama import ChatOllama
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
import gradio as gr
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

### Exercice 1 : Indexation des documents


In [3]:
DATA_PATH = "data/arxiv"
CHROMA_PATH = "chroma_db"

print("Chargement des documents PDF...")
loader = PyPDFDirectoryLoader(DATA_PATH)
documents = loader.load()
print(f"{len(documents)} pages charg√©es depuis {DATA_PATH}")

print("\nD√©coupage des documents en chunks...")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_documents(documents)
print(f"{len(chunks)} chunks cr√©√©s")

print("\nExemple de chunk:")
print(f"Source: {chunks[0].metadata.get('source', 'N/A')}")
print(f"Contenu: {chunks[0].page_content[:200]}...")


Chargement des documents PDF...
386 pages charg√©es depuis data/arxiv

D√©coupage des documents en chunks...
1721 chunks cr√©√©s

Exemple de chunk:
Source: data/arxiv/A_Survey_of_Software-Defined_Smart_Grid_Networks_Security_Threats_and__Defense_Techniques.pdf
Contenu: A Survey of Software-Defined Smart Grid
Networks: Security Threats and Defense Techniques
Dennis Agnew Sharon Boamah Janise McNair
Department of Electrical and Computer Engineering , University of Flo...


In [4]:
print("Initialisation du mod√®le d'embeddings multilingual-e5-base...")
embeddings = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-base",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)
print("Mod√®le d'embeddings charg√©")

test_text = "Retrieval Augmented Generation"
test_embedding = embeddings.embed_query(test_text)
print(f"\nDimension des embeddings: {len(test_embedding)}")


Initialisation du mod√®le d'embeddings multilingual-e5-base...


modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

Mod√®le d'embeddings charg√©

Dimension des embeddings: 768


In [5]:
print("Cr√©ation de la base vectorielle ChromaDB...")
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=CHROMA_PATH,
    collection_name="rag_documents"
)
print(f"Base vectorielle cr√©√©e avec {len(chunks)} chunks index√©s")
print(f"Stock√©e dans: {CHROMA_PATH}")


Cr√©ation de la base vectorielle ChromaDB...
Base vectorielle cr√©√©e avec 1721 chunks index√©s
Stock√©e dans: chroma_db


### Exercice 2 : Interrogation de la base vectorielle

Cr√©ation d'une fonction pour rechercher les documents pertinents avec leurs scores de similarit√©.

In [6]:
def search_documents(query: str, k: int = 5):
    """
    Recherche les documents pertinents par rapport √† la requ√™te.

    Args:
        query: La requ√™te de recherche
        k: Nombre de documents √† retourner

    Returns:
        Liste de tuples (Document, score)
    """
    print(f"\nRecherche: '{query}'")
    print(f"Recherche des {k} documents les plus pertinents...\n")

    results = vectorstore.similarity_search_with_score(query, k=k)

    for i, (doc, score) in enumerate(results, 1):
        print(f"{'='*80}")
        print(f"R√©sultat #{i} | Score: {score:.4f}")
        print(f"Source: {doc.metadata.get('source', 'N/A')}")
        print(f"Page: {doc.metadata.get('page', 'N/A')}")
        print(f"\nContenu:")
        print(doc.page_content[:300] + "..." if len(doc.page_content) > 300 else doc.page_content)
        print()

    return results

results = search_documents("What is Retrieval Augmented Generation?", k=3)



Recherche: 'What is Retrieval Augmented Generation?'
Recherche des 3 documents les plus pertinents...

R√©sultat #1 | Score: 0.3134
Source: data/arxiv/Complex_QA_and_language_models_hybrid_architectures_Survey.pdf
Page: 40

Contenu:
L. Sifre, M. Valko, S. Osindero, T. Lillicrap, N. Heess, and C. Blundell, ‚ÄúRetrieval-Augmented Reinforcement Learning, ‚Äù
arXiv:2202.08417 [cs], Mar. 2022.
[182] L. Zhou and K. Small, ‚ÄúInverse Reinforcement Learning with Natural Language Goals, ‚Äù Dec. 2020.
[183] Y. Pruksachatkun, J. Phang, H. Liu, P...

R√©sultat #2 | Score: 0.3308
Source: data/arxiv/Complex_QA_and_language_models_hybrid_architectures_Survey.pdf
Page: 31

Contenu:
‚Ä¢ Retrieval augmented LLM: keeping a maximum of information out of model while making it easy to
access and update without any re-train has an important potential but is often less efficient and may
require equal computation when comparing "total additional answer generation time" vs "training",
new...

R√©sultat #3 | S

## √âtape 2 - RAG avec Ollama

### Configuration d'Ollama

**Sur Google Colab** : Ex√©cutez la cellule suivante pour installer et lancer Ollama automatiquement.

**En local** : Assurez-vous qu'Ollama est install√© et lanc√© :
```bash
# Dans un terminal
apt-get install -y zstd curl
curl -fsSL https://ollama.com/install.sh | sh
ollama serve &
ollama pull qwen2.5:3b
```

Note : Utilisez `qwen2.5:3b` pour Colab (plus l√©ger) ou `qwen3:8b` en local si vous avez plus de RAM.

In [42]:
import subprocess
import time

if IS_COLAB:
    print("Installation d'Ollama sur Google Colab...")
    print("Cela peut prendre 2-3 minutes...\n")

    # Installation d'Ollama
    !curl -fsSL https://ollama.com/install.sh | sh

    # Lancement du serveur Ollama en arri√®re-plan
    print("\nLancement du serveur Ollama...")
    subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    # Attendre que le serveur d√©marre
    time.sleep(5)

    # T√©l√©chargement du mod√®le
    print("T√©l√©chargement du mod√®le qwen2.5:3b...")
    print("Cela peut prendre quelques minutes...\n")
    !ollama pull qwen2.5:3b

    print("\nOllama est pr√™t √† √™tre utilis√© !")
    print("Mod√®le qwen2.5:3b charg√©\n")

else:
    print("Environnement local d√©tect√©")
    print("Assurez-vous qu'Ollama est lanc√© :")
    print("   1. Dans un terminal : ollama serve")
    print("   2. T√©l√©charger le mod√®le : ollama pull qwen2.5:3b")
    print("\nSi Ollama n'est pas install√© : https://ollama.com/download\n")


Installation d'Ollama sur Google Colab...
Cela peut prendre 2-3 minutes...

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading ollama-linux-amd64.tgz
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.

Lancement du serveur Ollama...
T√©l√©chargement du mod√®le qwen2.5:3b...
Cela peut prendre quelques minutes...

[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l

Ollama est pr√™t √† √™tre utilis√© !
Mod√®le qwen2.5:3b charg√©



### Exercice 3 : Prompt Template

Cr√©ation d'un template de prompt optimis√© pour le RAG.

In [27]:
system_prompt = """Tu es un assistant expert en intelligence artificielle et machine learning.
Ta mission est de r√©pondre aux questions de l'utilisateur en te basant UNIQUEMENT sur le contexte fourni.

Instructions importantes :
- Utilise EXCLUSIVEMENT les informations du contexte pour r√©pondre
- Si l'information n'est pas dans le contexte, dis clairement "Je ne trouve pas cette information dans les documents fournis"
- Cite les sources quand c'est pertinent
- R√©ponds de mani√®re claire, structur√©e et concise
- Si la question est en fran√ßais, r√©ponds en fran√ßais. Si elle est en anglais, r√©ponds en anglais

Contexte :
{context}

Question : {input}

R√©ponse :"""

prompt = ChatPromptTemplate.from_template(system_prompt)

print("Template de prompt cr√©√©")
print("\nAper√ßu du template:")
print(system_prompt[:300] + "...")


Template de prompt cr√©√©

Aper√ßu du template:
Tu es un assistant expert en intelligence artificielle et machine learning.
Ta mission est de r√©pondre aux questions de l'utilisateur en te basant UNIQUEMENT sur le contexte fourni.

Instructions importantes :
- Utilise EXCLUSIVEMENT les informations du contexte pour r√©pondre
- Si l'information n'es...


### Exercice 4 : Cha√Æne RAG avec Ollama

Construction de la cha√Æne RAG compl√®te.

In [43]:
print("Initialisation du mod√®le Qwen via Ollama...")

llm = ChatOllama(
    model="qwen2.5:3b",
    temperature=0,
)

print("Mod√®le LLM initialis√©")


Initialisation du mod√®le Qwen via Ollama...
Mod√®le LLM initialis√©


In [29]:
def create_rag_chain(vectorstore, prompt, llm):
    """
    Cr√©e une cha√Æne RAG compl√®te.

    Args:
        vectorstore: La base vectorielle
        prompt: Le template de prompt
        llm: Le mod√®le de langage

    Returns:
        La cha√Æne RAG
    """
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 4}
    )

    combine_docs_chain = create_stuff_documents_chain(llm, prompt)

    rag_chain = create_retrieval_chain(retriever, combine_docs_chain)

    return rag_chain

rag_chain = create_rag_chain(vectorstore, prompt, llm)
print("Cha√Æne RAG cr√©√©e avec succ√®s")


Cha√Æne RAG cr√©√©e avec succ√®s


In [30]:
import requests
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError

def check_ollama_server():
    """V√©rifie si le serveur Ollama est accessible."""
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=3)
        if response.status_code == 200:
            models = response.json().get('models', [])
            model_names = [m.get('name', '') for m in models]
            print(f"Serveur Ollama accessible")
            print(f"Mod√®les disponibles: {model_names}")
            return True
        else:
            print(f"Serveur Ollama r√©pond avec le code {response.status_code}")
            return False
    except requests.exceptions.RequestException as e:
        print(f"Impossible de joindre le serveur Ollama: {e}")
        print("\nSolutions:")
        print("   1. Ex√©cutez d'abord la cellule d'installation d'Ollama (cellule 11)")
        print("   2. Ou en local: ollama serve")
        return False

def ask_question(question: str, timeout: int = 60):
    """
    Pose une question au syst√®me RAG avec timeout.

    Args:
        question: La question √† poser
        timeout: Timeout en secondes (d√©faut: 60s)

    Returns:
        La r√©ponse g√©n√©r√©e ou None en cas d'erreur
    """
    # V√©rification pr√©alable du serveur
    if not check_ollama_server():
        return None

    print(f"\n{'='*80}")
    print(f"Question: {question}")
    print(f"{'='*80}\n")
    print("G√©n√©ration de la r√©ponse (timeout: {timeout}s)...\n")

    try:
        # Ex√©cution avec timeout
        with ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(rag_chain.invoke, {"input": question})
            try:
                response = future.result(timeout=timeout)
            except FuturesTimeoutError:
                print(f"\nTIMEOUT apr√®s {timeout} secondes!")
                print("\nCela sugg√®re que:")
                print("   - Le serveur Ollama ne r√©pond pas")
                print("   - Le mod√®le n'est pas charg√© correctement")
                print("\nEssayez:")
                print("   1. Interrompre la cellule")
                print("   2. R√©-ex√©cuter la cellule d'installation Ollama")
                print("   3. V√©rifier: !ollama list")
                return None

        print(f"R√©ponse:")
        print(response["answer"])

        print(f"\nDocuments sources utilis√©s:")
        for i, doc in enumerate(response["context"], 1):
            print(f"  {i}. {doc.metadata.get('source', 'N/A')} (page {doc.metadata.get('page', 'N/A')})")

        return response

    except Exception as e:
        print(f"\nErreur: {type(e).__name__}: {str(e)}")
        print("\nV√©rifiez que toutes les cellules pr√©c√©dentes ont √©t√© ex√©cut√©es")
        return None

response = ask_question("What are the main challenges in multi-agent reinforcement learning?", timeout=60)

‚úÖ Serveur Ollama accessible
üì¶ Mod√®les disponibles: ['qwen2.5:3b']

Question: What are the main challenges in multi-agent reinforcement learning?

‚è≥ G√©n√©ration de la r√©ponse (timeout: {timeout}s)...

R√©ponse:
Les principales d√©fis identifi√©s dans le domaine de la r√©seaux d'apprentissage par renforcement multi-agents (MARL) sont les suivants :

1. **Non-Markovien Due to Human Intervention** : Les interactions humaines introduisent souvent des √©l√©ments non-markoviens, ce qui rend difficile l'application des m√©thodes MARL traditionnelles.

2. **Diversit√© des comportements humains** : La diversit√© des comportements humains peut entra√Æner une variabilit√© dans les sc√©narios d'apprentissage et rendre la g√©n√©ralisation plus complexe.

3. **Complexit√© H√©t√©rog√®ne** : Les syst√®mes physiques intelligents peuvent avoir des caract√©ristiques h√©t√©rog√®nes, ce qui rend le d√©veloppement de strat√©gies coh√©rentes et efficaces plus difficile.

4. **Scalabilit√© Multi-Huma

### Exercice 5 : M√©moire conversationnelle

Ajout de la capacit√© √† se souvenir des conversations pr√©c√©dentes.

In [37]:
contextualized_system_prompt = """Tu es un assistant expert en intelligence artificielle et machine learning.
Ta mission est de r√©pondre aux questions de l'utilisateur en te basant sur le contexte fourni et l'historique de la conversation.

Instructions importantes :
- Utilise les informations du contexte et de la conversation pr√©c√©dente
- R√©ponds de mani√®re coh√©rente avec l'historique
- Si l'information n'est pas disponible, dis-le clairement
- R√©ponds dans la m√™me langue que la question

Contexte :
{context}
"""

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualized_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

print("Template avec historique cr√©√©")


Template avec historique cr√©√©


In [38]:
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """R√©cup√®re ou cr√©e l'historique d'une session."""
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
combine_docs_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain_with_history = create_retrieval_chain(retriever, combine_docs_chain)

conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain_with_history,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

print("Cha√Æne RAG avec m√©moire cr√©√©e")


Cha√Æne RAG avec m√©moire cr√©√©e


In [39]:
def chat_with_memory(question: str, session_id: str = "default"):
    """
    Conversation avec m√©moire de l'historique.

    Args:
        question: La question √† poser
        session_id: ID de la session de conversation
    """
    print(f"\n{'='*80}")
    print(f"Vous: {question}")
    print(f"{'='*80}\n")

    response = conversational_rag_chain.invoke(
        {"input": question},
        config={"configurable": {"session_id": session_id}}
    )

    print(f"Assistant: {response['answer']}\n")

    return response

print("Test de conversation avec m√©moire:\n")
session_id = "test_session_1"

chat_with_memory("What is Retrieval Augmented Generation?", session_id)
chat_with_memory("What are its main advantages?", session_id)
chat_with_memory("Can you give me a concrete example?", session_id)


Test de conversation avec m√©moire:


Vous: What is Retrieval Augmented Generation?

Assistant: Retrieval-Augmented Generation refers to a technique in natural language processing where the model uses pre-existing knowledge or data from external sources (such as databases, documents, or other texts) to enhance its own generation process. This approach aims to improve the quality and relevance of text generated by the model by leveraging additional information that is not directly provided within the input prompt.

In Retrieval-Augmented Generation, a model like a Language Model in Context (LMC), which can generate text based on context from previous inputs, incorporates external retrieval mechanisms. These mechanisms allow the model to access and utilize data stored externally, such as knowledge bases or previously generated texts, to inform its own generation process. This can help the model produce more accurate, relevant, and diverse outputs.

The key idea is that by augmenting the 

{'input': 'Can you give me a concrete example?',
 'chat_history': [HumanMessage(content='What is Retrieval Augmented Generation?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Retrieval-Augmented Generation refers to a technique in natural language processing where the model uses pre-existing knowledge or data from external sources (such as databases, documents, or other texts) to enhance its own generation process. This approach aims to improve the quality and relevance of text generated by the model by leveraging additional information that is not directly provided within the input prompt.\n\nIn Retrieval-Augmented Generation, a model like a Language Model in Context (LMC), which can generate text based on context from previous inputs, incorporates external retrieval mechanisms. These mechanisms allow the model to access and utilize data stored externally, such as knowledge bases or previously generated texts, to inform its own generation process. This can help t

### Exercice 6 : Outil de r√©sum√© de documents complets

Cr√©ation d'un outil pour r√©cup√©rer et r√©sumer un document entier.

In [40]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.summarize import load_summarize_chain

def summarize_document(pdf_path: str, chain_type: str = "map_reduce"):
    """
    R√©sume un document PDF complet.

    Args:
        pdf_path: Chemin vers le fichier PDF
        chain_type: Type de cha√Æne de r√©sum√© ("map_reduce" ou "stuff")

    Returns:
        Le r√©sum√© du document
    """
    print(f"\nChargement du document: {pdf_path}")

    loader = PyPDFLoader(pdf_path)
    docs = loader.load()

    print(f"{len(docs)} pages charg√©es")
    print(f"\nG√©n√©ration du r√©sum√© (m√©thode: {chain_type})...\n")

    summarize_chain = load_summarize_chain(
        llm,
        chain_type=chain_type,
        verbose=False
    )

    summary = summarize_chain.invoke(docs)

    print(f"R√©sum√© du document '{Path(pdf_path).name}':")
    print(f"{'='*80}")
    print(summary["output_text"])
    print(f"{'='*80}\n")

    return summary

import glob
pdf_files = glob.glob("data/arxiv/*.pdf")
print(f"{len(pdf_files)} fichiers PDF disponibles:")
for i, pdf in enumerate(pdf_files[:5], 1):
    print(f"  {i}. {Path(pdf).name}")

if pdf_files:
    summary = summarize_document(pdf_files[0])


10 fichiers PDF disponibles:
  1. A_Survey_of_Software-Defined_Smart_Grid_Networks_Security_Threats_and__Defense_Techniques.pdf
  2. A_survey_on_the_complexity_of_learning_quantum_states.pdf
  3. Macroeconomic_Effects_of_Inflation_Targeting_A_Survey_of_the_Empirical__Literature.pdf
  4. Multi-Agent_Reinforcement_Learning_Methods_Applications_Visionary__Prospects_and_Challenges.pdf
  5. Reconfigurable_Intelligent_Surface_Assisted_Railway_Communications_A__survey.pdf

Chargement du document: data/arxiv/A_Survey_of_Software-Defined_Smart_Grid_Networks_Security_Threats_and__Defense_Techniques.pdf
25 pages charg√©es

G√©n√©ration du r√©sum√© (m√©thode: map_reduce)...



tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (6089 > 1024). Running this sequence through the model will result in indexing errors


R√©sum√© du document 'A_Survey_of_Software-Defined_Smart_Grid_Networks_Security_Threats_and__Defense_Techniques.pdf':
This document discusses various aspects of Software-Defined Smart Grid (SD-SG) network security, focusing on attacks and defense mechanisms. Key points include:

1. **Cyberattacks**:
   - Common DDoS/DoS attacks are prevalent in SD-SG networks.
   - Controller impersonation or black hole attacks targeting SDN controllers are significant concerns.
   - Machine learning and deep learning methods are used for detection.

2. **Defense Techniques**:
   - Blockchain technology enhances security through distributed consensus mechanisms, ensuring system integrity and scalability.
   - Machine learning (ML) techniques like parallel recurrent neural networks (RNNs) with LSTM and GRU models are effective in detecting DDoS attacks in SDN-based SCADA systems.

3. **SDN Attributes**:
   - Centralized controllers monitor network traffic using lightweight entropy-based defense mechanis

## √âtape 3 - Interface Utilisateur

### Exercice 7 : IHM avec Gradio

Cr√©ation d'une interface graphique interactive pour le chatbot RAG.

In [45]:
import uuid

def gradio_chat(message, history, session_id):
    """
    Fonction de chat pour Gradio.

    Args:
        message: Message de l'utilisateur
        history: Historique de la conversation
        session_id: ID de session unique

    Returns:
        La r√©ponse de l'assistant
    """
    try:
        response = conversational_rag_chain.invoke(
            {"input": message},
            config={"configurable": {"session_id": session_id}}
        )
        return response["answer"]
    except Exception as e:
        return f"Erreur: {str(e)}\n\nAssurez-vous qu'Ollama est lanc√© avec le mod√®le qwen2.5:3b"

def create_gradio_interface():
    """Cr√©e l'interface Gradio."""

    with gr.Blocks(title="RAG Chatbot", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Chatbot RAG - Assistant IA

        Posez vos questions sur les documents scientifiques index√©s.
        Le chatbot utilise **Retrieval Augmented Generation** pour fournir des r√©ponses pr√©cises bas√©es sur les documents.
        """)

        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    height=500,
                    label="Conversation",
                    avatar_images=(None, "ü§ñ")
                )

                with gr.Row():
                    msg = gr.Textbox(
                        label="Votre question",
                        placeholder="Posez votre question ici...",
                        lines=2,
                        scale=4
                    )
                    submit = gr.Button("Envoyer üì§", scale=1, variant="primary")

                with gr.Row():
                    clear = gr.Button("üóëÔ∏è Nouvelle conversation")

            with gr.Column(scale=1):
                gr.Markdown("### ‚ÑπÔ∏è Informations")
                session_display = gr.Textbox(
                    label="ID de session",
                    value=str(uuid.uuid4()),
                    interactive=False
                )
                gr.Markdown("""
                ### üìä Statistiques
                - **Documents index√©s:** Arxiv PDFs
                - **Mod√®le:** Qwen 2.5 (3B)
                - **Embeddings:** multilingual-e5-base
                - **Vector Store:** ChromaDB

                ### üí° Conseils
                - Posez des questions claires
                - Le chatbot se souvient du contexte
                - Cliquez sur "Nouvelle conversation" pour recommencer
                """)

        session_state = gr.State(str(uuid.uuid4()))

        def respond(message, chat_history, session_id):
            """G√®re la r√©ponse du chatbot."""
            bot_response = gradio_chat(message, chat_history, session_id)
            chat_history.append((message, bot_response))
            return "", chat_history

        def clear_chat():
            """R√©initialise la conversation."""
            new_session_id = str(uuid.uuid4())
            return [], new_session_id, new_session_id

        msg.submit(respond, [msg, chatbot, session_state], [msg, chatbot])
        submit.click(respond, [msg, chatbot, session_state], [msg, chatbot])
        clear.click(clear_chat, None, [chatbot, session_state, session_display])

    return demo

print("Cr√©ation de l'interface Gradio...")
demo = create_gradio_interface()

demo.launch(share=True, server_name="0.0.0.0")


Cr√©ation de l'interface Gradio...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d3093ee5316d83d034.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


