In [8]:
!pip install -q streamlit google-generativeai python-dotenv langchain PyPDF2 chromadb faiss-cpu langchain_google_genai langchain-community

Configurez votre clé API Google :

In [9]:
import os
from getpass import getpass

# Never hardcode the key in the notebook: it ends up in version control and in
# every shared copy. Any key that was ever pasted into this cell should be
# considered leaked and revoked.
# The key is taken from the environment when already set; otherwise it is
# requested interactively without being echoed or stored in the notebook.
if not os.environ.get('GOOGLE_API_KEY'):
    os.environ['GOOGLE_API_KEY'] = getpass('Google API key: ')

Créez le fichier .env (optionnel si vous utilisez os.environ)

In [10]:
# Create a template .env only when none exists yet: mode 'x' refuses to
# overwrite, so a .env that already holds a real key is never replaced by the
# placeholder below.
try:
    with open('.env', 'x', encoding='utf-8') as f:
        f.write('GOOGLE_API_KEY=VOTRE_CLE_API_ICI\n')
except FileExistsError:
    print('.env already exists - left untouched')

In [11]:
%%writefile app.py
import streamlit as st
import google.generativeai as genai
from dotenv import load_dotenv
import os
import csv
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

# Load environment variables from the .env file
load_dotenv()

def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (the
            caller passes the files returned by the Streamlit uploader).
            ``None`` or an empty iterable yields an empty string.

    Returns:
        str: The text of all pages, in document then page order, joined
        without any separator.
    """
    page_texts = []
    for pdf in pdf_docs or []:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page without extractable text may give None or ""; fall back
            # to "" so the concatenation never raises TypeError.
            page_texts.append(page.extract_text() or "")
    # Single join instead of repeated `+=` keeps this linear in total size.
    return "".join(page_texts)

def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split text into overlapping chunks.

    Args:
        text: The full text to split.
        chunk_size: Value passed as ``chunk_size`` to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000, the value
            that used to be hard-coded.
        chunk_overlap: Value passed as ``chunk_overlap`` to the splitter.
            Defaults to 1000, the value that used to be hard-coded.

    Returns:
        list[str]: The chunks produced by the splitter's ``split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_text(text)

def get_vector_store(chunks, index_path="faiss_index"):
    """Build a FAISS vector store from text chunks and save it locally.

    Args:
        chunks: Iterable of text chunks to embed.
        index_path: Path passed to ``save_local``. Defaults to
            ``"faiss_index"``, the location that used to be hard-coded.

    Returns:
        The vector store returned by ``FAISS.from_texts`` (earlier versions
        returned ``None``; callers that ignore the result are unaffected).

    Raises:
        ValueError: If ``chunks`` is empty, i.e. there is nothing to index.
    """
    chunks = list(chunks or [])
    if not chunks:
        # Fail early with an actionable message instead of handing an empty
        # list to the embedding / index builder.
        raise ValueError(
            "No text chunks to index: the uploaded PDFs contain no extractable text.")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")  # type: ignore
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local(index_path)
    return vector_store

def get_conversational_chain():
    """Build the "stuff" question-answering chain backed by Gemini.

    Returns:
        The chain produced by ``load_qa_chain`` for a ``gemini-pro`` chat
        model (temperature 0.3) and a prompt taking the ``context`` and
        ``question`` variables.
    """
    # Prompt text is sent to the model verbatim (whitespace included).
    qa_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not available in the context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    gemini = ChatGoogleGenerativeAI(
        model="gemini-pro", client=genai, temperature=0.3)
    qa_prompt = PromptTemplate(
        template=qa_template, input_variables=["context", "question"])
    return load_qa_chain(llm=gemini, chain_type="stuff", prompt=qa_prompt)

def clear_chat_history():
    """Reset the chat history to the single assistant greeting."""
    greeting = {
        "role": "assistant",
        "content": "Upload some PDFs and ask me a question",
    }
    st.session_state.messages = [greeting]

def user_input(user_question, index_path="faiss_index"):
    """Answer a question from the locally saved FAISS index.

    Args:
        user_question: The question typed by the user.
        index_path: Folder holding the saved index. Defaults to
            ``"faiss_index"``, the location that used to be hard-coded.

    Returns:
        str: The chain's ``output_text``, or a short instruction message when
        no index has been built yet (previously this case raised while
        loading the missing index).
    """
    # Without a processed upload there is nothing to search; tell the user
    # what to do instead of crashing the chat.
    if not os.path.exists(os.path.join(index_path, "index.faiss")):
        return ("No processed documents found. Upload your PDFs and click "
                "'Submit & Process' first.")

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001")  # type: ignore

    # NOTE(security): allow_dangerous_deserialization=True unpickles the
    # stored index. Only load an index folder this app wrote itself; never
    # point index_path at files from an untrusted source.
    new_db = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    context = "\n".join([doc.page_content for doc in docs])
    # invoke() replaces the deprecated direct call on the chain object.
    response = chain.invoke(
        {"input_documents": docs, "context": context, "question": user_question})

    return response['output_text']

def save_user_info(name, phone, email, path='user_info.csv'):
    """Append one contact record to a CSV file, writing the header if needed.

    Args:
        name: Contact name.
        phone: Contact phone number.
        email: Contact e-mail address.
        path: CSV file to append to. Defaults to ``'user_info.csv'``, the
            location that used to be hard-coded.

    The header row is written when the file is missing *or* empty, so an
    existing zero-byte file still ends up with column names.
    """
    fieldnames = ['Name', 'Phone', 'Email']
    needs_header = not os.path.isfile(path) or os.path.getsize(path) == 0
    # Explicit UTF-8 so names with non-ASCII characters are written the same
    # way regardless of the platform's default encoding.
    with open(path, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        # NOTE(security): values are stored verbatim. Treat this file as
        # untrusted input when opening it in a spreadsheet (cells starting
        # with '=', '+', '-' or '@' may be interpreted as formulas).
        writer.writerow({'Name': name, 'Phone': phone, 'Email': email})

def main():
    """Render the Streamlit page: PDF upload sidebar, chat, and contact form.

    Flow on every script run:
      1. Sidebar: upload PDFs and, on "Submit & Process", extract their text,
         chunk it and build the vector store.
      2. Main area: replay the chat history kept in ``st.session_state`` and
         answer a newly entered prompt through ``user_input``.
      3. If a prompt contained "call me", show a contact form whose
         submission is saved with ``save_user_info``.
    """
    st.set_page_config(
        page_title="Chat with PDF using Gemini",
        page_icon="🤖",
        layout="wide",
    )

    # --- Sidebar: document upload and indexing ---
    with st.sidebar:
        st.title("Menu")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            type=["pdf"],  # the pipeline only understands PDFs
            accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: indexing an empty corpus would fail.
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    if text_chunks:
                        get_vector_store(text_chunks)
                        st.success("Done")
                    else:
                        # e.g. scanned PDFs that contain images only
                        st.error("No text could be extracted from the uploaded PDFs.")

    # --- Main area header ---
    st.title("Chat with PDF files using Gemini 🙋‍♂️")
    st.write("Welcome to the chat!")

    with st.sidebar:
        if st.button('Clear Chat History'):
            clear_chat_history()

    # --- Chat history ---
    if "messages" not in st.session_state.keys():
        st.session_state.messages = [
            {"role": "assistant", "content": "Upload some PDFs and ask me a question"}]

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(f"**{message['role'].capitalize()}:** {message['content']}")

    # --- New prompt ---
    if prompt := st.chat_input():
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(f"**User:** {prompt}")

        # A "call me" request switches on the contact form below.
        if "call me" in prompt.lower():
            st.session_state.collecting_info = True

        # The user message was appended just above, so an assistant reply is
        # always due here (the former "last message is not assistant" check
        # could never be false).
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = user_input(prompt)
                st.session_state.messages.append({"role": "assistant", "content": response})
                st.markdown(f"**Assistant:** {response}")

    # --- Contact form ---
    if st.session_state.get("collecting_info", False):
        st.subheader("Contact Information")
        with st.form(key="contact_form"):
            name = st.text_input("Name")
            phone = st.text_input("Phone Number")
            email = st.text_input("Email Address")
            submit_button = st.form_submit_button("Submit Contact Info")

            if submit_button:
                if not name.strip() or not (phone.strip() or email.strip()):
                    # Do not store records nobody could be contacted with.
                    st.warning("Please enter your name and a phone number or an email address.")
                else:
                    save_user_info(name, phone, email)
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": f"Thank you, {name}. We will contact you at {phone} or {email}."
                    })
                    st.session_state.collecting_info = False
                    # st.experimental_rerun no longer exists in current
                    # Streamlit releases; prefer st.rerun and fall back only
                    # on old versions that predate it.
                    rerun = getattr(st, "rerun", None) or st.experimental_rerun
                    rerun()  # redraw so the thank-you message appears in the chat

if __name__ == "__main__":
    main()

Writing app.py


Lancez l'application Streamlit :

Accédez à l'application via le tunnel ngrok :

In [19]:
# Start the Streamlit server for app.py in the background on port 8501,
# discarding its output.
# NOTE(review): CORS and XSRF protection are switched off and the server binds
# to 0.0.0.0 - presumably so the app is reachable through the ngrok tunnel;
# confirm these flags are still required before exposing the app publicly.
!streamlit run app.py \
  --server.port 8501 \
  --server.address 0.0.0.0 \
  --server.enableCORS false \
  --server.enableXsrfProtection false \
  &>/dev/null &

In [21]:
import os
from getpass import getpass

from pyngrok import ngrok

# Authentication (optional): never paste the authtoken into the notebook.
# Any token that was ever committed here should be considered leaked and
# revoked. The token is read from NGROK_AUTHTOKEN when set, otherwise it is
# requested without echo; leave the prompt empty to continue without a token.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken (optional): ")
if ngrok_token:
    ngrok.set_auth_token(ngrok_token)

# Open the tunnel to the local Streamlit server
public_url = ngrok.connect(addr='8501', proto='http')
print(f"🌍 Your app is live at: {public_url}")

🌍 Your app is live at: NgrokTunnel: "https://a78b85513fed.ngrok-free.app" -> "http://localhost:8501"
