## Version Chroma

In [1]:
import requests
from bs4 import BeautifulSoup
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document

def extract_text_from_url(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    page_text = soup.get_text()
    print(page_text[:1000]) 
    return page_text

def split_text(text, chunk_size=500):
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]


def create_qa_chain():
    # LLM y otros modelos
    llm = OllamaLLM(model="llama3.2", server_url="http://localhost:11434") 
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = Chroma(persist_directory="./vectorstore", embedding_function=embedding_model)

    # URL de Wikipedia (puedes modificarla si lo deseas)
    url = "https://en.wikipedia.org/wiki/2024_G20_Rio_de_Janeiro_summit"

    
    # Extraer y procesar el contenido
    page_content = extract_text_from_url(url)
    chunks = split_text(page_content)
    print(f"Number of chunks: {len(chunks)}")

    documents = [Document(page_content=chunk) for chunk in chunks]
    vectorstore.add_documents(documents)
    print("Documents added to the vector store.")

    # Crear el prompt de la cadena de pregunta-respuesta
    prompt = ChatPromptTemplate.from_template(
        template="Use the context below to answer the user's question:\n\n{context}\n\nQuestion: {question}\nAnswer:"
    )

    # Crear un retriever
    retriever = vectorstore.as_retriever()

    # Crear la cadena de QA
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt}
    )
    
    return qa_chain

# Crear la cadena de QA al inicio
qa_chain = create_qa_chain()

def answer_question(question):
    print(f"Processing question: {question}")  # Para ver la pregunta procesada
    result = qa_chain.run(question)
    print(f"Answer: {result}")  # Ver la respuesta generada
    return result


# Interfaz Gradio
iface = gr.Interface(fn=answer_question, 
                     inputs="text", 
                     outputs="text", 
                     live=True, 
                     title="Wikipedia QA",
                     description="Pregúntame sobre la página de Wikipedia de el G20 de Brasil 2024.")

# Ejecutar la interfaz Gradio
iface.launch()



  from .autonotebook import tqdm as notebook_tqdm






2024 G20 Rio de Janeiro summit - Wikipedia




































Jump to content







Main menu





Main menu
move to sidebar
hide



		Navigation
	


Main pageContentsCurrent eventsRandom articleAbout WikipediaContact us





		Contribute
	


HelpLearn to editCommunity portalRecent changesUpload file



















Search











Search






















Appearance
















Donate

Create account

Log in








Personal tools





Donate Create account Log in





		Pages for logged out editors learn more



ContributionsTalk




























Contents
move to sidebar
hide




(Top)





1
Presidency








2
Agenda priorities




Toggle Agenda priorities subsection





2.1
G20 Social










3
Treaty against hunger and poverty








4
Preparations








5
Issues




Toggle Issues subsection





5.1
Russia and Ukraine








5.2
Other issues










6
Participating leaders








7
Invited guests








8
Participating in



Processing question: What is the title of the page?


  result = qa_chain.run(question)


Answer: The title of the page is "2024 G20 Rio de Janeiro summit".


## Version MongloAtlas

In [1]:
import requests
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.chains import RetrievalQA
from pymongo import MongoClient
from langchain_mongodb import MongoDBAtlasVectorSearch
import gradio as gr

# Conexión a MongoDB Atlas
MONGODB_ATLAS_CLUSTER_URI = "mongodb+srv://usuarioMongoAltas:contrasenha@cluster0.xmaru.mongodb.net/"
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)

DB_NAME = "vectorstore"  # Cambia este valor al nombre de tu base de datos
COLLECTION_NAME = "paginaweb"  # Cambia este valor al nombre de tu colección
ATLAS_VECTOR_SEARCH_INDEX_NAME = "g20_summit_index"

# Referencia a la colección de MongoDB
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

# Inicializa los embeddings de HuggingFace
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Configuración de MongoDB Atlas Vector Search
vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine",  # Usamos la similitud del coseno para la recuperación
)

# Modelo LLM
llm = OllamaLLM(model="llama3.2", server_url="http://localhost:11434")

# Crear el prompt para la cadena de preguntas y respuestas
prompt = ChatPromptTemplate.from_template(
    template="Use the context below to answer the user's question:\n\n{context}\n\nQuestion: {question}\nAnswer:"
)

# Función para recuperar documentos desde MongoDB usando el vector store
retriever = vector_store.as_retriever()

# Crear la cadena de preguntas y respuestas
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt}
)

# Función para responder preguntas basadas en el contenido de MongoDB
def responder_pregunta(pregunta):
    response = qa_chain.run(pregunta)
    return response

# Crear la interfaz de usuario de Gradio
interface = gr.Interface(
    fn=responder_pregunta,                # Función que maneja la lógica
    inputs="text",                         # Tipo de entrada (texto)
    outputs="text",                        # Tipo de salida (texto)
    live=True,                             # Activar respuesta en vivo
    title="Sistema de Respuestas G20",     # Título de la interfaz
    description="Introduce una pregunta sobre el G20 Summit 2024 para obtener información de la página web."
)

# Ejecutar la interfaz
interface.launch()


  from .autonotebook import tqdm as notebook_tqdm
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




  response = qa_chain.run(pregunta)
