In [None]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os

# Load the content of a web page using WebBaseLoader
def load_web_content(url):
    """Load ``url`` through ``WebBaseLoader`` and return the loaded documents.

    A progress message is printed before the load and a count of the
    loaded documents is printed afterwards.
    """
    print(f"Loading content from {url}...")
    pages = WebBaseLoader(url).load()
    print(f"Loaded {len(pages)} document(s) from the webpage.")
    return pages

# Split the loaded content into smaller chunks
def split_text(documents):
    """Split ``documents`` into overlapping chunks and return them.

    Uses a ``RecursiveCharacterTextSplitter`` configured with a chunk size
    of 1000 and an overlap of 200. Progress is printed to stdout.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    print("Splitting text into chunks...")
    pieces = splitter.split_documents(documents)
    print(f"Split into {len(pieces)} chunks.")
    return pieces

# Create an index using Chroma to store the embeddings
def create_chroma_index(chunks):
    """Embed ``chunks`` with OpenAI embeddings and index them in Chroma.

    The OpenAI credential is taken from the ``OPENAI_API_KEY`` environment
    variable. This function never writes to that variable, so a key
    configured by the caller is not overwritten and no secret has to live
    in the source file.

    Args:
        chunks: Documents to embed; passed unchanged to
            ``Chroma.from_documents``.

    Returns:
        The vector store returned by ``Chroma.from_documents``.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or blank.
    """
    # Fail fast with an actionable message instead of a remote 401 later.
    if not os.environ.get("OPENAI_API_KEY", "").strip():
        raise RuntimeError(
            "OPENAI_API_KEY is not set. Export a valid OpenAI API key in the "
            "environment before creating the Chroma index."
        )
    embeddings = OpenAIEmbeddings()
    print("Creating Chroma index...")
    vector_store = Chroma.from_documents(chunks, embeddings)
    return vector_store

# Retrieve the documents relevant to a query
def generate_response(query, vector_store):
    """Return the documents the store's retriever finds for ``query``.

    Despite the name, no text is generated here: the result is whatever
    the retriever obtained from ``vector_store.as_retriever()`` returns
    for the query.

    Args:
        query: The search query, also echoed to stdout.
        vector_store: Object exposing ``as_retriever()``.

    Returns:
        The retriever's result for ``query``.
    """
    print(f"Generating response for query: {query}")
    retriever = vector_store.as_retriever()
    # ``get_relevant_documents`` is deprecated in LangChain; ``invoke`` is
    # the supported Runnable entry point for retrievers.
    return retriever.invoke(query)

# Main application
def main(url, query):
    """Run the full pipeline: load, split, index, retrieve and print.

    Args:
        url: Address of the web page to load.
        query: Query used to retrieve matching chunks from the index.
    """
    # 1. Load the web content
    documents = load_web_content(url)

    # 2. Split the content into smaller chunks
    chunks = split_text(documents)

    # 3. Create the Chroma index
    vector_store = create_chroma_index(chunks)

    # 4. Retrieve the results for the query
    response = generate_response(query, vector_store)

    # 5. Show the results
    print("Generated Response: ")
    for res in response:
        # Retrieved items are LangChain Document objects: the text lives in
        # the ``page_content`` attribute and they cannot be subscripted.
        print(res.page_content)

# Usage example: when run as a script, index the Spanish Wikipedia article
# on artificial intelligence and print the chunks retrieved for one query.
if __name__ == "__main__":
    url = "https://es.wikipedia.org/wiki/Inteligencia_artificial"  
    query = "What is the main topic of the website?" 
    main(url, query)


Loading content from https://es.wikipedia.org/wiki/Inteligencia_artificial...
Loaded 1 document(s) from the webpage.
Splitting text into chunks...
Split into 152 chunks.
Creating Chroma index...


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************J0EA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}