In [64]:
# import requests
# from bs4 import BeautifulSoup

# # Extrae los párrafos de la página web
# def extract_paragraphs_from_web(url):
#     # Realizamos la solicitud HTTP a la página web
#     response = requests.get(url)
#     if response.status_code != 200:
#         print(f"Error al acceder a la página: {response.status_code}")
#         return []

#     # Parseamos el contenido HTML de la página
#     soup = BeautifulSoup(response.text, 'html.parser')
    
#     # Buscamos el contenido principal. Modifica 'div.content' según la estructura del sitio web.
#     content = soup.find('div', {'class': 'content'})  
    
#     if not content:
#         print("No se pudo encontrar el contenido principal de la página.")
#         return []

#     # Extraemos los párrafos del contenido principal
#     paragraphs = content.find_all('p')
    
#     # Convertimos los elementos <p> a texto y eliminamos espacios innecesarios
#     return [paragraph.get_text(strip=True) for paragraph in paragraphs if paragraph.get_text(strip=True)]

# # URL del artículo
# url = "https://oyister.oyis.org/articles/book-review-the-stranger-by-albert-camus"

# # Llamada a la función
# web_paragraphs = extract_paragraphs_from_web(url)

# print(web_paragraphs)


In [65]:
import requests
from bs4 import BeautifulSoup

# Extrae el texto de la página web y divide cada línea en un fragmento de texto
def extract_lines_from_web(url):
    # Realizamos la solicitud HTTP a la página web
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Error al acceder a la página: {response.status_code}")
        return []

    # Parseamos el contenido HTML de la página
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Extraemos el texto de la página web. El texto principal está en un artículo con la clase 'article-content'
    content = soup.find('div', {'class': 'content'})  
    
    if not content:
        print("No se pudo encontrar el contenido principal de la página.")
        return []

    # Extraemos todo el texto del artículo
    full_text = content.get_text("\n", strip=True)
    
    # Dividimos el texto en líneas
    lines = full_text.splitlines()
    
    # Filtramos líneas vacías y devolvemos solo las líneas no vacías
    return [line.strip() for line in lines if line.strip()]


url = "https://oyister.oyis.org/articles/book-review-the-stranger-by-albert-camus" 

web_lines = extract_lines_from_web(url)

print(web_lines)


['Book Review: The Stranger By Albert Camus', 'Review', 'Dec 16', 'Written By', 'Inhyuk K.', '“Maman died today. Or yesterday maybe, I don’t know. I got a telegram from the home: ‘Mother deceased. Funeral tomorrow. Faithfully yours.’ That doesn’t mean anything. Maybe it was yesterday.”', 'What an opening line.', 'The Stranger', 'written by French writer Albert Camus is a novel that is centered around the philosophical idea of existentialism.', 'It would be absurd to define existentialism in a paragraph, but here is the oversimplified summary of existentialism: the world is full of meaninglessness. There is no innate purpose or rules in our lives, and all of us are running towards the same ending: death. Existentialism says that in a meaningless world, we must make our own choices, bear responsibility for those choices, and find meaning and purpose on our own.', 'The main character of', 'The Stranger', ', Meursault, personifies existentialism. The opening line spoken by Meursault clearl

In [66]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [67]:
from langchain_chroma import Chroma


vector_store = Chroma.from_texts(
    texts=web_lines,
    collection_name="book_review",
    embedding=embeddings,
)

In [68]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="book_review",
    embedding_function=embeddings,
)

In [69]:
retriever = vector_store.as_retriever()

In [111]:
question = "Tell me about the philosophy of the book"
docs = vector_store.similarity_search(question, k=8)
len(docs)

8

In [112]:
docs

[Document(metadata={}, page_content='written by French writer Albert Camus is a novel that is centered around the philosophical idea of existentialism.'),
 Document(metadata={}, page_content='written by French writer Albert Camus is a novel that is centered around the philosophical idea of existentialism.'),
 Document(metadata={}, page_content='written by French writer Albert Camus is a novel that is centered around the philosophical idea of existentialism.'),
 Document(metadata={}, page_content='. I learned that I am responsible to make decisions based on my own beliefs and values. This book inspired me to become less bound by societal expectations and to start inquiring about what kind of person I truly am.'),
 Document(metadata={}, page_content='. I learned that I am responsible to make decisions based on my own beliefs and values. This book inspired me to become less bound by societal expectations and to start inquiring about what kind of person I truly am.'),
 Document(metadata={}

In [72]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Local LLM
ollama_llm = "llama3.2"
model_local = ChatOllama(model=ollama_llm)

# Chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model_local
    | StrOutputParser()
)

In [101]:
chain.invoke("Tell me about the philosophy of the book")

'The philosophy of the book is centered around existentialism, a philosophical idea that emphasizes individual freedom and choice. According to the context provided, this novel inspired its reader to become less bound by societal expectations and to question their own identity, suggesting that the book encourages readers to take personal responsibility for making decisions based on their own beliefs and values.'

In [102]:
chain.invoke("Tell me about the main character")

"The main character's name is Meursault, and he personifies existentialism. He is portrayed as an anomaly in society, not adhering to traditional expectations or values. Throughout the story, his actions and reactions suggest that he is cynical about societal norms and may be considered a psychopath by some standards. However, after reading the book, it seems that the author's view on Meursault has changed, indicating that there may be more depth to his character than initially meets the eye."

In [108]:
chain.invoke("What do you think about the review?")

"There is no information provided in the context about a review. The context only mentions four documents with identical metadata and page content ('a'). Therefore, it's not possible to form an opinion or answer a question about a review based on this context."