In [None]:
!pip install langchain langchain_ollama
!pip install chromadb sentence-transformers langchain_huggingface langchain_chroma
!pip install gradio
!pip install bs4


## Librerias que necesitamos

In [23]:
import requests
from bs4 import BeautifulSoup
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [39]:

url = 'https://en.wikipedia.org/wiki/Deportivo_de_La_Coru%C3%B1a'
response = requests.get(url)
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    text = soup.get_text(separator="\n", strip=True)

In [25]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,  
    chunk_overlap=100,  
    length_function=len
)
chunks = text_splitter.split_text(text)

Created a chunk of size 1056, which is longer than the specified 1000


In [27]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [28]:
from langchain_chroma import Chroma


vector_store = Chroma.from_texts(
    texts=chunks,
    collection_name="futbol",
    embedding=embeddings,
    persist_directory="./datasets"
)

In [29]:
results = vector_store.similarity_search_with_score(
    "The History of Football", k=3,
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content}")


* [SIM=0.432555] . Professional Football Researchers Association. Archived from
the original
(PDF)
on 11 June 2014
. Retrieved
26 January
2010
.
^
a
b
Meacham, Scott (2006).
"Old Division Football, The Indigenous Mob Soccer Of Dartmouth College (pdf)"
(PDF)
. dartmo.com.
Archived
(PDF)
from the original on 16 June 2007
. Retrieved
16 May
2007
.
^
Lewis, Guy M. (1969). "Teddy Roosevelt's Role in the 1905 Football Controversy".
The Research Quarterly
.
40
(4): 717–724.
PMID
4903389
.
^
A history of Winchester College. by Arthur F Leach. Duckworth, 1899
ISBN
1-4446-5884-0
^
"2003, "Richard Mulcaster"
"
. Footballnetwork.org. Archived from
the original
on 15 April 2010
. Retrieved
19 June
2010
.
^
Francis Peabody Magoun. (1938) History of football from the beginnings to 1871. p.27. Retrieved 2010-02-09.
^
Rowley, Christopher (2015).
The Shared Origins of Football, Rugby, and Soccer
. Rowman & Littlefield. p. 86.
ISBN
978-1-4422-4619-5
.
Archived
from the original on 27 February 2023
. Retr

In [30]:
retriever = vector_store.as_retriever(
    search_type="similarity",  search_kwargs={"k": 3}
)

In [None]:
query = "What is football"
results = retriever.get_relevant_documents(query)  

for i, result in enumerate(results):
    print(f"Resultado {i+1}:")
    print(result.page_content) 
    print()

Resultado 1:
Goals or points resulting from players putting the ball between two
goalposts
.
The goal or line being
defended
by the opposing team.
Players using only their body to move the ball, i.e. no additional equipment such as bats or sticks.
In all codes, common skills include
passing
,
tackling
, evasion of tackles, catching and
kicking
.
[
10
]
In most codes, there are rules restricting the movement of players
offside
, and players scoring a goal must put the ball either under or over a
crossbar
between the
goalposts
.
Etymology
Main article:
Football
(word)
There are conflicting explanations of the origin of the word "football".  It is widely assumed that the word "football" (or the phrase "foot ball") refers to the action of the foot kicking a ball.
[
13
]
There is an alternative explanation, which is that football originally referred to a variety of games in
medieval Europe
that were played
on foot
.
[
14
]
There is no conclusive evidence for either explanation.
Early histor

In [33]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

ollama_llm = "llama3.2"
model_local = ChatOllama(model=ollama_llm)

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model_local
    | StrOutputParser()
)

In [43]:
chain.invoke("When was Deportivo de La Coruña founded?")

'There is no mention of Deportivo de La Coruña in the provided context. The documents appear to be related to the history of football, but they do not provide information about a team called Deportivo de La Coruña.'

## Crear GUI

In [41]:
def search_chroma(query, top_k):
    try:   
        results = vector_store.similarity_search(query, k=top_k)
        return "\n\n".join(
            [f"**Result {i+1}:**\n{doc.page_content}" for i, doc in enumerate(results)]
        )
    except Exception as e:
        return f"Error: {e}"

In [42]:
import gradio as gr

In [44]:
with gr.Blocks() as demo:
    gr.Markdown("### Chroma Database Search")
    
    with gr.Row():
        query_input = gr.Textbox(label="Enter Your Query", placeholder="Type your question here...")
        top_k_input = gr.Slider(1, 10, step=1, value=5, label="Number of Results")

    search_button = gr.Button("Search")
    output_box = gr.Textbox(label="Search Results", lines=15)

    search_button.click(fn=search_chroma, inputs=[query_input, top_k_input], outputs=output_box)

demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


