# Football News Assistant Using RAG

In [1]:
from uuid import uuid4
from dotenv import load_dotenv
from pathlib import Path
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain_huggingface.embeddings import HuggingFaceEmbeddings


In [2]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API key that ChatGroq reads — confirm.
load_dotenv()


True

##  Define Constants and Globals

In [3]:
# --- Tunable settings ---------------------------------------------------
CHUNK_SIZE: int = 1000  # chunk_size handed to the text splitter
EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
COLLECTION_NAME: str = "football_news"

# Persistence directory for the vector store, anchored at the current working directory.
VECTORSTORE_DIR: Path = Path().resolve().joinpath("resources/vectorstore")

# --- Shared components, created on demand by initialize_components() -----
llm = vector_store = None


## Define the Initialization Function

In [4]:
def initialize_components():
    """Create the shared LLM client and vector store on first use.

    Populates the module-level ``llm`` and ``vector_store`` globals. Each one
    is only built while it is still ``None``, so repeated calls reuse the
    objects created earlier.
    """
    global llm, vector_store

    if llm is None:
        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)

    if vector_store is None:
        # Deliberately no ``trust_remote_code``: that flag allows code shipped
        # inside a model repository to run locally, and the configured
        # sentence-transformers model does not need it.
        ef = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

        vector_store = Chroma(
            collection_name=COLLECTION_NAME,
            embedding_function=ef,
            persist_directory=str(VECTORSTORE_DIR)
        )

## Define the URL Processing Function

In [5]:
def process_urls(urls):
    """Rebuild the vector store from the content of the given URLs.

    The pages are downloaded and split into chunks *before* the existing
    collection is reset, so a failed or empty download does not wipe the
    documents that were indexed previously.

    Args:
        urls: A single URL string or an iterable of URL strings. Blank
            entries are ignored.

    Raises:
        ValueError: If no usable URL is given, or if no text could be
            extracted from the given URLs.
    """
    # Accept a bare string as a convenience; iterating it would yield characters.
    if isinstance(urls, str):
        urls = [urls]
    urls = [u.strip() for u in urls if u and u.strip()]
    if not urls:
        raise ValueError("No URLs provided")

    print("Initializing Components")
    initialize_components()

    print("Loading data...✅")
    loader = UnstructuredURLLoader(urls=urls)
    data = loader.load()

    print("Splitting text into chunks...✅")
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ".", " "],
        chunk_size=CHUNK_SIZE
    )
    docs = text_splitter.split_documents(data)

    # Fail with a clear message instead of handing an empty batch to the store.
    if not docs:
        raise ValueError("No content could be loaded from the provided URLs")

    # Only drop the old collection once there is new content to replace it.
    print("Resetting vector store...✅")
    vector_store.reset_collection()

    print("Add chunks to vector database...✅")
    uuids = [str(uuid4()) for _ in docs]
    vector_store.add_documents(docs, ids=uuids)

    print("Done adding docs to vector database...✅")

## Define the Query Function

In [6]:
def generate_answer(query):
    """Answer a question using the documents held in the vector store.

    Args:
        query: The question to ask.

    Returns:
        A ``(answer, sources)`` tuple. ``sources`` is ``""`` when the chain
        result carries no ``"sources"`` entry.

    Raises:
        RuntimeError: If the vector store has not been created yet.
    """
    # Compare against None explicitly: a truthiness test would also reject a
    # store object that merely evaluates as falsy (for example when empty).
    if vector_store is None:
        raise RuntimeError("Vector database is not initialized ")

    chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vector_store.as_retriever())
    result = chain.invoke({"question": query}, return_only_outputs=True)
    sources = result.get("sources", "")

    return result['answer'], sources

## Run URLs

In [7]:
# Article(s) to index; process_urls rebuilds the vector store from them.
urls = [
    "https://supersport.com/football/spain/news/d403d482-9af8-4020-9ec9-9b7b6ba1297f/-we-can-do-big-things-worthy-of-real-madrid-says-alonso-as-he-starts-as-coach",
]
process_urls(urls)

Initializing Components
Resetting vector store...✅
Loading data...✅
Splitting text into chunks...✅
Add chunks to vector database...✅
Done adding docs to vector database...✅


## Ask a Question

In [None]:
# Query the indexed content and print the answer followed by its sources.
answer, sources = generate_answer(
    "What did Xabi Alonso say about his plans as Real Madrid coach?"
)
print(f"Answer: {answer}", f"Sources: {sources}", sep="\n")

## User Interface using Gradio

In [9]:
import gradio as gr



def process_and_query(url, question):
    """Index one article URL and answer a question about it.

    This is the callback wired to the Gradio submit button. Every call
    re-indexes the URL through ``process_urls`` before querying.

    Args:
        url: Address of the article to index.
        question: Question to ask about the article.

    Returns:
        A Markdown string with the answer and its sources, or a string
        starting with `` Error:`` when the input is blank or any step fails.
    """
    # Treat None like an empty string and ignore surrounding whitespace, so a
    # blank field is rejected here instead of triggering a full re-index.
    url = (url or "").strip()
    question = (question or "").strip()
    if not url:
        return " Error: Please provide a news article URL."
    if not question:
        return " Error: Please provide a question."

    try:
        print(f" URL received: {url}")
        process_urls([url])

        answer, sources = generate_answer(question)
        return f"###  Answer\n{answer}\n\n---\n###  Sources\n{sources}"

    except Exception as e:
        # Report the failure in the UI rather than letting the callback crash.
        return f" Error: {str(e)}"

# Build the page: a title, two text inputs in one row, a submit button, and a
# Markdown area that displays whatever process_and_query returns.
with gr.Blocks() as demo:
    gr.Markdown("##  Football News Assistant")
    gr.Markdown("Enter a football news article URL and ask a question about it.")

    with gr.Row():
        url_input = gr.Textbox(
            label="News Article URL",
            placeholder="Paste a URL from SuperSport, etc.",
        )
        question_input = gr.Textbox(
            label="Your Question",
            placeholder="e.g. What did Xabi Alonso say?",
        )

    submit_btn = gr.Button("Submit")
    output_md = gr.Markdown("")

    # A click passes both textbox values to the handler and renders its result.
    submit_btn.click(
        fn=process_and_query,
        inputs=[url_input, question_input],
        outputs=output_md,
    )

# Start the interface.
demo.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




 URL received: https://supersport.com/football/spain/news/d403d482-9af8-4020-9ec9-9b7b6ba1297f/-we-can-do-big-things-worthy-of-real-madrid-says-alonso-as-he-starts-as-coach
Initializing Components
Resetting vector store...✅
Loading data...✅
Splitting text into chunks...✅
Add chunks to vector database...✅
Done adding docs to vector database...✅
