In [6]:
! pip install python-dotenv 
! pip install gradio 

Collecting gradio
  Downloading gradio-5.22.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.1-py3-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloadi

In [12]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
# Initialize: pull variables from a local .env file (if any) into the process
# environment, then read the OpenAI key from there.
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    # Fail fast with an actionable message rather than a client-construction error.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it "
        "before running this notebook."
    )

# NOTE: `openai` is bound to the client instance; the chat cell below calls
# `openai.chat.completions.create(...)` on it, so the name must stay.
openai = OpenAI(api_key=openai_api_key)
MODEL = 'gpt-4o-mini'  # chat-completion model used to answer questions
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # model name passed to SentenceTransformer

In [None]:
# Open the on-disk Chroma store and fetch (or create) the news collection.
client = chromadb.PersistentClient(path="../databases/news")  # Use mounted volume path
collection = client.get_or_create_collection(name="processed_news")

# Embed a sample query with the model named by EMBEDDING_MODEL.
embedding_model = SentenceTransformer(EMBEDDING_MODEL)
query_embedding = embedding_model.encode(["Sports"])[0].tolist()

# Document retrieval: ask for three matches, returning both text and metadata.
results = collection.query(
    include=["metadatas", "documents"],
    n_results=3,
    query_embeddings=[query_embedding],
)



In [30]:
# Metadata records returned for the first (and only) query embedding.
results["metadatas"][0]

[{'author': '9news.com',
  'category': 'national',
  'link': 'https://www.9news.com/article/sports/high-school/prep-rally-honor-roll/73-ddbba0b2-5be6-4d3d-a508-4627fb4617e8',
  'published_date': 'Tue, 18 Mar 2025 20:08:47 GMT',
  'sub_category': 'high-school, local-sports, local, news, sports, hs-football, all-usa, hs-basketball, hs-baseball, home',
  'title': 'Prep Rally Honor Roll (3/18/25)'},
 {'author': 'Matthew Sullivan',
  'category': 'sport',
  'published_date': 'March 17, 2025 - 6:50PM',
  'sub_category': 'More Sports',
  'title': 'Bodybuilding star drops scary truth bomb'},
 {'author': 'Alex Smith',
  'category': 'sport',
  'link': 'https://www.news.com.au/sport/olympics/olympic-legend-bradley-wiggins-comes-clean-on-horrifying-ordeal/news-story/20c3a08b0dc6a804a7178b6650b0b33a',
  'published_date': 'March 10, 2025 - 2:08PM',
  'sub_category': 'Olympics',
  'title': 'Olympic legend’s horrifying revelations'}]

In [32]:


system_message = "You are a helpful assistant"

# Filled on first use so the Chroma collection and the embedding model are
# built once per kernel session instead of on every chat turn.
_retrieval_cache = {}


def _get_retrieval_resources():
    """Return ``(collection, embedding_model)``, creating both on the first call."""
    if not _retrieval_cache:
        client = chromadb.PersistentClient(path="../databases/news")  # Use mounted volume path
        collection = client.get_or_create_collection(name="processed_news")
        embedding_model = SentenceTransformer(EMBEDDING_MODEL)
        # Store both entries together so a failure above never leaves a half-filled cache.
        _retrieval_cache.update(collection=collection, embedding_model=embedding_model)
    return _retrieval_cache["collection"], _retrieval_cache["embedding_model"]


def _format_reference(metadata):
    """Render one metadata record as a Markdown bullet for the reference list."""
    metadata = metadata or {}
    title = metadata.get("title")
    link = metadata.get("link")
    if title and link:
        return f"- [{title}]({link})"
    if title or link:
        return f"- {title or link}"
    # Neither field is present: fall back to showing the raw record.
    return f"- {metadata}"


def chat(message, history):
    """Answer ``message`` using retrieved news context and stream the reply.

    The message is embedded, the three best-matching documents are fetched
    from the ``processed_news`` collection, and their text is placed in the
    prompt sent to the chat model together with the earlier conversation.

    Args:
        message: The user's latest message (a string).
        history: Earlier turns as a list of dicts that each have ``"role"``
            and ``"content"`` keys; ``None`` or an empty list means no
            earlier turns.

    Yields:
        The reply accumulated so far, followed by a ``References:`` section
        listing the retrieved items (omitted when nothing was retrieved).
        One value is yielded per streamed chunk that carries a choice.
    """
    collection, embedding_model = _get_retrieval_resources()
    query_embedding = embedding_model.encode([message]).tolist()[0]

    # Document retrieval
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3,
        include=["metadatas", "documents"]
    )

    # Build context (the template text is kept exactly as it was).
    context = "\n".join(results['documents'][0])
    full_prompt = f"""Context information:
    {context}
    
    User Question: {message}
    
    Answer based on the context and your knowledge:"""

    # Forward only role/content so any extra per-message keys attached by the
    # UI layer are not sent to the API.
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
    ]
    messages = (
        [{"role": "system", "content": system_message}]
        + prior_turns
        + [{"role": "user", "content": full_prompt}]
    )

    # The reference list does not change while streaming, so build it once.
    references = "\n".join(_format_reference(meta) for meta in results['metadatas'][0])
    references_section = f"\n\nReferences:\n{references}" if references else ""

    stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        if not chunk.choices:
            # Skip chunks that carry no choice instead of failing on choices[0].
            continue
        response += chunk.choices[0].delta.content or ''
        yield f"{response}{references_section}"

In [None]:
# Build the chat UI around `chat` (history in messages format) and start it.
(
    gr.ChatInterface(
        fn=chat,
        type="messages",
    )
    .launch()
)