In [3]:
! pip install python-dotenv 
! pip install gradio 



In [4]:
import os
from functools import lru_cache

import chromadb
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
from sentence_transformers import SentenceTransformer
# Model identifiers used by the cells below.
MODEL = 'gpt-4o-mini'
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Run python-dotenv's loader first, then read the key from the environment.
# NOTE(review): presumably the key lives in a local .env file - confirm.
load_dotenv()
openai_api_key = os.environ.get('OPENAI_API_KEY')

# Chat client, built with no explicit arguments.
# NOTE(review): it is assumed to pick the key up from the environment - confirm.
openai = OpenAI()

In [5]:
# Open the Chroma persistent client rooted at ../databases/news and fetch
# (or create) the collection that holds the processed articles.
client = chromadb.PersistentClient(path="../databases/news")  # Use mounted volume path
collection = client.get_or_create_collection(name="processed_news")

# Encode a one-item batch and unpack its single vector as a plain list.
embedding_model = SentenceTransformer(EMBEDDING_MODEL)
(query_embedding,) = embedding_model.encode(["Sports"]).tolist()

# Probe query: request three results, returning both the stored text and metadata.
results = collection.query(
    include=["metadatas", "documents"],
    n_results=3,
    query_embeddings=[query_embedding],
)



In [6]:
# Display the metadata entries returned for the first (and only) query embedding.
results['metadatas'][0]

[{'author': '9news.com',
  'category': 'national',
  'link': 'https://www.9news.com/article/sports/high-school/prep-rally-honor-roll/73-ddbba0b2-5be6-4d3d-a508-4627fb4617e8',
  'published_date': 'Tue, 18 Mar 2025 20:08:47 GMT',
  'sub_category': 'high-school, local-sports, local, news, sports, hs-football, all-usa, hs-basketball, hs-baseball, home',
  'title': 'Prep Rally Honor Roll (3/18/25)'},
 {'author': '9news.com',
  'category': 'national',
  'link': 'https://www.9news.com/article/sports/high-school/prep-rally-honor-roll/73-ddbba0b2-5be6-4d3d-a508-4627fb4617e8',
  'published_date': 'Tue, 18 Mar 2025 20:08:47 GMT',
  'sub_category': 'high-school, local-sports, local, news, sports, hs-football, all-usa, hs-basketball, hs-baseball, home',
  'title': 'Prep Rally Honor Roll (3/18/25)'},
 {'author': 'Matthew Sullivan',
  'category': 'sports',
  'published_date': 'March 17, 2025 - 6:50PM',
  'sub_category': 'More Sports',
  'title': 'Bodybuilding star drops scary truth bomb'}]

In [7]:


system_message = "You are a helpful assistant"


@lru_cache(maxsize=1)
def _get_collection():
    """Open the news collection once and reuse it for every chat turn."""
    client = chromadb.PersistentClient(path="../databases/news")  # Use mounted volume path
    return client.get_or_create_collection(name="processed_news")


@lru_cache(maxsize=1)
def _get_embedding_model():
    """Build the sentence encoder once instead of re-creating it per message."""
    return SentenceTransformer(EMBEDDING_MODEL)


def chat(message, history):
    """Answer ``message`` with retrieved news context, streaming the reply.

    The message is embedded, the three nearest documents are fetched from the
    collection, and their text is placed in the prompt sent to the chat model.

    Args:
        message: The user's latest message (plain text).
        history: Earlier turns as a list of dicts carrying at least ``role``
            and ``content`` (the ``type="messages"`` chat format).

    Yields:
        The reply accumulated so far, followed by a ``References:`` section
        listing the metadata of each retrieved document. One string is yielded
        per streamed chunk that carries a choice.

    Side effects:
        Prints the history, the outgoing messages and the retrieved metadata
        to stdout as a debugging trace.
    """
    query_embedding = _get_embedding_model().encode([message]).tolist()[0]

    # Document retrieval
    results = _get_collection().query(
        query_embeddings=[query_embedding],
        n_results=3,
        include=["metadatas", "documents"],
    )
    documents = results['documents'][0]
    metadatas = results['metadatas'][0]

    # Build context. The four-space indents (including on the two otherwise
    # blank lines) reproduce the original triple-quoted template exactly.
    context = "\n".join(documents)
    full_prompt = (
        "Context information:\n"
        f"    {context}\n"
        "    \n"
        f"    User Question: {message}\n"
        "    \n"
        "    Answer based on the context and your knowledge:"
    )

    # Forward only role/content: the UI may attach extra keys to history
    # entries (e.g. metadata) that the chat API has no use for.
    prior_turns = [{"role": turn["role"], "content": turn["content"]} for turn in history]
    messages = (
        [{"role": "system", "content": system_message}]
        + prior_turns
        + [{"role": "user", "content": full_prompt}]
    )

    print("History is:")
    print(history)
    print("And messages is:")
    print(messages)

    stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)
    print(metadatas)

    # The reference list does not change while streaming, so build it once.
    references = "\n".join(f"- {ref}" for ref in metadatas)

    response = ""
    for chunk in stream:
        # Skip chunks that carry no choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        yield f"{response}\n\nReferences:\n{references}"

In [None]:
# Wrap the `chat` generator in a Gradio chat UI (history passed in "messages" format) and start it.
gr.ChatInterface(fn=chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




History is:
[]
And messages is:
[{'role': 'system', 'content': 'You are a helpful assistant'}, {'role': 'user', 'content': 'Context information:\n    watch the best high school sports moments of the week and vote for your favorite \nwatch the best high school sports moments of the week and vote for your favorite \nthe complex  called zebulon  will include spaces for basketball  baseball  softball  volleyball  football  soccer and hockey \n\n    User Question: Hi Tell me about sports\n\n    Answer based on the context and your knowledge:'}]
[{'author': '9news.com', 'category': 'national', 'link': 'https://www.9news.com/article/sports/high-school/prep-rally-honor-roll/73-ddbba0b2-5be6-4d3d-a508-4627fb4617e8', 'published_date': 'Tue, 18 Mar 2025 20:08:47 GMT', 'sub_category': 'high-school, local-sports, local, news, sports, hs-football, all-usa, hs-basketball, hs-baseball, home', 'title': 'Prep Rally Honor Roll (3/18/25)'}, {'author': '9news.com', 'category': 'national', 'link': 'https://