In [2]:
pip install -r requirements.txt




In [3]:
import os
import time
from datetime import datetime, timezone
from urllib.parse import quote

import requests
import schedule
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

# ========================
# CONFIGURATION
# ========================
# The Pinecone API key is a secret and must not be stored in the notebook.
# It is read from the PINECONE_API_KEY environment variable; the value is None
# when the variable is unset (the Pinecone client is then expected to reject
# the missing key when it is constructed — confirm against the client docs).
# NOTE(review): the key that was previously hard-coded here has been exposed
# and should be revoked/rotated in the Pinecone console.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Name of the Pinecone index that stores the embedded summaries.
INDEX_NAME = "wikipedia-knowledge"

# Wikipedia topics to track (page titles exactly as they appear in page URLs)
WIKI_TOPICS = [
    "Artificial_intelligence",
    "Machine_learning",
    "Space_exploration",
    "Climate_change",
    "Python_(programming_language)"
]

# ========================
# INITIALIZE
# ========================
# Sentence-embedding model used for both stored summaries and search queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Pinecone client authenticated with the configured API key.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create the index on first use; an already existing index is left untouched.
existing_index_names = [idx["name"] for idx in pc.list_indexes()]
index_is_missing = INDEX_NAME not in existing_index_names
if index_is_missing:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=INDEX_NAME,
        dimension=384,  # embedding size for all-MiniLM-L6-v2
        metric="cosine",
        spec=serverless_spec,
    )

# Handle used by the rest of the notebook for upserts and queries.
index = pc.Index(INDEX_NAME)

# ========================
# FUNCTIONS
# ========================
def fetch_wikipedia_summary(topic):
    """Fetch the summary extract and page URL for a Wikipedia topic.

    Args:
        topic: Page title as used in Wikipedia URLs, e.g. "Machine_learning".
            The title is percent-encoded before it is placed in the request
            path, so characters such as "/", "?" or "#" cannot change which
            endpoint is addressed.

    Returns:
        A ``(extract, page_url)`` tuple of strings. Both are "" when the
        request fails or the response cannot be interpreted; a field that is
        absent or null in an otherwise valid response yields "" for that
        field only.
    """
    encoded_title = quote(topic, safe="")
    url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        data = r.json()
        # `or {}` / `or ""` also cover keys that are present but null.
        desktop_urls = (data.get("content_urls") or {}).get("desktop") or {}
        return data.get("extract") or "", desktop_urls.get("page") or ""
    except (requests.RequestException, ValueError, AttributeError, TypeError) as e:
        # Best effort: network/HTTP failures, undecodable JSON and unexpected
        # payload shapes are reported and skipped; programming errors such as
        # NameError are no longer swallowed.
        print(f"⚠ Error fetching {topic}: {e}")
        return "", ""

def update_knowledge():
    """Refresh the index with the current summary of every tracked topic.

    For each entry in WIKI_TOPICS the summary is fetched, embedded with the
    module-level model and upserted into the module-level index. Topics whose
    fetch returns no text are skipped.

    The vector id is the topic itself, so each run overwrites the topic's
    previous vector instead of adding another timestamped copy. This keeps the
    index at one record per topic and prevents searches from returning stale
    or duplicate snapshots. The fetch time is kept in the "time" metadata
    field.
    NOTE(review): records written earlier under "<topic>-<timestamp>" ids are
    not removed by this function and need a one-off cleanup.
    """
    print("🔄 Running update pipeline...")
    for topic in WIKI_TOPICS:
        text, source_url = fetch_wikipedia_summary(topic)
        if not text:
            continue
        embedding = model.encode(text).tolist()
        now = datetime.now(timezone.utc).isoformat()
        index.upsert([
            {
                "id": topic,  # stable id => upsert replaces the old snapshot
                "values": embedding,
                "metadata": {
                    "topic": topic,
                    "source": source_url,
                    "text": text,
                    "time": now
                }
            }
        ])
    print("✅ Update complete.")

def search_knowledge(query, top_k=3):
    """Return the stored Wikipedia summaries closest to a free-text query.

    Args:
        query: Text to embed and look up in the index.
        top_k: Maximum number of matches requested from the index.

    Returns:
        A list with one dict per match, each holding the "topic", "source",
        "text" and "time" values taken from the match's metadata.
    """
    wanted_fields = ("topic", "source", "text", "time")
    response = index.query(
        vector=model.encode(query).tolist(),
        top_k=top_k,
        include_metadata=True,
    )

    hits = []
    for hit in response["matches"]:
        metadata = hit["metadata"]
        hits.append({field: metadata[field] for field in wanted_fields})
    return hits

# ========================
# SCHEDULER: update every hour
# ========================
# The `schedule` job registry lives as long as the kernel, so re-running this
# cell would otherwise stack one more hourly job per run. Drop any job that
# was registered under this tag before registering it again.
UPDATE_JOB_TAG = "wikipedia-update"
schedule.clear(UPDATE_JOB_TAG)
schedule.every(1).hours.do(update_knowledge).tag(UPDATE_JOB_TAG)

# Run once at startup
update_knowledge()

# ========================
# INTERACTIVE CHAT LOOP
# ========================
while True:
    # NOTE: input() below blocks, so a due update only runs once the next
    # prompt has been answered, not exactly on the hour.
    schedule.run_pending()
    # Strip surrounding whitespace so "exit " / " quit" still end the loop.
    user_input = input("\nAsk me something (type 'exit' to quit): ").strip()
    if user_input.lower() in ["exit", "quit"]:
        break
    if not user_input:
        # Nothing to search for; prompt again instead of querying the index.
        continue

    results = search_knowledge(user_input, top_k=1)
    if results:
        res = results[0]
        print(f"\nTopic: {res['topic']}")
        print(f"Source: {res['source']}")
        print(f"Text snippet: {res['text'][:500]}...")
    else:
        print("No results found.")

🔄 Running update pipeline...
✅ Update complete.



Ask me something (type 'exit' to quit):  what is python



Topic: Python_(programming_language)
Source: https://en.wikipedia.org/wiki/Python_(programming_language)
Text snippet: Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation....



Ask me something (type 'exit' to quit):  exit
