In [None]:
!pip install langchain langchain-groq sentence-transformers chromadb langchain_community

In [3]:
import os
from google.colab import userdata

GROQ_API_KEY = userdata.get('GROQ_API_KEY')
os.environ['GROQ_API_KEY'] = GROQ_API_KEY

In [10]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
from langchain_groq import ChatGroq
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.1)

In [5]:
from langchain.embeddings import SentenceTransformerEmbeddings
embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [11]:
from langchain.vectorstores import Chroma
vectorstore = Chroma(collection_name="dynamic_kb", embedding_function=embeddings)

# storage for tracking document versions
document_store = {}

In [7]:
import time
from datetime import datetime
from langchain_core.documents import Document

def add_or_update_document(doc_id, content):
    """Add new document or update existing one"""

    ### Check if document changed
    if doc_id in document_store and document_store[doc_id]['content'] == content:
        print(f"📄 No changes in {doc_id}")
        return

    ### Remove old version if exists
    if doc_id in document_store:
        old_ids = document_store[doc_id]['vector_ids']
        vectorstore.delete(ids=old_ids)
        print(f"🗑️ Removed old version of {doc_id}")

    ### Add new version....
    doc = Document(
        page_content=content,
        metadata={
            "doc_id": doc_id,
            "timestamp": datetime.now().isoformat(),
            "version": int(time.time())
        }
    )

    result = vectorstore.add_documents([doc])

    ### Store tracking info..
    document_store[doc_id] = {
        'content': content,
        'vector_ids': result,
        'last_updated': datetime.now().isoformat()
    }

    print(f"✅ Updated {doc_id}")

In [8]:
def query_knowledge_base(question):
    """Query the dynamic knowledge base"""

    docs = vectorstore.similarity_search(question, k=3)

    if not docs:
        return "No information found."

    ### Context with timestamps
    context = ""
    for doc in docs:
        timestamp = doc.metadata.get('timestamp', 'unknown')
        context += f"[Updated: {timestamp[:10]}]\n{doc.page_content}\n\n"

    prompt = f"""
    Answer the question based on this information:

    {context}

    Question: {question}
    Answer:
    """

    response = llm.invoke(prompt)
    return response.content

In [12]:
# Add initial document
add_or_update_document("policy", """
Work Policy:
- Remote work: 2 days per week
- Vacation: 20 days per year
""")

answer = query_knowledge_base("What is the remote work policy?")
print(f"Answer: {answer}")

✅ Updated policy
Answer: The remote work policy is 2 days per week.


In [13]:
# Update the document
add_or_update_document("policy", """
Work Policy (Updated):
- Remote work: 4 days per week
- Vacation: 25 days per year
- Health days: 3 days per month
""")

# Query again to see updated info
answer = query_knowledge_base("What is the remote work policy?")
print(f"Updated Answer: {answer}")

🗑️ Removed old version of policy
✅ Updated policy
Updated Answer: The remote work policy is 4 days per week.


In [14]:
print(f"\nDocuments in store: {list(document_store.keys())}")
for doc_id, info in document_store.items():
    print(f"📄 {doc_id}: Last updated {info['last_updated'][:10]}")


Documents in store: ['policy']
📄 policy: Last updated 2025-08-07


In [15]:
def update_multiple_documents(documents_dict):
    """Update multiple documents at once"""

    print(f"🔄 Updating {len(documents_dict)} documents...")

    for doc_id, content in documents_dict.items():
        add_or_update_document(doc_id, content)

    print("✅ Batch update completed!")

In [16]:
documents = {
    "hr_policy": "HR Policy: New hiring process starts Monday",
    "tech_update": "Tech Update: New AI tools available for all teams",
    "office_news": "Office News: New coffee machine installed in break room"
}

update_multiple_documents(documents)

🔄 Updating 3 documents...
✅ Updated hr_policy
✅ Updated tech_update
✅ Updated office_news
✅ Batch update completed!


In [17]:
questions = [
    "What's new in HR?",
    "Any tech updates?",
    "What's happening in the office?"
]

for q in questions:
    print(f"\nQ: {q}")
    print(f"A: {query_knowledge_base(q)}")


Q: What's new in HR?
A: A new hiring process is starting on Monday.

Q: Any tech updates?
A: Yes, new AI tools are now available for all teams, as of the latest update on 2025-08-07.

Q: What's happening in the office?
A: A new coffee machine has been installed in the break room. Additionally, a new hiring process will be implemented starting Monday. The office also has a work policy in place, which includes allowing remote work for 2 days per week and providing 20 vacation days per year.


In [18]:
print(f"\nDocuments in store: {list(document_store.keys())}")
for doc_id, info in document_store.items():
    print(f"📄 {doc_id}: Last updated {info['last_updated'][:10]}")


Documents in store: ['policy', 'hr_policy', 'tech_update', 'office_news']
📄 policy: Last updated 2025-08-07
📄 hr_policy: Last updated 2025-08-07
📄 tech_update: Last updated 2025-08-07
📄 office_news: Last updated 2025-08-07


In [20]:
add_or_update_document("products", """
Product Catalog 2025:
- AI Assistant: $99/month
- Data Analytics Tool: $199/month
- Security Suite: $299/month
""")

print(query_knowledge_base("How much does the AI Assistant cost?"))

📄 No changes in products
The AI Assistant costs $99/month.


In [21]:
add_or_update_document("products", """
Product Catalog 2025 (New Pricing):
- AI Assistant: $79/month (20% off!)
- Data Analytics Tool: $149/month (price drop!)
- Security Suite: $299/month
- New: Mobile App: $49/month
""")

print(query_knowledge_base("What are the current AI Assistant prices?"))

🗑️ Removed old version of products
✅ Updated products
The current AI Assistant price is $79/month, which is 20% off the original price.


In [22]:
add_or_update_document("company_news", """
Company News - January 2025:
- Hired 50 new employees
- Opened office in Tokyo
- Revenue increased 30%
""")

✅ Updated company_news


In [23]:
add_or_update_document("company_news", """
Company News - February 2025:
- Hired 100 more employees (total 150 new hires)
- Tokyo office fully operational
- Revenue increased 45% (updated numbers)
- Acquired startup TechFlow
""")

🗑️ Removed old version of company_news
✅ Updated company_news


In [24]:
queries = [
    "How many new employees were hired?",
    "What's the latest revenue growth?",
    "Tell me about recent acquisitions"
]

for q in queries:
    print(f"\nQ: {q}")
    print(f"A: {query_knowledge_base(q)}")


Q: How many new employees were hired?
A: According to the Company News from February 2025, the company hired 100 more employees, and it is mentioned that this brings the total to 150 new hires. 

Answer: 150

Q: What's the latest revenue growth?
A: The latest revenue growth is 45%.

Q: Tell me about recent acquisitions
A: The company recently acquired a startup called TechFlow, as announced in the Company News for February 2025.


In [25]:
documents = {
    "meeting_notes": """
Weekly Team Meeting - March 2025:
- Project Alpha: On track, launching next month
- Budget approved for new hires
- Office renovation starts in April
""",

    "technical_specs": """
System Requirements:
- Python 3.9+
- RAM: 16GB minimum
- Storage: 500GB SSD
- GPU: NVIDIA RTX 4080 recommended
""",

    "customer_feedback": """
Recent Customer Reviews:
- "Excellent product, saved us 40% time" - TechCorp
- "Great support team, very responsive" - StartupXYZ
- "Could use more integrations" - BigCompany
"""
}

update_multiple_documents(documents)

🔄 Updating 3 documents...
✅ Updated meeting_notes
✅ Updated technical_specs
✅ Updated customer_feedback
✅ Batch update completed!


In [26]:
test_queries = [
    "When is Project Alpha launching?",
    "What are the system requirements?",
    "What do customers say about the product?",
    "Is there any feedback about support?",
    "What hardware do I need?"
]

for q in test_queries:
    print(f"\n❓ {q}")
    print(f"💬 {query_knowledge_base(q)}")


❓ When is Project Alpha launching?
💬 Project Alpha is launching next month, according to the Weekly Team Meeting notes from March 2025. Since the information was last updated on August 7, 2025, "next month" would refer to April 2025.

❓ What are the system requirements?
💬 The system requirements are:
1. Python 3.9 or later
2. A minimum of 16GB RAM
3. A 500GB SSD for storage
4. An NVIDIA RTX 4080 GPU (recommended)

❓ What do customers say about the product?
💬 Customers have provided positive reviews, stating that the product is "excellent" and has saved them a significant amount of time (40% as mentioned by TechCorp). They also appreciate the support team, describing them as "great" and "very responsive" (as mentioned by StartupXYZ). However, one customer (BigCompany) has suggested that the product "could use more integrations".

❓ Is there any feedback about support?
💬 Yes, there is feedback about support. According to the Recent Customer Reviews, StartupXYZ mentioned that the support

In [27]:
def show_knowledge_base_status():
    """Show current status of all documents"""
    print("\n📚 KNOWLEDGE BASE STATUS")
    print("="*40)

    if not document_store:
        print("No documents in knowledge base")
        return

    for doc_id, info in document_store.items():
        last_update = info['last_updated'][:19]
        content_preview = info['content'][:50] + "..." if len(info['content']) > 50 else info['content']
        print(f"📄 {doc_id}")
        print(f"   Last Updated: {last_update}")
        print(f"   Preview: {content_preview}")
        print()

show_knowledge_base_status()


📚 KNOWLEDGE BASE STATUS
📄 policy
   Last Updated: 2025-08-07T05:06:09
   Preview: 
Work Policy (Updated):
- Remote work: 4 days per ...

📄 hr_policy
   Last Updated: 2025-08-07T05:07:05
   Preview: HR Policy: New hiring process starts Monday

📄 tech_update
   Last Updated: 2025-08-07T05:07:05
   Preview: Tech Update: New AI tools available for all teams

📄 office_news
   Last Updated: 2025-08-07T05:07:05
   Preview: Office News: New coffee machine installed in break...

📄 products
   Last Updated: 2025-08-07T05:21:02
   Preview: 
Product Catalog 2025 (New Pricing):
- AI Assistan...

📄 company_news
   Last Updated: 2025-08-07T05:21:35
   Preview: 
Company News - February 2025:
- Hired 100 more em...

📄 meeting_notes
   Last Updated: 2025-08-07T05:21:54
   Preview: 
Weekly Team Meeting - March 2025:
- Project Alpha...

📄 technical_specs
   Last Updated: 2025-08-07T05:21:54
   Preview: 
System Requirements:
- Python 3.9+
- RAM: 16GB mi...

📄 customer_feedback
   Last Updated: 2025-08-07T