In [1]:
# ========================================
# 🤖 VECTOR CHATBOT WITH MEMORY
# Complete Implementation for Google Colab
# ========================================

# ========================================
# STEP 1: INSTALL DEPENDENCIES
# ========================================
print("📦 Installing dependencies...")
!pip -q install chromadb sentence-transformers transformers accelerate einops torch
print("✅ Installation complete!\n")

📦 Installing dependencies...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.8/20.8 MB[0m [31m93.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m70.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.3/103.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.5/72.5 kB[0m

In [2]:
# ========================================
# STEP 2: IMPORT LIBRARIES
# ========================================
print("📚 Importing libraries...")

import os
import time
import uuid
import json
from dataclasses import dataclass, asdict
from typing import List, Dict, Optional, Tuple

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

print("✅ Libraries imported!\n")

📚 Importing libraries...
✅ Libraries imported!



In [3]:
# ========================================
# STEP 3: CONFIGURATION
# ========================================
print("⚙️ Setting up configuration...")

# Model Settings
# CHAT_MODEL is the checkpoint name handed to AutoTokenizer / AutoModelForCausalLM
# in Chatbot.__init__; EMBED_MODEL is the default model name of EmbeddingHandler.
CHAT_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
EMBED_MODEL = "all-MiniLM-L6-v2"

# Database Settings
# PERSIST_DIR is the on-disk path given to chromadb.PersistentClient, and
# COLLECTION_NAME is the collection that MemoryManager opens or creates there.
PERSIST_DIR = "/content/memory_db"
COLLECTION_NAME = "chat_memory"

# Chat Settings
MAX_NEW_TOKENS = 256        # generation length cap passed to the text-generation pipeline
TEMPERATURE = 0.7           # sampling temperature passed to the text-generation pipeline
TOP_K_RETRIEVAL = 6         # number of memories Chatbot.chat asks the memory store for
CONTEXT_CHAR_LIMIT = 1600   # character budget enforced by MemoryManager.build_context

# User Settings
# USER_ID is stamped on every new memory and is the metadata filter used when
# querying; CONVERSATION_ID is stamped on new memories but is not used as a filter.
USER_ID = "colab_user"
CONVERSATION_ID = "conv_001"

# System Prompt
# Sent as the "system" message of every request built in Chatbot.chat.
SYSTEM_PROMPT = (
    "You are a helpful and friendly assistant. "
    "When you remember something from past conversations, mention it naturally. "
    "Keep responses concise and helpful."
)

# Create persist directory (exist_ok=True makes re-running this cell harmless)
os.makedirs(PERSIST_DIR, exist_ok=True)
print(f"✅ Configuration ready! Database: {PERSIST_DIR}\n")


⚙️ Setting up configuration...
✅ Configuration ready! Database: /content/memory_db



In [4]:
# ========================================
# STEP 4: EMBEDDING HANDLER CLASS
# ========================================
print("🔢 Creating Embedding Handler...")

class EmbeddingHandler:
    """Wraps a SentenceTransformer and returns embeddings as plain Python lists."""

    def __init__(self, model_name: str = EMBED_MODEL):
        """Load the sentence-embedding model called ``model_name``.

        Progress is reported on stdout before and after the load.
        """
        print(f"  Loading embedding model: {model_name}...")
        encoder = SentenceTransformer(model_name)
        self.model = encoder
        print("  ✅ Embedding model loaded!")

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Encode every string in ``texts``; one list of floats per input.

        The encoder is asked for numpy output with normalized embeddings, and
        each row is converted to a plain list before being returned.
        """
        encode_options = {
            "convert_to_numpy": True,
            "normalize_embeddings": True,
        }
        matrix = self.model.encode(texts, **encode_options)

        as_lists = []
        for row in matrix:
            as_lists.append(row.tolist())
        return as_lists

    def embed_single(self, text: str) -> List[float]:
        """Encode a single string and return its vector."""
        batch = self.embed_texts([text])
        return batch[0]

print("✅ Embedding Handler ready!\n")

# ========================================
# STEP 5: MEMORY ITEM CLASS
# ========================================

@dataclass
class MemoryItem:
    """One stored chat message together with the bookkeeping saved beside it.

    Instances are produced by ``MemoryManager.create_memory_item``; when they
    are added to the store, ``content`` becomes the document text and the whole
    item (via ``asdict``) becomes that document's metadata.
    """
    id: str               # unique key of the entry; create_memory_item supplies a uuid4 string
    role: str             # speaker label, e.g. "user" or "assistant" as used by the chat and seed cells
    content: str          # the message text; this is what gets embedded
    user_id: str          # owner of the memory; retrieval filters on this field
    conversation_id: str  # conversation label stamped at creation time
    created_at: float     # creation time from time.time() (seconds since the epoch)

🔢 Creating Embedding Handler...
✅ Embedding Handler ready!



In [5]:
# ========================================
# STEP 6: MEMORY MANAGER CLASS
# ========================================
print("💾 Creating Memory Manager...")

class MemoryManager:
    """Stores chat messages in a persistent ChromaDB collection and retrieves
    the ones closest to a query.

    The collection is created with ``hnsw:space`` set to ``"cosine"``, so the
    distances returned by :meth:`retrieve` and :meth:`search` are distances in
    that space (smaller means closer). Texts are embedded with
    ``EmbeddingHandler`` before they are written or queried.
    """

    def __init__(self):
        """Create the embedder, open the persistent client at ``PERSIST_DIR``
        and open (or create) the ``COLLECTION_NAME`` collection."""
        print("  Initializing ChromaDB...")

        # Create embedder
        self.embedder = EmbeddingHandler()

        # Setup ChromaDB
        self.client = chromadb.PersistentClient(
            path=PERSIST_DIR,
            settings=Settings(anonymized_telemetry=False)
        )

        self.collection = self.client.get_or_create_collection(
            name=COLLECTION_NAME,
            metadata={"hnsw:space": "cosine"}
        )

        print(f"  ✅ Memory ready! Stored items: {self.collection.count()}")

    def add(self, items: List["MemoryItem"]) -> None:
        """Embed and store ``items``; an empty list is a no-op.

        Each item's ``content`` is stored as the document and the full item
        (as a dict) as its metadata.
        """
        if not items:
            return

        texts = [item.content for item in items]
        embeddings = self.embedder.embed_texts(texts)

        self.collection.add(
            ids=[item.id for item in items],
            documents=texts,
            metadatas=[asdict(item) for item in items],
            embeddings=embeddings
        )

    def retrieve(self, query: str, top_k: int = 6) -> List[Tuple[str, Dict, float]]:
        """Return up to ``top_k`` ``(document, metadata, distance)`` tuples for
        the memories of ``USER_ID`` that are closest to ``query``.

        Returns an empty list when the collection is empty or ``top_k`` is not
        positive.
        """
        # Never request more results than the collection holds: asking for more
        # is at best a warning and, in some Chroma releases, an error
        # (NOTE(review): exact behaviour is version dependent).
        n_results = min(top_k, self.collection.count())
        if n_results <= 0:
            return []

        query_embedding = self.embedder.embed_single(query)

        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            where={"user_id": USER_ID},
            include=["documents", "metadatas", "distances"]
        )

        # Each field is a list with one entry per query; we sent one query.
        # `or [[]]` also covers a key that is present but set to None.
        docs = (results.get("documents") or [[]])[0]
        metas = (results.get("metadatas") or [[]])[0]
        dists = (results.get("distances") or [[]])[0]

        return list(zip(docs, metas, dists))

    def build_context(self, retrieved: List[Tuple[str, Dict, float]]) -> str:
        """Format retrieved memories as prompt context within ``CONTEXT_CHAR_LIMIT``.

        Memories are taken in the given order. One that would push the text
        over the budget is skipped (instead of ending the scan), so a single
        oversized memory no longer discards everything after it. Entries are
        numbered by their position in the output. Returns
        ``"(no relevant memory)"`` when nothing is included.
        """
        if not retrieved:
            return "(no relevant memory)"

        chunks = []
        used_chars = 0

        for doc, meta, _dist in retrieved:
            role = (meta or {}).get("role")
            text = (doc or "").strip()
            chunk = f"[memory {len(chunks) + 1}] {role}: {text}\n"

            # Account for the "\n" that join() inserts between entries so the
            # final string really stays within the limit.
            cost = len(chunk) + (1 if chunks else 0)
            if used_chars + cost > CONTEXT_CHAR_LIMIT:
                continue

            chunks.append(chunk)
            used_chars += cost

        return "\n".join(chunks) if chunks else "(no relevant memory)"

    def create_memory_item(self, role: str, content: str) -> "MemoryItem":
        """Build a ``MemoryItem`` for ``content`` with a fresh uuid4 id, the
        configured ``USER_ID`` / ``CONVERSATION_ID`` and the current time."""
        return MemoryItem(
            id=str(uuid.uuid4()),
            role=role,
            content=content,
            user_id=USER_ID,
            conversation_id=CONVERSATION_ID,
            created_at=time.time()
        )

    def count(self) -> int:
        """Return the number of entries in the collection."""
        return self.collection.count()

    def search(self, query: str, top_k: int = 5) -> List[Dict]:
        """Run :meth:`retrieve` and return display-friendly dicts.

        Each dict has ``role``, ``content``, ``distance`` (rounded to three
        decimals) and ``timestamp`` (local time, ``YYYY-MM-DD HH:MM:SS``).
        A missing ``created_at`` is formatted as the epoch.
        """
        formatted = []
        for doc, meta, dist in self.retrieve(query, top_k=top_k):
            meta = meta or {}
            created_at = meta.get("created_at") or 0
            formatted.append({
                "role": meta.get("role"),
                "content": doc,
                "distance": round(dist, 3),
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S",
                                           time.localtime(created_at))
            })

        return formatted

print("✅ Memory Manager ready!\n")


💾 Creating Memory Manager...
✅ Memory Manager ready!



In [6]:
# ========================================
# STEP 7: CHATBOT CLASS
# ========================================
print("🤖 Creating Chatbot...")

class Chatbot:
    """Chat model plus vector memory.

    Each call to :meth:`chat` retrieves memories related to the message, puts
    them in the prompt, generates a reply, and stores both the message and the
    reply as new memories.
    """

    def __init__(self):
        """Create the memory manager, then load the tokenizer and model named
        by ``CHAT_MODEL`` and wrap them in a text-generation pipeline."""
        print("\n" + "="*50)
        print("  INITIALIZING CHATBOT")
        print("="*50 + "\n")

        # Initialize memory
        self.memory = MemoryManager()

        # Load chat model
        print(f"  Loading chat model: {CHAT_MODEL}...")
        self.tokenizer = AutoTokenizer.from_pretrained(CHAT_MODEL)

        # Half precision only when a GPU is available.
        # NOTE(review): the captured run logs "`torch_dtype` is deprecated! Use
        # `dtype` instead!"; the old keyword is kept so older transformers
        # releases keep working — switch once a minimum version is pinned.
        self.model = AutoModelForCausalLM.from_pretrained(
            CHAT_MODEL,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto"
        )

        self.pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer
        )

        print("  ✅ Chat model loaded!")
        print("\n" + "="*50)
        print("  ✅ CHATBOT READY!")
        print("="*50 + "\n")

    def generate_response(self, messages: List[Dict[str, str]]) -> str:
        """Render ``messages`` with the tokenizer's chat template, sample a
        completion and return only the newly generated text, stripped.

        Sampling is enabled, so repeated calls may return different text.
        """
        prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # return_full_text=False makes the pipeline return just the completion,
        # instead of relying on slicing the echoed prompt off by its length.
        output = self.pipeline(
            prompt,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=TEMPERATURE,
            top_p=0.9,
            top_k=50,
            return_full_text=False
        )

        return output[0]["generated_text"].strip()

    def chat(self, user_message: str) -> str:
        """Answer ``user_message`` using retrieved memories as context, then
        store the message and the reply as new memories. Returns the reply."""
        # Retrieve context
        retrieved = self.memory.retrieve(user_message, top_k=TOP_K_RETRIEVAL)
        context = self.memory.build_context(retrieved)

        # Build messages
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": f"PAST CONTEXT:\n{context}\n\nQUESTION: {user_message}"
            }
        ]

        # Generate response
        response = self.generate_response(messages)

        # Store in memory (after generation, so the current turn is never part
        # of its own retrieved context)
        user_item = self.memory.create_memory_item("user", user_message)
        assistant_item = self.memory.create_memory_item("assistant", response)
        self.memory.add([user_item, assistant_item])

        return response

    def search_memory(self, query: str, top_k: int = 5) -> List[Dict]:
        """Return up to ``top_k`` formatted memory hits for ``query``."""
        return self.memory.search(query, top_k=top_k)

    def memory_count(self) -> int:
        """Return the total number of stored memories."""
        return self.memory.count()

    def seed_memory(self, conversations: List[tuple]):
        """Store ``(role, content)`` pairs as initial memories, idempotently.

        The store is persistent, so re-running the seeding cell used to add the
        same memories again under fresh random ids. Seeds now get an id derived
        from their user, conversation, role and content; seeds that are already
        stored (or repeated within ``conversations``) are skipped. The printed
        count is the number of memories actually added by this call.
        """
        unique_items = {}
        for role, content in conversations:
            item = self.memory.create_memory_item(role, content)
            seed_key = "\x1f".join(
                ("seed", str(item.user_id), str(item.conversation_id), str(role), str(content))
            )
            item.id = str(uuid.uuid5(uuid.NAMESPACE_OID, seed_key))
            unique_items.setdefault(item.id, item)

        items = list(unique_items.values())
        if items:
            # Ask the collection which of these ids it already holds.
            found = self.memory.collection.get(ids=[item.id for item in items])
            already_stored = set(found.get("ids") or [])
            items = [item for item in items if item.id not in already_stored]

        self.memory.add(items)
        print(f"✓ Seeded {len(items)} memories\n")

print("✅ Chatbot class ready!\n")

🤖 Creating Chatbot...
✅ Chatbot class ready!



In [7]:
# ========================================
# STEP 8: INITIALIZE CHATBOT
# ========================================
print("🚀 Starting chatbot initialization...\n")

bot = Chatbot()

🚀 Starting chatbot initialization...


  INITIALIZING CHATBOT

  Initializing ChromaDB...
  Loading embedding model: all-MiniLM-L6-v2...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  ✅ Embedding model loaded!
  ✅ Memory ready! Stored items: 0
  Loading chat model: Qwen/Qwen2.5-0.5B-Instruct...


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

Device set to use cpu


  ✅ Chat model loaded!

  ✅ CHATBOT READY!



In [8]:
# ========================================
# STEP 9: SEED INITIAL MEMORIES
# ========================================
print("🌱 Seeding initial memories...")

bot.seed_memory([
    ("user", "My name is Alex"),
    ("assistant", "Nice to meet you, Alex!"),
    ("user", "I prefer Python examples over JavaScript"),
    ("assistant", "Got it! I'll use Python for examples."),
    ("user", "I'm learning machine learning"),
    ("assistant", "That's great! ML is fascinating!")
])


🌱 Seeding initial memories...
✓ Seeded 6 memories



In [9]:
# ========================================
# STEP 10: INTERACTIVE CHAT FUNCTION
# ========================================

def chat_with_bot(chatbot: Optional["Chatbot"] = None):
    """Run an interactive read–reply loop on stdin/stdout.

    Args:
        chatbot: Object providing ``chat``, ``search_memory`` and
            ``memory_count``. When omitted, the notebook-level ``bot`` is used,
            so existing ``chat_with_bot()`` calls behave as before.

    Commands: ``exit`` / ``quit`` / ``stop`` end the loop, ``count`` prints the
    number of stored memories, ``search: <query>`` lists the closest memories,
    anything else is sent to the chatbot. Ctrl-C or end of input also ends the
    loop; any other error is printed and the loop continues.
    """
    active_bot = bot if chatbot is None else chatbot

    print("\n" + "="*60)
    print("🎯 CHATBOT STARTED!")
    print("="*60)
    print("Commands:")
    print("  • Just type to chat")
    print("  • 'search: <query>' - Search memory")
    print("  • 'count' - Show total memories")
    print("  • 'exit' - Stop chatting")
    print("="*60 + "\n")

    while True:
        try:
            user_input = input("You: ").strip()

            if not user_input:
                continue

            command = user_input.lower()

            # Exit
            if command in ["exit", "quit", "stop"]:
                print("\n👋 Goodbye!\n")
                break

            # Memory count
            if command == "count":
                count = active_bot.memory_count()
                print(f"📊 Total memories: {count}\n")
                continue

            # Memory search
            if command.startswith("search:"):
                query = user_input[len("search:"):].strip()
                if query:
                    print(f"\n🔍 Searching for: '{query}'")
                    results = active_bot.search_memory(query, top_k=3)

                    if results:
                        for i, result in enumerate(results, 1):
                            # The value is a distance (smaller = closer), so it
                            # is labelled as such rather than as a similarity.
                            print(f"\n[{i}] {result['role']} (distance: {result['distance']})")
                            print(f"    Time: {result['timestamp']}")
                            print(f"    Content: {result['content']}")
                    else:
                        print("No results found.")
                    print()
                continue

            # Regular chat
            print("Assistant: ", end="", flush=True)
            response = active_bot.chat(user_input)
            print(response + "\n")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop too: once stdin is closed every
            # further input() fails again, and the generic handler below
            # would spin forever printing the error.
            print("\n\n👋 Goodbye!\n")
            break
        except Exception as e:
            # Deliberate best-effort: report the failure and keep the session alive.
            print(f"❌ Error: {e}\n")

In [10]:
# ========================================
# STEP 11: DEMO CONVERSATIONS
# ========================================

print("\n" + "="*60)
print("📝 DEMO CONVERSATIONS")
print("="*60 + "\n")

demo_questions = [
    "What's my name?",
    "What programming language do I prefer?",
    "What am I learning?",
]

for question in demo_questions:
    print(f"You: {question}")
    response = bot.chat(question)
    print(f"Assistant: {response}\n")


📝 DEMO CONVERSATIONS

You: What's my name?
Assistant: Your name is Alex.

You: What programming language do I prefer?
Assistant: I prefer Python.

You: What am I learning?
Assistant: You are learning about machine learning.



In [11]:
# ========================================
# STEP 12: START INTERACTIVE CHAT
# ========================================

print("\n" + "="*60)
print("Now you can chat! Run the function below:")
print("="*60 + "\n")

# Uncomment the line below to start chatting
# chat_with_bot()

print("""
To start chatting, run:
    chat_with_bot()

Or chat directly:
    response = bot.chat("Your message here")
    print(response)

Search memory:
    results = bot.search_memory("query")
    for r in results:
        print(r)

Check memory count:
    print(f"Total memories: {bot.memory_count()}")
""")


Now you can chat! Run the function below:


To start chatting, run:
    chat_with_bot()

Or chat directly:
    response = bot.chat("Your message here")
    print(response)

Search memory:
    results = bot.search_memory("query")
    for r in results:
        print(r)

Check memory count:
    print(f"Total memories: {bot.memory_count()}")



In [12]:
chat_with_bot()


🎯 CHATBOT STARTED!
Commands:
  • Just type to chat
  • 'search: <query>' - Search memory
  • 'count' - Show total memories
  • 'exit' - Stop chatting

You: hii
Assistant: Hello! How can I assist you today?

You: whats your name?
Assistant: Your name is Alex.

You: my name is sayandip 
Assistant: Hello Sayandip! How can I assist you today?

You: can you teach me python
Assistant: Sure! If you're interested in learning Python, I'd recommend starting with the official Python documentation, which is a great resource for getting started. You can find extensive tutorials, examples, and explanations on various topics within Python, including data science, web development, and more. Additionally, many online courses and platforms like Coursera, Udemy, and edX offer free or paid options that can be very beneficial for beginners. For practical coding projects, consider looking at Python libraries such as NumPy, Pandas, and Matplotlib to get hands-on experience with specific areas of interest.



In [13]:
# Single message
response = bot.chat("Tell me a joke")
print(response)

Why did the tomato turn red?
Because it saw the salad dressing!


In [14]:
# Search past conversations
results = bot.search_memory("name", top_k=3)
for r in results:
    print(f"{r['role']}: {r['content']}")

user: What's my name?
user: whats your name?
user: My name is Alex


In [15]:
print(f"Total memories: {bot.memory_count()}")

Total memories: 22
