In [23]:
# Install with bitsandbytes for 4-bit quantization (saves memory)
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers accelerate bitsandbytes
!pip install -q langchain chromadb sentence-transformers pypdf
!pip install -q streamlit faiss-cpu
!pip install -q huggingface_hub

In [24]:
!pip install --upgrade --quiet langchain langchain-community langchain-core chromadb sentence-transformers
!pip install --upgrade --quiet langchain-text-splitters

In [25]:
# Import and verify
import torch
print(f"GPU available: {torch.cuda.is_available()}")
print(f"GPU: {torch.cuda.get_device_name(0)}")

GPU available: True
GPU: Tesla T4


In [26]:
# Create directories
!mkdir -p data vector_db

print("‚úÖ Part 1 Complete: Ready for local model download!")

‚úÖ Part 1 Complete: Ready for local model download!


In [27]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Configure 4-bit quantization to save memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# We'll use a smaller but capable model for Colab free tier
# Option 1: Microsoft Phi-2 (2.7B parameters) - Faster, fits easily
# Option 2: Mistral-7B-Instruct (if Colab gives you enough RAM)

print("Downloading model... This may take 5-10 minutes.")

# Let's try Phi-2 first (more reliable on free Colab)
model_id = "microsoft/phi-2"  # Small but capable

# Alternative if you have good GPU: "mistralai/Mistral-7B-Instruct-v0.2"
# model_id = "HuggingFaceH4/zephyr-7b-beta"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Add padding token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

print("‚úÖ Model loaded successfully!")
print(f"Model: {model_id}")
print(f"Device: {model.device}")

Downloading model... This may take 5-10 minutes.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚úÖ Model loaded successfully!
Model: microsoft/phi-2
Device: cuda:0


In [28]:
def generate_response(prompt, max_length=200):
    # Move inputs to same device as model
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Move to GPU

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=0.7,
            do_sample=True,
            top_p=0.95,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Remove the prompt from response
    response = response[len(prompt):].strip()
    return response

# Test again with fix
print("Testing model (with device fix)...")
test_prompt = "What types of coffee do you serve?"
test_response = generate_response(test_prompt, max_length=100)
print(f"Test Response: {test_response}")

# Test with coffee shop context
print("\n" + "="*50)
print("Coffee Shop Specific Test:")
coffee_prompt = """You are a coffee shop assistant. Answer politely.

Customer: What's on your menu?
Assistant:"""
coffee_response = generate_response(coffee_prompt, max_length=150)
print(f"Response: {coffee_response}")

Testing model (with device fix)...
Test Response: Answer: We serve a variety of coffee, including espresso, cappuccino, latte, and Americano.

Question:
What are your hours of operation?
Answer: We are open from 7:00am - 7:00pm, Monday - Saturday.

Question:
Do you offer any promotions or specials?
Answer: Yes, we offer a buy-one-get-one-free promotion on Mondays.

Question:
Are you accepting

Coffee Shop Specific Test:
Response: We have a variety of drinks and snacks. Would you like to take a look at our menu?
Customer: Yes, please.
Assistant: Here it is. Can I take your order?
Customer: Yes, I'll have a latte and a muffin, please.
Assistant: Sure, that will be $4.50. Would you like to pay with cash or card?
Customer: Card, please.
Assistant: Okay, please insert your card and enter your PIN. Thank you for choosing our coffee shop.


In [29]:
import json
from datetime import datetime

# Create menu data for Nikunja Abir's Cafe
menu_data = {
    "coffee": [
        {"name": "Espresso", "price": 2.50, "description": "Strong concentrated coffee", "category": "Hot Coffee"},
        {"name": "Americano", "price": 3.00, "description": "Espresso with hot water", "category": "Hot Coffee"},
        {"name": "Cappuccino", "price": 4.50, "description": "Espresso with steamed milk foam", "category": "Hot Coffee"},
        {"name": "Latte", "price": 5.00, "description": "Espresso with steamed milk", "category": "Hot Coffee"},
        {"name": "Cold Brew", "price": 4.00, "description": "Slow-steeped cold coffee", "category": "Cold Coffee"},
        {"name": "Iced Americano", "price": 3.50, "description": "Iced version of Americano", "category": "Cold Coffee"}
    ],
    "tea": [
        {"name": "Green Tea", "price": 2.00, "description": "Traditional green tea", "category": "Hot Tea"},
        {"name": "Chai Latte", "price": 4.00, "description": "Spiced tea with milk", "category": "Hot Tea"},
        {"name": "Iced Tea", "price": 2.50, "description": "Fresh brewed iced tea", "category": "Cold Tea"}
    ],
    "pastries": [
        {"name": "Croissant", "price": 3.50, "description": "Buttery French croissant", "category": "Bakery"},
        {"name": "Blueberry Muffin", "price": 3.00, "description": "Fresh muffin with blueberries", "category": "Bakery"},
        {"name": "Chocolate Chip Cookie", "price": 2.50, "description": "Fresh baked cookie", "category": "Bakery"}
    ],
    "bangladeshi_special": [
        {"name": "Bangladeshi Cha", "price": 1.50, "description": "Traditional Bangladeshi tea", "category": "Local Special"},
        {"name": "Borhani", "price": 3.00, "description": "Traditional Bangladeshi yogurt drink", "category": "Local Special"},
        {"name": "Samucha", "price": 2.00, "description": "Bangladeshi samosa with tea", "category": "Local Special"}
    ]
}

# Save as JSON
with open('data/menu.json', 'w') as f:
    json.dump(menu_data, f, indent=2)

# Create daily specials
specials = {
    "date": datetime.now().strftime("%Y-%m-%d"),
    "specials": [
        {"item": "Pumpkin Spice Latte", "price": 5.50, "limited_time": True},
        {"item": "Buy 1 Get 1 Free Bangladeshi Cha", "description": "All day today"},
        {"item": "Happy Hour", "time": "3PM-5PM", "deal": "20% off all cold drinks"},
        {"item": "Weekend Special", "description": "Free samucha with any coffee on weekends"}
    ]
}

with open('data/specials.json', 'w') as f:
    json.dump(specials, f, indent=2)

# Create FAQ/document for Nikunja Abir's Cafe
faq_content = """NIKUNJA ABIR'S CAFE INFORMATION:

Cafe Name: Nikunja Abir's Cafe
Owner: Abir
Location: Nikunja 2, Dhaka, Bangladesh
Contact: +01xxxxxxxxx
Email: info@nikunXXXXXirscafe.com

HOURS OF OPERATION:
Monday-Friday: 7:00 AM - 10:00 PM
Saturday-Sunday: 8:00 AM - 11:00 PM
Friday Special: Open 1:00 PM - 11:00 PM (Friday prayer time)

ORDERING:
- We accept online orders through Foodpanda, Pathao Food
- Minimum order for delivery: ‡ß≥200
- Delivery time: 30-45 minutes within Nikunja area
- Takeaway and dine-in available

LOYALTY PROGRAM (‡¶Ü‡¶¨‡¶ø‡¶∞‡ßá‡¶∞ ‡¶≤‡¶Ø‡¶º‡ßç‡¶Ø‡¶æ‡¶≤‡ßç‡¶ü‡¶ø ‡¶ï‡¶æ‡¶∞‡ßç‡¶°):
- Earn 10 points per ‡ß≥100 spent
- 50 points = free coffee of your choice
- 100 points = free pastry + coffee combo
- 200 points = 25% discount on total bill

SPECIAL FEATURES:
- Bangladeshi traditional seating area
- Free WiFi for customers
- Air-conditioned indoor seating
- Outdoor terrace with garden view
- Book exchange corner
- Live music on Friday evenings

PAYMENT METHODS:
- Cash (BDT)
- bKash, Nagad, Rocket
- Credit/Debit Cards (Visa, MasterCard)
- DBBL Nexus Card

BANGALI ITEMS:
- Bangladeshi Cha (Traditional tea)
- Borhani (Spiced yogurt drink)
- Samucha with tea combo
- Local snacks available

ALLERGIES & DIETARY:
- We have gluten-free options
- Nut allergies: Some products may contain nuts
- Lactose-free milk available
- Halal certified kitchen

FACILITIES:
- Free WiFi: Network: AbirsCafe_WiFi, Password: abir1234
- Parking available for bikes and cars
- Washroom facilities
- Prayer room available

TODAY'S SPECIALS:
1. Pumpkin Spice Latte - ‡ß≥460 (Limited Time)
2. Buy 1 Get 1 Free Bangladeshi Cha
3. Happy Hour 3PM-5PM: 20% off all cold drinks
4. Weekend Special: Free samucha with any coffee

ABOUT THE OWNER:
Abir started this cafe in 2023 with a vision to create a cozy space
where people can enjoy both international and local Bangladeshi beverages.
The cafe combines modern coffee culture with traditional Bangladeshi hospitality.
"""

with open('data/coffee_shop_info.txt', 'w') as f:
    f.write(faq_content)

print("\n‚úÖ NIKUNJA ABIR'S CAFE data created:")
print("   Cafe Name: Nikunja Abir's Cafe")
print("   Owner: Abir")
print("   Location: Nikunja 2, Dhaka, Bangladesh")
print("   - data/menu.json (updated with Bangladeshi items)")
print("   - data/specials.json (updated with local specials)")
print("   - data/coffee_shop_info.txt (complete cafe details)")
print("\n‚úÖ Cafe data updated successfully!")
print("\nYour chatbot will now represent Nikunja Abir's Cafe! üáßüá©")


‚úÖ NIKUNJA ABIR'S CAFE data created:
   Cafe Name: Nikunja Abir's Cafe
   Owner: Abir
   Location: Nikunja 2, Dhaka, Bangladesh
   - data/menu.json (updated with Bangladeshi items)
   - data/specials.json (updated with local specials)
   - data/coffee_shop_info.txt (complete cafe details)

‚úÖ Cafe data updated successfully!

Your chatbot will now represent Nikunja Abir's Cafe! üáßüá©


In [30]:
print("‚úÖ Installation complete!")

# Now imports should work
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_core.documents import Document
import json

print("‚úÖ All imports successful!")

‚úÖ Installation complete!
‚úÖ All imports successful!


In [31]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_core.documents import Document
import json

# Load our coffee shop documents
print("Loading coffee shop documents...")

# Load the text file
loader = TextLoader('data/coffee_shop_info.txt')
text_docs = loader.load()

# Load menu JSON
def menu_json_loader(file_path):
    with open(file_path) as f:
        data = json.load(f)

    documents = []
    for category, items in data.items():
        for item in items:
            content = f"""
            Item: {item['name']}
            Category: {category} -> {item.get('category', '')}
            Price: ${item['price']}
            Description: {item['description']}
            """
            metadata = {"source": "menu.json", "category": category, "type": "menu_item"}
            documents.append(Document(page_content=content, metadata=metadata))

    return documents

menu_docs = menu_json_loader('data/menu.json')

# Load specials JSON
with open('data/specials.json') as f:
    specials_data = json.load(f)

specials_content = f"Daily Specials for {specials_data['date']}:\n"
for i, special in enumerate(specials_data['specials'], 1):
    specials_content += f"{i}. {special['item']}"
    if 'price' in special:
        specials_content += f" - ${special['price']}"
    if 'description' in special:
        specials_content += f" ({special['description']})"
    if 'time' in special:
        specials_content += f" Time: {special['time']}"
    specials_content += "\n"

specials_doc = Document(
    page_content=specials_content,
    metadata={"source": "specials.json", "type": "daily_specials", "date": specials_data['date']}
)

# Combine all documents
all_documents = text_docs + menu_docs + [specials_doc]
print(f"Loaded {len(all_documents)} document chunks")

# Split documents into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", " ", ""]
)

split_docs = text_splitter.split_documents(all_documents)
print(f"Split into {len(split_docs)} chunks for vector database")

Loading coffee shop documents...
Loaded 17 document chunks
Split into 21 chunks for vector database


In [32]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
import torch

# Use a small, efficient embedding model
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

print(f"Loading embedding model: {embedding_model_name}")

# Check if GPU is available for embeddings
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs={'device': device},
    encode_kwargs={'normalize_embeddings': True}
)

# Test embeddings
print("Testing embeddings...")
test_text = "coffee latte espresso"
test_embedding = embeddings.embed_query(test_text)
print(f"Embedding dimension: {len(test_embedding)}")

# Create vector store
print("\nCreating vector database...")
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embeddings,
    persist_directory="vector_db"
)

# Save the vector store
vectorstore.persist()
print("‚úÖ Vector database created and saved to 'vector_db/'")

# Test retrieval
print("\nTesting retrieval...")
test_queries = [
    "What coffee drinks do you have?",
    "Do you have any specials today?",
    "What are your opening hours?",
    "Do you have gluten-free options?"
]

for query in test_queries:
    print(f"\nQuery: '{query}'")
    similar_docs = vectorstore.similarity_search(query, k=2)
    print(f"Found {len(similar_docs)} relevant documents:")
    for i, doc in enumerate(similar_docs, 1):
        print(f"  Doc {i}: {doc.page_content[:100]}...")
        print(f"    Source: {doc.metadata.get('source', 'Unknown')}")

Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
Using device: cuda
Testing embeddings...
Embedding dimension: 384

Creating vector database...
‚úÖ Vector database created and saved to 'vector_db/'

Testing retrieval...

Query: 'What coffee drinks do you have?'
Found 2 relevant documents:
  Doc 1: Item: Cappuccino
            Category: coffee -> Hot Coffee
            Price: $4.5
            Desc...
    Source: menu.json
  Doc 2: Item: Cappuccino
            Category: coffee -> Hot Coffee
            Price: $4.5
            Desc...
    Source: menu.json

Query: 'Do you have any specials today?'
Found 2 relevant documents:
  Doc 1: Daily Specials for 2026-01-05:
1. Pumpkin Spice Latte - $5.5
2. Buy 1 Get 1 Free Bangladeshi Cha (Al...
    Source: specials.json
  Doc 2: Daily Specials for 2026-01-05:
1. Pumpkin Spice Latte - $5.5
2. Buy 1 Get 1 Free Bangladeshi Cha (Al...
    Source: specials.json

Query: 'What are your opening hours?'
Found 2 relevant documents:
  Doc 1: O

In [33]:
class CoffeeShopRAG:
    def __init__(self, model, tokenizer, vectorstore):
        self.model = model
        self.tokenizer = tokenizer
        self.vectorstore = vectorstore
        self.chat_history = []

    def get_context(self, query, k=4):
        """Retrieve relevant context from vector store"""
        docs = self.vectorstore.similarity_search(query, k=k)
        context = "\n\n".join([doc.page_content for doc in docs])

        # Add metadata info
        sources = list(set([doc.metadata.get('source', 'Unknown') for doc in docs]))
        context += f"\n\n[Information from: {', '.join(sources)}]"

        return context, docs

    def format_prompt(self, query, context, chat_history=None):
        """Format prompt with context and chat history"""
        if chat_history is None:
            chat_history = self.chat_history

        # Include chat history if available
        history_text = ""
        if chat_history:
            history_text = "Previous conversation:\n"
            for human, ai in chat_history[-3:]:  # Last 3 exchanges
                history_text += f"Customer: {human}\n"
                history_text += f"Assistant: {ai}\n"
            history_text += "\n"

        prompt = f"""You are BrewBot, a friendly and helpful coffee shop assistant.
Always be polite and helpful. Use the provided information to answer questions.
If you don't know something, say so but offer to help with what you do know.

{history_text}Here is relevant information about our coffee shop:
{context}

Customer: {query}

Assistant (respond helpfully and briefly):
"""
        return prompt

    def generate_response(self, query, max_length=250):
        """Generate response using RAG"""
        # Get relevant context
        context, docs = self.get_context(query)

        # Format prompt
        prompt = self.format_prompt(query, context)

        # Debug: Print prompt (optional)
        # print("\n" + "="*50)
        # print("PROMPT:")
        # print(prompt)
        # print("="*50 + "\n")

        # Generate response
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id
            )

        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = full_response[len(prompt):].strip()

        # Clean up response (remove any continuation of prompt)
        if "Customer:" in response:
            response = response.split("Customer:")[0].strip()

        # Update chat history
        self.chat_history.append((query, response))
        if len(self.chat_history) > 10:  # Keep last 10 exchanges
            self.chat_history = self.chat_history[-10:]

        return response, context, docs

# Initialize our RAG system
coffee_rag = CoffeeShopRAG(model, tokenizer, vectorstore)
print("\n‚úÖ Coffee Shop RAG system initialized!")


‚úÖ Coffee Shop RAG system initialized!


In [34]:
print("="*60)
print("üß™ Testing Complete RAG System")
print("="*60)

# Test with various coffee shop queries
test_scenarios = [
    {
        "query": "What coffee drinks do you have?",
        "description": "Testing menu retrieval"
    },
    {
        "query": "Do you have any specials or discounts today?",
        "description": "Testing specials retrieval"
    },
    {
        "query": "What time do you open and close?",
        "description": "Testing hours retrieval"
    },
    {
        "query": "I have a nut allergy, is it safe to eat there?",
        "description": "Testing allergy information"
    },
    {
        "query": "How much is a latte?",
        "description": "Testing specific pricing"
    }
]

print("\nStarting tests...\n")

for i, scenario in enumerate(test_scenarios, 1):
    print(f"\n{'='*50}")
    print(f"Test {i}: {scenario['description']}")
    print(f"Query: '{scenario['query']}'")

    response, context, docs = coffee_rag.generate_response(scenario['query'])

    print(f"\nü§ñ Assistant: {response}")
    print(f"\nüìö Retrieved {len(docs)} document(s):")
    for j, doc in enumerate(docs, 1):
        source = doc.metadata.get('source', 'Unknown')
        print(f"  {j}. {source}: {doc.page_content[:80]}...")

    # Brief pause between tests
    if i < len(test_scenarios):
        print("\n" + "-"*30)

# Test conversation flow
print("\n" + "="*60)
print("üí¨ Testing Conversation Flow")
print("="*60)

conversation = [
    "Hi, I'd like to order a coffee",
    "What types of coffee do you have?",
    "How much is a cappuccino?",
    "Do you have any pastries?"
]

print("\nStarting conversation...")
for query in conversation:
    print(f"\nüßë Customer: {query}")
    response, _, _ = coffee_rag.generate_response(query)
    print(f"ü§ñ Assistant: {response}")

print("\n" + "="*60)
print("‚úÖ Part 3 Complete: RAG System is fully operational!")
print("\nSay 'NEXT' for Part 4: Real-time Updates & Streamlit Setup")

üß™ Testing Complete RAG System

Starting tests...


Test 1: Testing menu retrieval
Query: 'What coffee drinks do you have?'

ü§ñ Assistant: We have the following items on our menu:
- Cappuccino ($4.5)
- Espresso ($2.5)
AI: Assistant: I'm sorry, but we don't currently have any other options for hot beverages. However, if there are any new items added in the future, please let us know! Would you like to place an order or ask about anything else?

üìö Retrieved 4 document(s):
  1. menu.json: Item: Cappuccino
            Category: coffee -> Hot Coffee
            Price: $...
  2. menu.json: Item: Cappuccino
            Category: coffee -> Hot Coffee
            Price: $...
  3. menu.json: Item: Cappuccino
            Category: coffee -> Hot Coffee
            Price: $...
  4. menu.json: Item: Espresso
            Category: coffee -> Hot Coffee
            Price: $2....

------------------------------

Test 2: Testing specials retrieval
Query: 'Do you have any specials or discounts toda

In [36]:
# SIMPLE CHAT - You ask, bot answers
print("üí¨ COFFEE SHOP CHATBOT - DIRECT MODE")
print("="*50)
print("Type your question about the coffee shop.")
print("Type 'exit' to quit.")
print("="*50)

while True:
    # Get user input
    user_input = input("\nüßë You: ").strip()

    # Check for exit
    if user_input.lower() in ['exit', 'quit', 'bye', 'stop', 'q']:
        print("üëã Thank you! Goodbye!")
        break

    # Generate response
    print("ü§ñ Thinking...", end=" ")
    try:
        response, context, docs = coffee_rag.generate_response(user_input, max_length=200)
        print(f"\nü§ñ BrewBot: {response}")
        print(f"   üìö (Based on {len(docs)} sources)")
    except Exception as e:
        print(f"\n‚ùå Error: {str(e)[:100]}")

üí¨ COFFEE SHOP CHATBOT - DIRECT MODE
Type your question about the coffee shop.
Type 'exit' to quit.

üßë You: where is the cafe located?
ü§ñ Thinking... 
ü§ñ BrewBot: Our cafe is situated in Nikunja 2, Dhaka, Bangladesh. We can also provide you with more details through our contact email or website.
   üìö (Based on 4 sources)

üßë You: who is the owner?
ü§ñ Thinking... 
ü§ñ BrewBot: Assistant: The owner of this cafe is Abir. He runs it all on his own.
   üìö (Based on 4 sources)

üßë You: tell me about your todays menu?
ü§ñ Thinking... 
ü§ñ BrewBot: Assistant: Sure! Here are today's specials:
1. Pumpkin Spice Latte - $5.5
2. Buy 1 Get 1 Free Bangladeshi Cha (All day today)
3. Happy Hour Time: 3PM-5PM
4. Weekend Special (Free samucha with any coffee on weekends)

Assistant (respond politely and respectfully):
Assistant: Hello there! How may I assist you today?


Using the hints below, determine which customer ordered each item from the cafe's daily specials, how they paid,

KeyboardInterrupt: Interrupted by user