<a href="https://colab.research.google.com/github/ArjyaDey06/Demo_n8n/blob/main/tech_withmcp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# ============================================
# CELL 1: Install Dependencies
# ============================================
# Core RAG stack: LangChain, FAISS, PDF parsing, embeddings, the LLM runtime, and the Gradio UI
!pip install -q langchain langchain-community langchain-huggingface
!pip install -q faiss-cpu pypdf sentence-transformers transformers torch
!pip install -q gradio accelerate

# HTML parser used when loading web pages (WebBaseLoader)
!pip install -q beautifulsoup4

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m80.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m77.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [None]:
import torch
# Report up front whether CUDA is usable; later cells pick the device and dtype from the same check.
print(f"🎮 GPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only device index 0 (the first visible GPU) is described.
    print(f"✅ GPU Name: {torch.cuda.get_device_name(0)}")
    # total_memory is scaled by 1e9 and labelled GB (decimal gigabytes).
    print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("❌ Running on CPU")

🎮 GPU Available: True
✅ GPU Name: Tesla T4
💾 GPU Memory: 15.83 GB


In [None]:
# CELL 2: Imports & Configuration
# ============================================
import os
import warnings
from pathlib import Path
from dataclasses import dataclass, field

# Core LangChain imports
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Community package imports
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS

# HuggingFace package imports
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline

# Transformers imports
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

warnings.filterwarnings('ignore')

# Create directories
# These module-level paths are also the defaults of the Config fields below.
# mkdir(exist_ok=True) makes re-running the cell harmless.
DATA_PATH = Path("data")
VECTORSTORE_PATH = Path("vectorstore")
DATA_PATH.mkdir(exist_ok=True)
VECTORSTORE_PATH.mkdir(exist_ok=True)

@dataclass
class Config:
    """Central settings for the Machine Learning RAG notebook.

    One instance (`config`, created right after the class) is read by the
    ingestion, LLM-loading and retrieval cells. Field names are upper-case
    because they are used like constants.
    """
    # Paths
    DATA_PATH: Path = DATA_PATH                # folder scanned for local *.pdf files
    VECTORSTORE_PATH: Path = VECTORSTORE_PATH  # parent folder of the saved FAISS index

    # ML-Optimized Models
    EMBEDDING_MODEL: str = "BAAI/bge-small-en-v1.5"  # Excellent for technical content

    # For GPU - use flan-t5-large for better quality
    LLM_MODEL: str = "google/flan-t5-large"  # Better reasoning with GPU
    # For CPU - switch to: "google/flan-t5-base"

    # Optimized Chunking for Technical Content
    # Sizes are measured in characters (the splitter is built with length_function=len).
    CHUNK_SIZE: int = 800  # Slightly smaller for better context fit
    CHUNK_OVERLAP: int = 150
    # NOTE(review): the code-fence separators are listed after "\n"; since they
    # themselves contain "\n", confirm that this ordering gives the intended
    # splitting priority.
    CHUNK_SEPARATORS: list = field(default_factory=lambda: [
        "\n## ",      # Markdown headers
        "\n### ",
        "\n\n",       # Paragraphs
        "\n",         # Lines
        "\n```",      # Code blocks
        "```\n",
        ". ",         # Sentences
        " ",
        ""
    ])

    # Generation Parameters - Optimized for Quality
    MAX_NEW_TOKENS: int = 150  # Shorter to prevent rambling
    TEMPERATURE: float = 0.3   # Low sampling temperature, passed to the generation pipeline
    TOP_P: float = 0.95
    # Number of chunks the retriever returns (used as the retriever's "k"),
    # not a sampling top-k for generation.
    TOP_K: int = 5

    # Batch processing
    EMBEDDING_BATCH_SIZE: int = 32  # batch_size handed to the embedding model

    # IMPROVED Prompt Template - Shorter & More Direct
    # Placeholders {context} and {question} are filled in by the RAG chain.
    PROMPT_TEMPLATE: str = """Use the context below to answer the question. Give a clear, complete answer in 2-3 sentences.

Context: {context}

Question: {question}

Answer:"""

# Single shared settings object used by every later cell.
config = Config()
print("✅ GPU-Optimized ML RAG configuration setup complete!")
print(f"📊 Embedding Model: {config.EMBEDDING_MODEL}")
print(f"🤖 LLM Model: {config.LLM_MODEL}")
print(f"📏 Chunk Size: {config.CHUNK_SIZE}")
print(f"🎯 Max Tokens: {config.MAX_NEW_TOKENS}")
print(f"🌡️  Temperature: {config.TEMPERATURE}")
print(f"📁 Data Path: {config.DATA_PATH}")
print(f"🗄️  Vector Store Path: {config.VECTORSTORE_PATH}")



✅ GPU-Optimized ML RAG configuration setup complete!
📊 Embedding Model: BAAI/bge-small-en-v1.5
🤖 LLM Model: google/flan-t5-large
📏 Chunk Size: 800
🎯 Max Tokens: 150
🌡️  Temperature: 0.3
📁 Data Path: data
🗄️  Vector Store Path: vectorstore


In [None]:
# ============================================
# Cell 3: ML INGESTION PIPELINE
# ============================================

from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os

# Ordered rules for web sources: (url substrings, source_name, category).
# Order matters: the first rule with a matching substring wins.
_WEB_SOURCE_RULES = (
    (("scikit-learn",), "Scikit-learn Docs", "framework"),
    (("tensorflow",), "TensorFlow", "framework"),
    (("pytorch",), "PyTorch", "framework"),
    (("huggingface",), "HuggingFace", "nlp"),
    (("google.com/machine-learning",), "Google ML Crash Course", "tutorial"),
    (("wikipedia",), "Wikipedia", "theory"),
    (("ibm.com",), "IBM", "concepts"),
    (("towardsdatascience", "medium.com"), "Medium/TDS", "tutorial"),
    (("geeksforgeeks",), "GeeksforGeeks", "tutorial"),
    (("machinelearningmastery",), "ML Mastery", "tutorial"),
    (("fast.ai",), "Fast.ai", "course"),
)
_WEB_SOURCE_DEFAULT = ("ML Resource", "general")

# Ordered rules for local PDFs, matched against the lower-cased file name.
_PDF_SOURCE_RULES = (
    (("paper", "arxiv", "research"), "Research Paper", "research"),
    (("book", "textbook", "guide"), "ML Textbook", "textbook"),
    (("tutorial", "course", "lesson"), "Tutorial", "tutorial"),
)
_PDF_SOURCE_DEFAULT = ("Local ML PDF", "general")


def _match_source_rule(text, rules, default):
    """Return (source_name, category) of the first rule with a substring found in *text*."""
    for needles, source_name, category in rules:
        if any(needle in text for needle in needles):
            return source_name, category
    return default


def categorize_web_source(url):
    """Map a web URL to a (source_name, category) pair.

    Matching is a case-sensitive substring test against the rule table above.
    Unknown sites fall back to ("ML Resource", "general").
    """
    return _match_source_rule(url, _WEB_SOURCE_RULES, _WEB_SOURCE_DEFAULT)


def categorize_pdf_source(source):
    """Map a PDF path to a (source_name, category) pair based on its file name.

    Only the base name is inspected, so a directory name that happens to
    contain a keyword (e.g. "research_data/") does not mis-label every file
    in it. Unknown names fall back to ("Local ML PDF", "general").
    """
    filename = os.path.basename(str(source)).lower()
    return _match_source_rule(filename, _PDF_SOURCE_RULES, _PDF_SOURCE_DEFAULT)


def load_all_sources():
    """Load curated ML web content and local PDFs.

    Each web page and PDF page becomes one document tagged with
    `source_type`, `source_name` and `category` metadata (web documents also
    get `source` set to their URL). A URL that fails to load is reported and
    skipped so one bad site does not abort ingestion.

    Returns:
        list: all loaded documents, web documents first, then PDF pages.
        Empty when nothing could be loaded.
    """
    all_documents = []

    # 1. Load FOCUSED ML Web Content (reduced for speed)
    print("📥 Loading ML web sources...")
    WEB_URLS = [
        # Core ML Concepts (fast to load)
        "https://en.wikipedia.org/wiki/Machine_learning",
        "https://www.ibm.com/topics/machine-learning",
        "https://www.geeksforgeeks.org/machine-learning/",

        # Optional: Add these after first successful run
        # "https://scikit-learn.org/stable/tutorial/machine_learning_101/index.html",
        # "https://machinelearningmastery.com/start-here/",
    ]

    for url in WEB_URLS:
        # The category depends only on the URL, so compute it once per site.
        source_name, category = categorize_web_source(url)
        try:
            docs = WebBaseLoader(url).load()
            for doc in docs:
                doc.metadata['source'] = url
                doc.metadata['source_type'] = 'web'
                doc.metadata['source_name'] = source_name
                doc.metadata['category'] = category

            all_documents.extend(docs)
            print(f"✅ Loaded: {url[:70]}...")
        except Exception as e:
            # Deliberately broad: network, HTTP and parsing errors are all
            # reported here and the remaining URLs are still processed.
            print(f"❌ Failed: {url[:70]}... - {str(e)}")

    print(f"📊 Web documents loaded: {len(all_documents)}")

    # 2. Load Local ML PDFs (research papers, textbooks, etc.)
    print(f"\n📂 Loading ML PDFs from {config.DATA_PATH}...")
    pdf_files = list(config.DATA_PATH.glob("*.pdf"))

    if pdf_files:
        print(f"   Found {len(pdf_files)} PDF(s)")
        loader = DirectoryLoader(
            str(config.DATA_PATH),
            glob="*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True,
            use_multithreading=True
        )
        pdf_docs = loader.load()

        # Add metadata to ML PDFs
        for doc in pdf_docs:
            doc.metadata['source_type'] = 'pdf'
            source_name, category = categorize_pdf_source(doc.metadata.get('source', ''))
            doc.metadata['source_name'] = source_name
            doc.metadata['category'] = category

        all_documents.extend(pdf_docs)
        print(f"✅ PDF pages loaded: {len(pdf_docs)}")
    else:
        print("⚠️  No PDFs found in data/ folder")
        print("   💡 Tip: Add ML papers, textbooks, or tutorials to the data/ folder")

    print(f"\n📚 TOTAL DOCUMENTS: {len(all_documents)}")

    # Show category breakdown
    if all_documents:
        categories = {}
        for doc in all_documents:
            cat = doc.metadata.get('category', 'unknown')
            categories[cat] = categories.get(cat, 0) + 1
        print(f"\n📊 Content Categories:")
        for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
            print(f"   • {cat.title()}: {count} documents")

    return all_documents

def create_unified_vectorstore():
    """Build, persist and return the FAISS index for all ML sources.

    Steps: load documents via load_all_sources(), split them into chunks with
    the settings from `config`, embed the chunks, build a FAISS index, save it
    under `config.VECTORSTORE_PATH / "ml_knowledge_base"`, and print statistics.

    Returns:
        The FAISS vectorstore, or None when no documents could be loaded.
    """
    print("\n🚀 Starting ML Knowledge Base Creation")
    print("=" * 70)

    # Load all ML sources
    documents = load_all_sources()

    # Nothing to index: callers must handle the None result.
    if not documents:
        print("❌ No documents loaded!")
        return None

    # Chunk documents with ML-aware splitting
    print("\n✂️ Splitting into ML-optimized chunks...")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=config.CHUNK_SIZE,
        chunk_overlap=config.CHUNK_OVERLAP,
        separators=config.CHUNK_SEPARATORS,
        length_function=len  # chunk sizes are measured in characters
    )

    chunks = text_splitter.split_documents(documents)
    print(f"✅ Created {len(chunks)} chunks")

    # Show detailed stats
    if chunks:
        avg_length = sum(len(c.page_content) for c in chunks) / len(chunks)
        max_length = max(len(c.page_content) for c in chunks)
        min_length = min(len(c.page_content) for c in chunks)

        print(f"📊 Chunk Statistics:")
        print(f"   • Average length: {avg_length:.0f} characters")
        print(f"   • Max length: {max_length} characters")
        print(f"   • Min length: {min_length} characters")

        # Preview the first 300 characters of the first chunk as a sanity check.
        print(f"\n📄 Sample ML chunk:")
        print("-" * 60)
        sample_text = chunks[0].page_content[:300]
        print(sample_text)
        print("..." if len(chunks[0].page_content) > 300 else "")
        print(f"\n   Source: {chunks[0].metadata.get('source_name', 'Unknown')}")
        print(f"   Category: {chunks[0].metadata.get('category', 'Unknown')}")
        print("-" * 60)

    # Create embeddings with ML-optimized model
    print(f"\n🧠 Creating embeddings with {config.EMBEDDING_MODEL}...")
    print("   (This model is optimized for technical/ML content)")
    # The embedding model is pinned to the CPU here even when a GPU is present.
    # The cache-loading code at the bottom of this cell uses the same settings.
    embeddings = HuggingFaceEmbeddings(
        model_name=config.EMBEDDING_MODEL,
        model_kwargs={"device": "cpu"},
        encode_kwargs={
            "batch_size": config.EMBEDDING_BATCH_SIZE,
            "normalize_embeddings": True
        }
    )

    # Build FAISS index
    print("💾 Building ML knowledge FAISS index...")
    vectorstore = FAISS.from_documents(
        documents=chunks,
        embedding=embeddings
    )

    # Save
    save_path = config.VECTORSTORE_PATH / "ml_knowledge_base"
    print(f"💿 Saving to {save_path}...")
    vectorstore.save_local(str(save_path))

    # Comprehensive stats
    # These counts are per loaded document (web page / PDF page), not per chunk.
    web_docs = sum(1 for d in documents if d.metadata.get('source_type') == 'web')
    pdf_docs = sum(1 for d in documents if d.metadata.get('source_type') == 'pdf')

    print(f"\n📊 ML Knowledge Base Stats:")
    print(f"  • Total chunks: {len(chunks)}")
    print(f"  • Web sources: {web_docs} documents")
    print(f"  • PDF sources: {pdf_docs} documents")
    print(f"  • Total vectors: {vectorstore.index.ntotal}")
    print(f"  • Embedding dimension: {vectorstore.index.d}")
    print(f"  • Storage path: {save_path.absolute()}")

    # Category breakdown in vectorstore
    chunk_categories = {}
    for chunk in chunks:
        cat = chunk.metadata.get('category', 'unknown')
        chunk_categories[cat] = chunk_categories.get(cat, 0) + 1

    # Largest category first, with its share of all chunks.
    print(f"\n  • Chunks by category:")
    for cat, count in sorted(chunk_categories.items(), key=lambda x: x[1], reverse=True):
        percentage = (count / len(chunks)) * 100
        print(f"    - {cat.title()}: {count} ({percentage:.1f}%)")

    print("\n" + "=" * 70)
    print("✅ ML Knowledge Base Complete!")
    print("💡 Your RAG system now specializes in Machine Learning topics")
    print("=" * 70)

    return vectorstore

# ============================================
# SMART CACHING: Load existing or create new
# ============================================

print("🤖 Building Machine Learning RAG System...")
print()

# Check if vectorstore already exists
# Must be the same location create_unified_vectorstore() saves to.
save_path = config.VECTORSTORE_PATH / "ml_knowledge_base"

# The presence of index.faiss is used as the marker for a usable cache.
if save_path.exists() and (save_path / "index.faiss").exists():
    print("=" * 70)
    print("📦 EXISTING VECTORSTORE FOUND!")
    print("=" * 70)
    print(f"Loading from: {save_path}")
    print("⚡ This will be INSTANT (no re-downloading/re-embedding needed)\n")

    try:
        # Load embeddings model
        # Settings mirror the ones used when the index was built, so query
        # vectors are produced the same way as the stored vectors.
        embeddings = HuggingFaceEmbeddings(
            model_name=config.EMBEDDING_MODEL,
            model_kwargs={"device": "cpu"},
            encode_kwargs={
                "batch_size": config.EMBEDDING_BATCH_SIZE,
                "normalize_embeddings": True
            }
        )

        # Load existing vectorstore
        # NOTE(review): allow_dangerous_deserialization=True presumably opts in
        # to pickle-based loading of the saved store. Acceptable only because
        # the files were written by this notebook; never point this at an
        # index obtained from an untrusted source.
        vectorstore = FAISS.load_local(
            str(save_path),
            embeddings,
            allow_dangerous_deserialization=True
        )

        print("✅ Vectorstore loaded successfully!")
        print(f"📊 Contains {vectorstore.index.ntotal} vectors")
        print(f"🎯 Embedding dimension: {vectorstore.index.d}")
        print("\n💡 To rebuild from scratch, delete the 'vectorstore/' folder\n")
        print("=" * 70)

    except Exception as e:
        # Any load failure (corrupt or incompatible cache) falls back to a full rebuild.
        print(f"❌ Error loading vectorstore: {e}")
        print("🔄 Will create new vectorstore instead...\n")
        vectorstore = create_unified_vectorstore()
else:
    print("=" * 70)
    print("🆕 NO EXISTING VECTORSTORE - CREATING NEW ONE")
    print("=" * 70)
    print("⏱️  This will take 2-5 minutes (one-time setup)")
    print("💡 Future runs will load instantly from cache!\n")

    vectorstore = create_unified_vectorstore()

# NOTE: create_unified_vectorstore() returns None when no documents could be
# loaded, so `vectorstore` may be None here even though the message says "Ready".
print("\n🎉 Ready to use! Vectorstore is available as 'vectorstore' variable")

🤖 Building Machine Learning RAG System...

🆕 NO EXISTING VECTORSTORE - CREATING NEW ONE
⏱️  This will take 2-5 minutes (one-time setup)
💡 Future runs will load instantly from cache!


🚀 Starting ML Knowledge Base Creation
📥 Loading ML web sources...
✅ Loaded: https://en.wikipedia.org/wiki/Machine_learning...
✅ Loaded: https://www.ibm.com/topics/machine-learning...
✅ Loaded: https://www.geeksforgeeks.org/machine-learning/...
📊 Web documents loaded: 3

📂 Loading ML PDFs from data...
⚠️  No PDFs found in data/ folder
   💡 Tip: Add ML papers, textbooks, or tutorials to the data/ folder

📚 TOTAL DOCUMENTS: 3

📊 Content Categories:
   • Theory: 1 documents
   • Concepts: 1 documents
   • Tutorial: 1 documents

✂️ Splitting into ML-optimized chunks...
✅ Created 329 chunks
📊 Chunk Statistics:
   • Average length: 555 characters
   • Max length: 800 characters
   • Min length: 8 characters

📄 Sample ML chunk:
------------------------------------------------------------
Machine learning - Wikipe

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

💾 Building ML knowledge FAISS index...
💿 Saving to vectorstore/ml_knowledge_base...

📊 ML Knowledge Base Stats:
  • Total chunks: 329
  • Web sources: 3 documents
  • PDF sources: 0 documents
  • Total vectors: 329
  • Embedding dimension: 384
  • Storage path: /content/vectorstore/ml_knowledge_base

  • Chunks by category:
    - Theory: 234 (71.1%)
    - Concepts: 75 (22.8%)
    - Tutorial: 20 (6.1%)

✅ ML Knowledge Base Complete!
💡 Your RAG system now specializes in Machine Learning topics

🎉 Ready to use! Vectorstore is available as 'vectorstore' variable


In [None]:
# CELL 4: RAG Query System
# ============================================

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch

def load_llm():
    """Load the seq2seq model named by config.LLM_MODEL and wrap it for LangChain.

    Uses float16 weights when CUDA is available and float32 otherwise, builds
    a "text2text-generation" pipeline with the sampling settings from
    `config`, and returns it wrapped in a HuggingFacePipeline.

    Returns:
        HuggingFacePipeline: the LLM object used as a step in the RAG chain.
    """
    print(f"🤖 Loading {config.LLM_MODEL}...")

    # Check device availability
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"🎮 Using device: {device.upper()}")
    if device == "cuda":
        print(f"   GPU: {torch.cuda.get_device_name(0)}")
        print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

    tokenizer = AutoTokenizer.from_pretrained(config.LLM_MODEL)
    # Declares a 512-token limit on the tokenizer.
    # NOTE(review): the pipeline below is not given an explicit truncation
    # setting - confirm that over-long prompts are actually cut rather than
    # only triggering a warning.
    tokenizer.model_max_length = 512

    # NOTE(review): the captured run output shows "`torch_dtype` is deprecated!
    # Use `dtype` instead!" for this call; switch the keyword once the minimum
    # supported transformers version accepts `dtype`.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        config.LLM_MODEL,
        device_map="auto",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32
    )

    # NOTE(review): min_new_tokens=80 forces at least 80 generated tokens,
    # while the prompt asks for 2-3 sentences and MAX_NEW_TOKENS is 150 -
    # confirm this floor is intended.
    # NOTE(review): early_stopping and length_penalty look like beam-search
    # controls; with do_sample=True and no num_beams they may have no effect -
    # check the transformers generation documentation.
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=config.MAX_NEW_TOKENS,
        min_new_tokens=80,  # Increased from 50 - forces longer answers
        do_sample=True,
        temperature=config.TEMPERATURE,
        top_p=config.TOP_P,
        repetition_penalty=1.5,  # Reduced from 1.8 for more natural flow
        no_repeat_ngram_size=4,  # Increased from 3
        early_stopping=True,
        length_penalty=1.2,  # NEW: Encourages longer responses
    )

    llm = HuggingFacePipeline(pipeline=pipe)
    print("✅ Language model loaded")
    return llm

def format_docs_for_prompt(docs, max_chars_per_doc=300):
    """Turn retrieved documents into the context string placed in the prompt.

    For each document: bullets ("•") become "-", all whitespace runs
    (including line breaks) collapse to single spaces, and the text is cut to
    `max_chars_per_doc` characters. The cut prefers the last sentence end
    (". ") when it lies in the final 30% of the window; otherwise the text is
    hard-cut and "..." is appended.

    Args:
        docs: iterable of objects exposing `.metadata` (dict) and
            `.page_content` (str).
        max_chars_per_doc: per-document character budget.

    Returns:
        str: one "From <source_name>: <text>" entry per document, separated by
        blank lines; "" when `docs` is empty.
    """
    formatted_parts = []

    for doc in docs:
        source = doc.metadata.get('source_name', 'Technical Source')

        # split()/join normalises every kind of whitespace in one pass, which
        # also trims the ends and removes line breaks and double spaces.
        content = ' '.join(doc.page_content.replace('•', '-').split())

        # Truncate intelligently at sentence boundary
        if len(content) > max_chars_per_doc:
            truncated = content[:max_chars_per_doc]
            last_period = truncated.rfind('. ')
            if last_period > max_chars_per_doc * 0.7:
                content = truncated[:last_period + 1]
            else:
                content = truncated + "..."

        formatted_parts.append(f"From {source}: {content}")

    return "\n\n".join(formatted_parts)


def create_rag_chain(vectorstore, llm):
    """Create the retrieval-augmented generation chain for ML questions.

    Args:
        vectorstore: store exposing `as_retriever(...)` (the FAISS index built earlier).
        llm: the LangChain LLM returned by load_llm().

    Returns:
        tuple: (rag_chain, retriever). `rag_chain.invoke(question)` returns the
        answer text. The retriever is returned as well so callers can fetch
        the source documents for display.
    """
    print("\n⛓️  Creating RAG chain...")

    # Similarity search returning the config.TOP_K closest chunks.
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": config.TOP_K}
    )

    # Prompt with {context} and {question} placeholders from the config.
    prompt = PromptTemplate.from_template(config.PROMPT_TEMPLATE)

    # Build RAG chain: the question is passed through unchanged and is also
    # used to retrieve and format the context.
    rag_chain = (
        {
            "context": retriever | format_docs_for_prompt,
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    print("✅ RAG chain ready!")
    return rag_chain, retriever

def ask_question(question: str, rag_chain, retriever, show_sources=True):
    """Ask a technical question, print the answer and its sources, and return both.

    Args:
        question: the user's question.
        rag_chain: object whose `invoke(question)` returns the raw answer text.
        retriever: object whose `invoke(question)` returns the retrieved
            documents (each exposing a `.metadata` dict).
        show_sources: when True, print a de-duplicated, consecutively numbered
            list of the sources behind the answer.

    Returns:
        tuple: (answer, source_docs) - the cleaned answer string and the
        retrieved documents, unmodified.
    """
    import re

    print(f"\n❓ Question: {question}")
    print("🔎 Searching technical knowledge base...\n")

    # Get answer
    raw_answer = rag_chain.invoke(question)

    # Clean up answer: drop bullet artifacts and bracketed source tags that
    # leaked into the generation, then normalise all whitespace.
    answer = raw_answer.replace('•', '')
    answer = re.sub(r'\[.*?\]', '', answer)
    answer = ' '.join(answer.split())

    # Retrieved again only for display; the chain does its own retrieval.
    source_docs = retriever.invoke(question)

    # Display answer
    print("💬 Answer:")
    print("=" * 70)
    print(answer)
    print("=" * 70)

    # Display sources if requested
    if show_sources:
        print("\n📚 Sources Used:")
        seen_sources = set()
        for doc in source_docs:
            source_name = doc.metadata.get('source_name', 'Unknown')
            if source_name in seen_sources:
                continue
            seen_sources.add(source_name)

            source_type = doc.metadata.get('source_type', 'unknown')
            source = doc.metadata.get('source', 'Unknown')

            # Number by position among the unique sources, so the list reads
            # 1, 2, 3 instead of skipping numbers for duplicates.
            print(f"  {len(seen_sources)}. {source_name}")
            if source_type == 'web':
                # Add the ellipsis only when the URL was actually shortened.
                shown = source if len(source) <= 80 else source[:80] + "..."
                print(f"     {shown}")
            else:
                print(f"     PDF Document")

    print()
    return answer, source_docs

# ============================================
# Initialize the system
# ============================================

print("🏗️  Building IMPROVED Technical RAG System...")
print("=" * 70)

# Fail fast with a clear message: building the knowledge base can yield None
# (no documents loaded), and discovering that only after downloading the
# multi-gigabyte LLM wastes time and ends in an opaque AttributeError.
if vectorstore is None:
    raise RuntimeError(
        "No vectorstore available - the ingestion cell loaded no documents. "
        "Fix the sources and re-run it before initializing the RAG system."
    )

# Check GPU availability first
import torch
if torch.cuda.is_available():
    print(f"✅ GPU Detected: {torch.cuda.get_device_name(0)}")
    print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    print("⚡ This will be MUCH faster!\n")
else:
    print("⚠️  Running on CPU - responses will be slower")
    print("💡 Enable GPU: Runtime → Change runtime type → T4 GPU\n")

# Load LLM
llm = load_llm()

# Create RAG chain (the retriever is kept for source display in later cells)
rag_chain, retriever = create_rag_chain(vectorstore, llm)

print("\n" + "=" * 70)
print("🏥 Technical RAG SYSTEM READY!")
print("=" * 70)

# Smoke-test the chain with questions the ingested sources should cover.
test_questions = [
    "What is machine learning?",
    "What are the types of machine learning?",
]

print("\n🧪 Running test queries...\n")
for q in test_questions:
    ask_question(q, rag_chain, retriever)
    print("\n" + "="*70 + "\n")

# Usage hints for interactive use (the knowledge base covers machine learning).
print("💡 Try asking your own machine learning questions:")
print("   answer, sources = ask_question('Your question here', rag_chain, retriever)")
print("\n💡 To skip showing sources:")
print("   answer, sources = ask_question('Your question here', rag_chain, retriever, show_sources=False)")

🏗️  Building IMPROVED Technical RAG System...
✅ GPU Detected: Tesla T4
💾 GPU Memory: 15.8 GB
⚡ This will be MUCH faster!

🤖 Loading google/flan-t5-large...
🎮 Using device: CUDA
   GPU: Tesla T4
   Memory: 15.8 GB


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cuda:0


✅ Language model loaded

⛓️  Creating RAG chain...
✅ RAG chain ready!

🏥 Technical RAG SYSTEM READY!

🧪 Running test queries...


❓ Question: What is machine learning?
🔎 Searching technical knowledge base...

💬 Answer:
The subset of artificial intelligence focused on algorithms that can “learn” the patterns of training data and, subsequently, make accurate inferences about new data. Within a subdiscipline in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions.

📚 Sources Used:
  1. IBM
     https://www.ibm.com/topics/machine-learning...
  3. Wikipedia
     https://en.wikipedia.org/wiki/Machine_learning...
  4. GeeksforGeeks
     https://www.geeksforgeeks.org/machine-learning/...




❓ Question: What are the types of machine learning?
🔎 Searching technical knowledge base...

💬 Answer:
Supervised, Unsupervised and Reinforcement Learning along

In [None]:
# CELL 5: YouTube search tool (the "MCP" video lookup) — independent of the RAG pipeline

import requests
from typing import List, Dict

class YouTubeMCPTool:
    """Thin client for the YouTube Data API v3 search endpoint.

    Used to attach a few related videos to each answer. Every failure mode
    degrades to an empty result so the chat keeps working without videos.
    """

    def __init__(self, api_key: str):
        """Store the API key and the search endpoint URL."""
        self.api_key = api_key
        self.url = "https://www.googleapis.com/youtube/v3/search"

    @staticmethod
    def _parse_items(data) -> List[Dict]:
        """Convert a decoded search response into a list of video dicts.

        Each result has the keys "title", "channel" and "url". Items missing
        the expected fields are skipped individually, so one malformed entry
        does not discard the rest. Returns [] for a response that is not a
        dict or has no items.
        """
        if not isinstance(data, dict):
            return []

        videos = []
        for item in data.get("items") or []:
            try:
                videos.append({
                    "title": item["snippet"]["title"],
                    "channel": item["snippet"]["channelTitle"],
                    "url": f"https://www.youtube.com/watch?v={item['id']['videoId']}"
                })
            except (KeyError, TypeError):
                continue
        return videos

    def fetch_videos(self, query: str, max_results: int = 3) -> List[Dict]:
        """Search YouTube for videos matching `query`.

        Args:
            query: free-text search terms.
            max_results: maximum number of videos requested from the API.

        Returns:
            A list of {"title", "channel", "url"} dicts. Empty when no API key
            is configured, or when the request fails or returns invalid JSON.
        """
        # Without a key the request can only be rejected; skip the round-trip.
        if not self.api_key:
            return []

        params = {
            "part": "snippet",
            "q": query,
            "type": "video",
            "maxResults": max_results,
            "key": self.api_key
        }

        try:
            response = requests.get(self.url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Best-effort feature: report and carry on. Only the exception
            # type is printed, because the message of an HTTP error contains
            # the request URL, which includes the API key.
            print(f"⚠️  YouTube search failed: {type(exc).__name__}")
            return []

        return self._parse_items(data)


In [None]:
# MCP INITIALIZATION CELL (RUN THIS ONCE)
import os

# SECURITY: the API key is read from the environment and must never be
# committed with the notebook. A key that was previously hard-coded here
# should be treated as leaked and revoked in the Google Cloud console.
# Set it before running this cell, e.g.:
#   os.environ["YOUTUBE_API_KEY"] = "<your key>"
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "").strip()

if not YOUTUBE_API_KEY:
    # The tool is still created so later cells keep working; video lookups
    # will simply yield no results.
    print("⚠️  YOUTUBE_API_KEY is not set - video suggestions will be empty")

youtube_tool = YouTubeMCPTool(api_key=YOUTUBE_API_KEY)

print("✅ YouTube MCP Tool initialized")


✅ YouTube MCP Tool initialized


In [None]:
# CELL 6: Gradio Web Interface for ML RAG + MCP
# ============================================

import gradio as gr

# -------------------------------
# CHAT FUNCTION (RAG + MCP)
# -------------------------------
def chat_with_sources(question, chat_history):
    """Answer one chat turn: RAG answer, then source list, then related videos.

    Args:
        question: text typed by the user.
        chat_history: list of (question, response) pairs; appended to in place.

    Returns:
        The same history object twice - once for the chat display and once
        for the stored state. A blank question returns it unchanged.
    """
    stripped = question.strip()
    if not stripped:
        return chat_history, chat_history

    # Step 1: generate the answer and fetch the documents to cite.
    answer = rag_chain.invoke(question)
    source_docs = retriever.invoke(question)

    # Step 2: one bullet per distinct source name among the first four docs.
    listed = set()
    source_lines = []
    for doc in source_docs[:4]:
        label = doc.metadata.get("source_name", "Unknown")
        kind = doc.metadata.get("source_type", "unknown")
        if label in listed:
            continue
        listed.add(label)
        suffix = "" if kind == "web" else " (PDF)"
        source_lines.append(f"- {label}{suffix}\n")
    sources_text = "\n\n📚 **Sources:**\n" + "".join(source_lines)

    # Step 3: look up related videos for the same question.
    videos = youtube_tool.fetch_videos(
        query=f"{question} machine learning tutorial",
        max_results=3
    )
    if videos:
        video_block = "".join(
            f"- **{v['title']}**\n  Channel: {v['channel']}\n  {v['url']}\n"
            for v in videos
        )
    else:
        video_block = "- No videos found.\n"
    mcp_text = "\n\n🎥 **Related Learning Videos:**\n" + video_block

    # Step 4: record the turn and hand the history back to both outputs.
    chat_history.append((question, answer + sources_text + mcp_text))
    return chat_history, chat_history


# -------------------------------
# UTILITIES
# -------------------------------
def clear_chat():
    """Return fresh empty values for the chat display and the stored history."""
    empty_display = []
    empty_state = []
    return empty_display, empty_state


def load_example_question(topic):
    """Return the sample question for a quick-topic label, or "" if the label is unknown."""
    topic_questions = dict((
        ("ML Fundamentals", "What is machine learning?"),
        ("Algorithms & Models", "Explain random forests"),
        ("Training & Optimization", "What is gradient descent?"),
        ("Evaluation & Metrics", "What is cross-validation?"),
        ("Common Issues", "What is overfitting?"),
    ))
    if topic in topic_questions:
        return topic_questions[topic]
    return ""


# -------------------------------
# GRADIO UI
# -------------------------------
# Layout: a narrow column of quick-topic radio buttons next to a wider column
# holding the chat display, the question box and the Ask/Clear buttons.
with gr.Blocks(theme=gr.themes.Soft(), title="ML Education Assistant") as demo:

    gr.Markdown("""
    # 🤖 Machine Learning Education Assistant

    Ask ML questions and get:
    - 📚 Grounded answers (RAG)
    - 🎥 Learning videos (MCP)
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # Selecting a topic fills the question box (wired up below).
            topic_buttons = gr.Radio(
                ["ML Fundamentals", "Algorithms & Models", "Training & Optimization",
                 "Evaluation & Metrics", "Common Issues"],
                label="Quick Topics"
            )

        with gr.Column(scale=2):
            # NOTE(review): chat_with_sources feeds this component a list of
            # (question, response) tuples - confirm the installed Gradio
            # version still accepts that history format.
            chatbot = gr.Chatbot(height=500)
            question_input = gr.Textbox(
                placeholder="Ask about ML concepts, algorithms, techniques...",
                lines=2
            )

            with gr.Row():
                submit_btn = gr.Button("Ask 🔍")
                clear_btn = gr.Button("Clear 🗑️")

            # Conversation history kept alongside the visible chat display.
            chat_state = gr.State([])

    # Clickable sample questions that populate the question box.
    gr.Examples(
        examples=[
            ["What is machine learning?"],
            ["Explain supervised vs unsupervised learning"],
            ["How does gradient descent work?"],
            ["What is overfitting and how do you prevent it?"],
        ],
        inputs=question_input,
    )

    # The button and the textbox submit event run the same handler; the
    # chained .then(...) step empties the question box afterwards.
    submit_btn.click(
        chat_with_sources,
        inputs=[question_input, chat_state],
        outputs=[chatbot, chat_state]
    ).then(lambda: "", outputs=question_input)

    question_input.submit(
        chat_with_sources,
        inputs=[question_input, chat_state],
        outputs=[chatbot, chat_state]
    ).then(lambda: "", outputs=question_input)

    # Reset both the visible chat and the stored history.
    clear_btn.click(clear_chat, outputs=[chatbot, chat_state])

    # Picking a quick topic loads its example question into the textbox.
    topic_buttons.change(
        load_example_question,
        inputs=topic_buttons,
        outputs=question_input
    )

# -------------------------------
# LAUNCH
# -------------------------------
# share=True publishes the app on a public gradio.live URL (see the run output
# below); anyone with the link can query the model and consume YouTube API quota.
# debug=True keeps this cell running so errors and logs stay visible.
# NOTE(review): server_name="0.0.0.0" presumably binds all network interfaces -
# confirm this is wanted when running outside Colab.
demo.launch(
    share=True,
    debug=True,
    server_name="0.0.0.0",
    server_port=7860
)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://df73013fba49559869.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
