<a href="https://colab.research.google.com/github/Anirudh11011/Testing-MCP/blob/main/Testing_MCP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install -U \
  langchain-community langchain-core langchain-text-splitters langchain-groq \
  chromadb sentence-transformers \
  "mcp[cli]" ddgs nest_asyncio requests bs4


In [None]:
import os
from getpass import getpass

import nest_asyncio # Import nest_asyncio
nest_asyncio.apply() # Apply the patch for asyncio in Colab

from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq

# MCP client (your file)
from mcp_client import call_tool

In [None]:
# Never hardcode credentials in a notebook: reuse the key if the environment
# already provides it, otherwise prompt for it without echoing to the output.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ_API_KEY: ")

In [None]:
# Sanity check: print the tools reported by the local `mcp_client` helper
# (the recorded output below lists `web_search` and `fetch_url`).
from mcp_client import list_tools
print(list_tools())


meta=None nextCursor=None tools=[Tool(name='web_search', title=None, description='\n    Web search (DuckDuckGo via ddgs).\n    Returns list of {title, url, snippet}.\n    ', inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'max_results': {'default': 6, 'title': 'Max Results', 'type': 'integer'}}, 'required': ['query'], 'title': 'web_searchArguments', 'type': 'object'}, outputSchema={'properties': {'result': {'items': {'additionalProperties': True, 'type': 'object'}, 'title': 'Result', 'type': 'array'}}, 'required': ['result'], 'title': 'web_searchOutput', 'type': 'object'}, icons=None, annotations=None, meta=None, execution=None), Tool(name='fetch_url', title=None, description='\n    Downloads a page and returns cleaned visible text.\n    Useful after search, so LLM can read the page.\n    ', inputSchema={'properties': {'url': {'title': 'Url', 'type': 'string'}, 'timeout_s': {'default': 15, 'title': 'Timeout S', 'type': 'integer'}, 'max_chars': {'default': 800

In [None]:
# --- Notebook configuration -------------------------------------------------
# Domain used to scope web searches and to brand the system prompt.
UNIVERSITY_SITE = "uta.edu"  # change this

# On-disk location of the Chroma collection (relative to the working directory).
DB_DIR = os.path.join(os.getcwd(), "university_db")

# Chat model used to answer questions; temperature 0 keeps sampling at its minimum.
llm = ChatGroq(temperature=0, model="llama-3.1-8b-instant")

# Sentence-transformers model that embeds both stored chunks and queries.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Persistent vector store backing the assistant.
vectordb = Chroma(
    persist_directory=DB_DIR,
    embedding_function=embedding,
    collection_name="university_assistant",
)

# Retriever returning the 4 nearest chunks for a query.
retriever = vectordb.as_retriever(search_kwargs=dict(k=4))


  embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
  vectordb = Chroma(


In [None]:
# Splits fetched pages into chunks of up to 1000 characters, with 150 characters
# of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=150, chunk_size=1000)

def search_university_site(question: str, max_results: int = 6):
    """Run the `web_search` tool restricted to `UNIVERSITY_SITE`.

    Args:
        question: Free-text question; it is prefixed with a `site:` filter.
        max_results: Maximum number of results requested from the tool.

    Returns:
        Whatever `call_tool` returns for the `web_search` tool.
    """
    return call_tool(
        "web_search",
        query=f"site:{UNIVERSITY_SITE} {question}",
        max_results=max_results,
    )

def extract_valid_urls(search_results):
    """Pull the usable HTTP(S) URLs out of a `web_search` result.

    Args:
        search_results: Expected to be a list of dicts shaped like
            {"title", "url", "snippet"}. Anything else yields an empty list.

    Returns:
        A list of unique URLs in their original order. Entries that are not
        dicts, whose "url" is missing or not a string, or whose URL does not
        start with http:// or https:// are skipped.
    """
    if not isinstance(search_results, list):
        return []

    urls = []
    seen = set()
    for entry in search_results:
        if not isinstance(entry, dict):
            continue
        candidate = entry.get("url")
        # Guard the type first: a non-string url would otherwise raise on startswith.
        if not isinstance(candidate, str):
            continue
        if not candidate.startswith(("http://", "https://")):
            continue
        # Drop repeats so the same page is not fetched and ingested twice.
        if candidate in seen:
            continue
        seen.add(candidate)
        urls.append(candidate)
    return urls

def fetch_page_text(url: str):
    """Fetch a page through the `fetch_url` tool and return its text.

    Args:
        url: Address of the page to download.

    Returns:
        The page text as a non-empty string, or None when the tool result is
        not a dict, reports an HTTP error status (>= 400), or carries no
        usable text.
    """
    # MCP tool
    result = call_tool("fetch_url", url=url)
    if not isinstance(result, dict):
        return None

    # The tool result includes a "status_code"; do not ingest error pages.
    status = result.get("status_code")
    if isinstance(status, int) and status >= 400:
        return None

    text = result.get("text")
    if not isinstance(text, str) or not text.strip():
        return None
    return text

def store_web_content(text: str, source_url: str):
    """Chunk a page and upsert the chunks into the vector store.

    Args:
        text: Page text to index. Empty or whitespace-only text is ignored.
        source_url: URL the text came from; stored as the "source" metadata
            and used to derive stable chunk ids.

    Returns:
        None.
    """
    import uuid

    if not text or not text.strip():
        return

    docs = [Document(page_content=text, metadata={"source": source_url})]
    splits = text_splitter.split_documents(docs)
    if not splits:
        return

    # Deterministic ids per (url, chunk index): ingesting the same page again
    # overwrites its chunks instead of piling up duplicates that would crowd
    # the retriever's top-k. NOTE: if a page shrinks, chunks beyond the new
    # chunk count from an earlier ingest are not removed.
    chunk_ids = [
        str(uuid.uuid5(uuid.NAMESPACE_URL, f"{source_url}#chunk-{index}"))
        for index in range(len(splits))
    ]
    vectordb.add_documents(splits, ids=chunk_ids)

    # Explicit persistence is deprecated in recent Chroma wrappers and may be
    # absent; only call it when the store still exposes it.
    persist = getattr(vectordb, "persist", None)
    if callable(persist):
        persist()


In [None]:
# Prompt for the QA step. The system message pins the model to the retrieved
# context and asks for the exact sentinel NOT_FOUND when the context has no
# answer; `needs_web_search` keys off that sentinel.
prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        # Only this first fragment is an f-string: UNIVERSITY_SITE is baked in
        # when the cell runs, so changing the constant requires re-running it.
        f"You are a university assistant for {UNIVERSITY_SITE}. "
        "Answer ONLY from the provided context. "
        "If the answer is not in context, respond exactly with: NOT_FOUND"
    ),
    # {question} and {context} are template variables filled in at invoke time.
    ("human", "Question: {question}\n\nContext:\n{context}"),
])

# Pipeline: fill the prompt -> call the chat model -> reduce the reply to a plain string.
qa_chain = prompt | llm | StrOutputParser()

def format_docs(docs):
    """Concatenate the `page_content` of each document, separated by blank lines.

    Returns an empty string when `docs` is empty or None.
    """
    return "\n\n".join(doc.page_content for doc in docs) if docs else ""

def needs_web_search(answer: str) -> bool:
    """Decide whether the model's answer means "not in context".

    Args:
        answer: Raw model reply (may be None or empty).

    Returns:
        True when the reply is empty or starts with the NOT_FOUND sentinel,
        written as "not_found" or "not found" in any letter case and
        optionally wrapped in quotes, markdown markers, or followed by
        punctuation or an explanation. False for any other reply.
    """
    import re

    # Strip whitespace plus the wrappers models commonly add around a sentinel.
    normalized = (answer or "").strip().lower().strip("\"'`*.!: \t\r\n")
    if not normalized:
        return True
    # `\b` stops words that merely begin with "found" (e.g. "foundation") from matching.
    return bool(re.match(r"not[\s_]+found\b", normalized))

def answer_from_context(question: str):
    """Answer a question using only what the retriever finds in the vector store.

    Args:
        question: The user's question.

    Returns:
        A `(answer, docs)` tuple: the stripped model reply and the retrieved
        documents that were supplied as context.
    """
    retrieved = retriever.invoke(question)
    payload = {"question": question, "context": format_docs(retrieved)}
    reply = qa_chain.invoke(payload)
    return reply.strip(), retrieved


In [None]:
def university_assistant(question: str, max_urls: int = 3):
    """Answer a question from the vector DB, falling back to a site search.

    Flow: try the existing index first; if the model signals it has no answer,
    search the university site, ingest up to `max_urls` result pages, and ask
    again.

    Args:
        question: The user's question.
        max_urls: Maximum number of search-result pages to fetch and ingest.

    Returns:
        The answer text, or a human-readable explanation when nothing usable
        could be found or ingested.
    """
    # 1) Try from existing DB
    answer, _ = answer_from_context(question)
    if not needs_web_search(answer):
        return answer

    # 2) Use MCP web search
    print("🔎 Searching university website...")
    search_results = search_university_site(question, max_results=6)
    candidate_urls = extract_valid_urls(search_results)

    if not candidate_urls:
        return "Sorry, I couldn't find any relevant university pages to check."

    # 3) Fetch + store top pages
    ingested = 0
    for url in candidate_urls[:max_urls]:
        # A single unreachable or unparsable page must not abort the whole
        # lookup; report it and move on to the next candidate.
        try:
            page_text = fetch_page_text(url)
            if not page_text:
                continue
            store_web_content(page_text, url)
        except Exception as exc:
            print(f"⚠️ Skipping {url}: {exc}")
            continue
        ingested += 1

    if ingested == 0:
        return "I checked the university site but could not ingest any usable content."

    # 4) Answer again after ingest
    print(f"📚 Stored {ingested} new page(s) into the vector DB. Re-running the answer...")
    refreshed_answer, _ = answer_from_context(question)

    if needs_web_search(refreshed_answer):
        return "I searched the university site but still cannot find a reliable answer."

    return refreshed_answer


In [None]:
def chat():
    """Run an interactive question/answer loop on stdin/stdout.

    Blank input is ignored. Typing 'exit' or 'quit', pressing Ctrl-C, or
    reaching end-of-input ends the loop. Any other error raised while
    answering is printed and the loop continues.
    """
    print("🎓 University Assistant is ready!")
    print("Type 'exit' or 'quit' to stop.\n")

    while True:
        try:
            q = input("Type your question here: ").strip()
            if not q:
                continue
            if q.lower() in ["exit", "quit"]:
                print("👋 Goodbye!")
                break

            response = university_assistant(q)
            print("\nAssistant:", response, "\n")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop as well: once stdin is closed, every
            # further input() call raises it again, and the generic handler
            # below would spin forever printing the same error.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            print("⚠️ Error:", str(e))


In [None]:
# Start the interactive session (blocks until the user types 'exit'/'quit' or interrupts).
chat()


üéì University Assistant is ready!
Type 'exit' or 'quit' to stop.


üëã Goodbye!


In [None]:
# Smoke test: call the `web_search` tool directly and inspect the raw return
# value (the recorded output below is a list of dicts with title/url/snippet).
from mcp_client import call_tool

r = call_tool("web_search", query="site:uta.edu admissions", max_results=2)
print(type(r), r)


<class 'list'> [{'title': 'Admissions - Admissions - The University of Texas at Arlington', 'url': 'https://www.uta.edu/admissions/admissions-new/', 'snippet': 'Fall 2026 Admission Application Opens August 1, 2025 Mark your calendar! Our fall 2026 Admission Application Opens August 1, 2025. If you need help completing your application, please contact your admissions counselor.'}, {'title': 'Application Status - Admissions - The University of Texas at ...', 'url': 'https://www.uta.edu/admissions/apply/application-status', 'snippet': 'Under the admissions section, you will see the status of your application. If you have a status of "Incomplete," please refer to your to-do list for any items that we may still need from you.'}]


In [None]:
# Smoke test: call the `fetch_url` tool directly to confirm the result shape
# (the recorded output below is a dict with keys url / status_code / text).
from mcp_client import call_tool

url = "https://www.uta.edu/admissions/apply"
page = call_tool("fetch_url", url=url)

print(type(page))
print(page.keys())
# Preview only the first 400 characters to keep the cell output short.
print(page["text"][:400])


<class 'dict'>
dict_keys(['url', 'status_code', 'text'])
Apply - Admissions - The University of Texas at Arlington Students Family Faculty & Staff Alumni Maps Events UTA West Visit Give Espa√±ol Search U T A Academics Admissions About UTA Campus Life Research News Athletics Apply Apply Undergraduate Graduate Request Information Undergraduate At The University of Texas at Arlington, you'll find a vibrant community with hundreds of student organizations, k
