In [10]:
import fitz  # PyMuPDF for PDFs

doc = fitz.open("/Users/nageshmuniraja/Downloads/MCP_DOC.pdf")
text = ""
for page in doc:
    text += page.get_text()
chunks = text.split("\n\n")
print(chunks)

['Building MCP servers for deep research\nBuild an MCP server to use with deep research via API or ChatGPT.\nModel Context Protocol (MCP) is an open protocol that\'s becoming the industry standard for\nextending AI models with additional tools and knowledge. Remote MCP servers can be used\nto connect models over the Internet to new data sources and capabilities.\nIn this guide, we\'ll cover how to build a remote MCP server that reads data from a private data\nsource (a vector store) and makes it available to deep research models in ChatGPT via\nconnectors, and via API.\nYou can use data from any source to power a remote MCP server, but for simplicity, we will use\nvector stores in the OpenAI API. Begin by uploading a PDF document to a new vector store -\nyou can use this public domain 19th century book about cats for an example.\nYou can upload files and create a vector store in the dashboard here, or you can create vector\nstores and upload files via API. Follow the vector store guide

In [30]:
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Check if index exists
index_name = "support-knowledge"
if index_name not in [i.name for i in pc.list_indexes()]:
    pc.create_index(
        name=index_name,
        dimension=1536,  # Match your embedding model (OpenAI = 1536 for text-embedding-ada-002)
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",        # or "gcp"
            region="us-east-1"  # or the region matching your Pinecone project
        )
    )
    print(f"✅ Created index '{index_name}'")
else:
    print(f"ℹ️ Index '{index_name}' already exists.")

# Get the index
index = pc.Index(index_name)


✅ Created index 'support-knowledge'


In [51]:
from dotenv import load_dotenv
import os
from langchain_community.embeddings.openai import OpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec

# Load environment variables from .env
load_dotenv()

# Read your API keys
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT")  # e.g., "gcp-starter", "us-east1-gcp"

# Use OpenAI to generate embeddings
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectors = embedding.embed_documents(chunks)

# Initialize Pinecone
pc= Pinecone(api_key=PINECONE_API_KEY)

# Connect to the index
index = pc.Index("support-knowledge")

# Upsert embeddings
# index.upsert([(f"id_{i}", vector) for i, vector in enumerate(vectors)])
index.upsert([
    (f"id_{i}", vector, {"text": chunk})
    for i, (vector, chunk) in enumerate(zip(vectors, chunks))
])

print(f"✅ Upserted {len(vectors)} vectors to Pinecone index 'support-knowledge'")


✅ Upserted 1 vectors to Pinecone index 'support-knowledge'


In [52]:
def retrieve_relevant_chunks(user_query, embedding, index, k=3):
    query_vec = embedding.embed_query(user_query)
    results = index.query(vector=query_vec, top_k=k, include_metadata=True)
    return [r['metadata']['text'] for r in results['matches']]


In [66]:
query = 'highlight the summary of document'

In [67]:
import openai
context = "\n".join(retrieve_relevant_chunks(query, embedding, index))

prompt = f"""
You are a helpful support agent. Using ONLY the context below, answer the user's question.
Context:
{context}

User Query: {query}
"""
client = openai.OpenAI(api_key=OPENAI_API_KEY)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}]
)

print(response.choices[0].message.content)



The document discusses building MCP servers for deep research, creating remote MCP servers, connecting them to ChatGPT, implementing search and fetch tools, handling authentication, risks associated with custom MCP servers, and connecting to trusted servers.
