In [29]:
from mcp.server.fastmcp import FastMCP
import httpx
# import asyncio
from urllib.parse import urljoin
import requests
import os
# Create MCP server
mcp = FastMCP("Jina Content Extractor")

JINA_BASE_URL = "https://r.jina.ai/"

print("✓ Created Jina Content Extractor MCP server")



✓ Created Jina Content Extractor MCP server


### Tool 1: Extract Content

In [18]:

@mcp.tool()
def extract_content(url: str) -> str:
    """Extract clean content from URL using Jina Reader"""
    try:
        jina_url = urljoin(JINA_BASE_URL, url)
        response = requests.get(jina_url, timeout=30)
        
        if response.status_code == 200:
            content = response.text
            return f"Content from {url}:\n\n{content[:1000]}..."
        else:
            return f"Extraction failed: HTTP {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"

print("✓ Added extract_content tool")



✓ Added extract_content tool


### Tool 2: Extract with Markdown

In [19]:

@mcp.tool()
def extract_markdown(url: str) -> str:
    """Extract content as markdown"""
    try:
        jina_url = f"{JINA_BASE_URL}{url}"
        response = requests.get(jina_url, params={"format": "markdown"}, timeout=30)
        
        if response.status_code == 200:
            return f"Markdown from {url}:\n\n{response.text[:800]}..."
        else:
            return f"Failed: HTTP {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"

print("✓ Added extract_markdown tool")



✓ Added extract_markdown tool


In [20]:
# Resource: Supported formats
@mcp.resource("formats://supported")
def get_supported_formats() -> str:
    """List supported extraction formats"""
    formats = [
        "Plain text (default)",
        "Markdown",
        "JSON structured",
        "HTML cleaned"
    ]
    
    return "Supported extraction formats:\n" + "\n".join([
        f"• {fmt}" for fmt in formats
    ])

print("✓ Added formats resource")


✓ Added formats resource


In [22]:
def test_jina_extractor():
    """Test Jina extraction"""
    test_urls = [
        "https://arxiv.org/abs/2301.07041",
        "https://github.com/microsoft/mcp-for-beginners"
    ]
    
    for url in test_urls:
        print(f"\nExtracting: {url}")
        result = extract_content(url)
        print(f"Result: {result[:1000]}...")

print("✓ Jina extractor ready")
test_jina_extractor()

✓ Jina extractor ready

Extracting: https://arxiv.org/abs/2301.07041
Result: Content from https://arxiv.org/abs/2301.07041:

<!DOCTYPE html>
<html lang="en">

<head>  <title>[2301.07041] Verifiable Fully Homomorphic Encryption</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png">
  <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest">
  <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5">
  <meta name="msapplication-TileColor" content="#da532c">
  <meta name="theme-color" content="#ffffff">
  <link rel="stylesheet" type="text/css" media="screen" href="/static/brow

In [24]:
# Test markdown extraction
def test_markdown():
    print("\n=== Testing Markdown Extraction ===")
    
    test_urls = [
        "https://arxiv.org/abs/2301.07041",
        "https://github.com/microsoft/mcp-for-beginners"
    ]
    
    for url in test_urls:
        print(f"\nMarkdown from: {url}")
        result = extract_markdown(url)
        print(f"Result: {result[:200]}...")

# Run test
test_markdown()


=== Testing Markdown Extraction ===

Markdown from: https://arxiv.org/abs/2301.07041
Result: Markdown from https://arxiv.org/abs/2301.07041:

Title: Verifiable Fully Homomorphic Encryption

URL Source: https://arxiv.org/abs/2301.07041?format=markdown

Markdown Content:
[2301.07041] Verifiable...

Markdown from: https://github.com/microsoft/mcp-for-beginners
Result: Markdown from https://github.com/microsoft/mcp-for-beginners:

Title: GitHub - microsoft/mcp-for-beginners: This open-source curriculum is designed to teach the concepts and fundamentals of the Model ...


In [23]:
def extract_clean_text(url: str) -> str:
    try:
        # Try different Jina URL format
        clean_url = f"https://r.jina.ai/{url}?format=text"
        response = requests.get(clean_url, timeout=30)
        return response.text[:1000] if response.status_code == 200 else "Failed"
    except Exception as e:
        return f"Error: {e}"

# Test
result = extract_clean_text("https://arxiv.org/abs/2301.07041")
print(result)

Title: Verifiable Fully Homomorphic Encryption

URL Source: https://arxiv.org/abs/2301.07041?format=text

Markdown Content:
[2301.07041] Verifiable Fully Homomorphic Encryption


[Skip to main content](https://arxiv.org/abs/2301.07041?format=text#content)

[![Image 1: Cornell University Logo](https://arxiv.org/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg)](https://www.cornell.edu/)

We gratefully acknowledge support from the Simons Foundation, [member institutions](https://info.arxiv.org/about/ourmembers.html), and all contributors.[Donate](https://info.arxiv.org/about/donate.html)

[](https://arxiv.org/IgnoreMe)

[![Image 2: arxiv logo](https://arxiv.org/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg)](https://arxiv.org/)>[cs](https://arxiv.org/list/cs/recent)> arXiv:2301.07041 

[Help](https://info.arxiv.org/help) | [Advanced Search](https://arxiv.org/search/advanced)

Search

[![Image 3: arXiv logo](https://arxiv.org/static/browse/0.


In [31]:
@mcp.tool()
def extract_arxiv_pdf(paper_id: str) -> str:
    """Extract PDF content from ArXiv paper"""
    try:
        import requests
        from PyPDF2 import PdfReader
        from io import BytesIO
        
        # Download PDF
        pdf_url = f"https://arxiv.org/pdf/{paper_id}.pdf"
        response = requests.get(pdf_url)
        
        if response.status_code == 200:
            # Extract text from PDF
            pdf_reader = PdfReader(BytesIO(response.content))
            text = ""
            for page in pdf_reader.pages[:3]:  # First 3 pages
                text += page.extract_text()
            
            return f"PDF content from {paper_id}:\n\n{text[:1000]}..."
        else:
            return f"PDF download failed: {response.status_code}"
            
    except Exception as e:
        return f"PDF extraction error: {str(e)}"

# Test
result = extract_arxiv_pdf("2301.07041")
print(result)

PDF content from 2301.07041:

arXiv:2301.07041v2  [cs.CR]  11 Feb 2023Veriﬁable Fully Homomorphic Encryption
Alexander Viand*, Christian Knabenhans*, Anwar Hithnawi
ETH Zurich
Abstract —Fully Homomorphic Encryption (FHE) is seeing in-
creasing real-world deployment to protect data in use by all ow-
ing computation over encrypted data. However, the same mal-
leability that enables homomorphic computations also rais es
integrity issues, which have so far been mostly overlooked.
While FHE’s lack of integrity has obvious implications for
correctness, it also has severe implications for conﬁdenti ality:
a malicious server can leverage the lack of integrity to carr y out
interactive key-recovery attacks. As a result, virtually a ll FHE
schemes and applications assume an honest-but-curious ser ver
who does not deviate from the protocol. In practice, however ,
this assumption is insufﬁcient for a wide range of deploymen t
scenarios. While there has been work that aims to address
this gap, thes

In [33]:
from dotenv import load_dotenv
load_dotenv("/home/siamai/deepsad/local-deep-researcher/.env")

True

In [34]:
@mcp.tool()
def extract_arxiv_pdf(paper_id: str) -> str:
    """Extract ArXiv PDF content using Jina"""
    try:
        pdf_url = f"https://arxiv.org/pdf/{paper_id}.pdf"
        jina_url = f"https://r.jina.ai/{pdf_url}"
        jina_api_key = os.getenv("JINA_API_KEY")
        if not jina_api_key:
            return "Error: JINA_API_KEY environment variable not set"
        headers = {"Authorization": f"Bearer {jina_api_key}"}
        response = requests.get(jina_url, headers=headers, timeout=60)
        
        if response.status_code == 200:
            content = response.text
            return f"PDF content from {paper_id}:\n\n{content[:1500]}..."
        else:
            return f"PDF extraction failed: HTTP {response.status_code}"
            
    except Exception as e:
        return f"Error: {str(e)}"


# Test PDF extraction
def test_pdf_extraction():
    print("=== Testing PDF Extraction ===")
    
    # Test ArXiv paper PDF
    result = extract_arxiv_pdf("2301.07041")
    print(f"ArXiv PDF: {result[:1000]}...")
    
# Run test
test_pdf_extraction()

=== Testing PDF Extraction ===
ArXiv PDF: PDF content from 2301.07041:

Title: 

URL Source: https://arxiv.org/pdf/2301.07041.pdf

Markdown Content:
> arXiv:2301.07041v2 [cs.CR] 11 Feb 2023

# Verifiable Fully Homomorphic Encryption 

Alexander Viand *, Christian Knabenhans *, Anwar Hithnawi 

ETH Zurich 

Abstract —Fully Homomorphic Encryption (FHE) is seeing in-creasing real-world deployment to protect data in use by allow-ing computation over encrypted data. However, the same mal-leability that enables homomorphic computations also raises 

integrity issues, which have so far been mostly overlooked. While FHE’s lack of integrity has obvious implications for correctness, it also has severe implications for confidentiality: a malicious server can leverage the lack of integrity to carry out interactive key-recovery attacks. As a result, virtually all FHE schemes and applications assume an honest-but-curious server who does not deviate from the protocol. In practice, however, this assum

In [35]:
@mcp.tool()
def extract_any_pdf(pdf_url: str) -> str:
    """Extract any PDF using Jina"""
    try:
        jina_url = f"https://r.jina.ai/{pdf_url}"
        jina_api_key = os.getenv("JINA_API_KEY")
        if not jina_api_key:
            return "Error: JINA_API_KEY environment variable not set"
        headers = {"Authorization": f"Bearer {jina_api_key}"}
        response = requests.get(jina_url, headers=headers, timeout=60)
        
        if response.status_code == 200:
            return f"PDF content:\n\n{response.text[:1500]}..."
        else:
            return f"Failed: HTTP {response.status_code}"
            
    except Exception as e:
        return f"Error: {str(e)}"
    
# Test any PDF extraction
def test_any_pdf_extraction():
    print("=== Testing Any PDF Extraction ===")
    
    # Test any PDF URL
    pdf_url = "https://arxiv.org/pdf/2301.07041.pdf"
    result = extract_any_pdf(pdf_url)
    print(f"PDF content: {result[:1000]}...")
# Run test
test_any_pdf_extraction()

=== Testing Any PDF Extraction ===
PDF content: PDF content:

Title: 

URL Source: https://arxiv.org/pdf/2301.07041.pdf

Markdown Content:
> arXiv:2301.07041v2 [cs.CR] 11 Feb 2023

# Verifiable Fully Homomorphic Encryption 

Alexander Viand *, Christian Knabenhans *, Anwar Hithnawi 

ETH Zurich 

Abstract —Fully Homomorphic Encryption (FHE) is seeing in-creasing real-world deployment to protect data in use by allow-ing computation over encrypted data. However, the same mal-leability that enables homomorphic computations also raises 

integrity issues, which have so far been mostly overlooked. While FHE’s lack of integrity has obvious implications for correctness, it also has severe implications for confidentiality: a malicious server can leverage the lack of integrity to carry out interactive key-recovery attacks. As a result, virtually all FHE schemes and applications assume an honest-but-curious server who does not deviate from the protocol. In practice, however, this assumption is i