From 356814b704d741d8595d0428c1319c1f4e99fe32 Mon Sep 17 00:00:00 2001 From: mahour Date: Mon, 22 Sep 2025 12:40:02 +0530 Subject: [PATCH 1/4] added pdf 2 md docs --- README.md | 2 +- gatsby-browser.js | 7 + gatsby-config.js | 13 + src/pages/apis/index.md | 2 +- .../howtos/pdf-accessibility-checker-api.md | 3 +- .../howtos/pdf-to-markdown-api.md | 126 ++++ src/pages/resources/openapi.json | 673 +++++++++++++++++- 7 files changed, 805 insertions(+), 21 deletions(-) create mode 100644 src/pages/overview/pdf-services-api/howtos/pdf-to-markdown-api.md diff --git a/README.md b/README.md index 6b9c9048e..1928c4338 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## How to develop -For local development, simply use : +For local development, simply use: ```bash $ yarn install diff --git a/gatsby-browser.js b/gatsby-browser.js index 3d146e8c7..19967e7cf 100644 --- a/gatsby-browser.js +++ b/gatsby-browser.js @@ -262,6 +262,13 @@ export const onRouteUpdate = ({ location, prevLocation }) => { ) { pageHeadTittle = "PDF Services API Extract PDF"; } else if ( + window.location.pathname.indexOf( + "pdf-services-api/howtos/pdf-to-markdown-api/" + ) >= 0 + ) { + pageHeadTittle = "PDF Services API PDF to Markdown API"; + } + else if ( window.location.pathname.indexOf( "pdf-services-api/howtos/pdf-properties/" ) >= 0 diff --git a/gatsby-config.js b/gatsby-config.js index 1a6250c89..a95e93273 100644 --- a/gatsby-config.js +++ b/gatsby-config.js @@ -33,6 +33,11 @@ module.exports = { description: 'Create, combine and export PDFs', path: '../document-services/apis/pdf-services/' }, + { + title: 'PDF to Markdown', + description: 'Convert PDF documents to Markdown format', + path: '../document-services/apis/pdf-to-markdown/' + }, { title: 'PDF Accessibility Auto-Tag', description: 'Auto-tag PDF content to improve accessibility', @@ -229,6 +234,10 @@ module.exports = { title: 'Extract PDF', path: 'overview/pdf-services-api/howtos/extract-pdf.md' }, + { + title: 'PDF to Markdown API', + 
path: 'overview/pdf-services-api/howtos/pdf-to-markdown-api.md' + }, { title: 'Get PDF Properties', path: 'overview/pdf-services-api/howtos/pdf-properties.md' @@ -716,6 +725,10 @@ module.exports = { title: 'Extract PDF', path: 'overview/legacy-documentation/pdf-services-api/howtos/extract-pdf.md' }, + { + title: 'PDF to Markdown API', + path: 'overview/legacy-documentation/pdf-services-api/howtos/pdf-to-markdown-api.md' + }, { title: 'Get PDF Properties', path: 'overview/legacy-documentation/pdf-services-api/howtos/pdf-properties.md' diff --git a/src/pages/apis/index.md b/src/pages/apis/index.md index 76ea35820..558ac5029 100644 --- a/src/pages/apis/index.md +++ b/src/pages/apis/index.md @@ -1,6 +1,6 @@ --- title: Adobe PDF Services Open API spec description: The OpenAPI spec for Adobe PDF Services API endpoints, parameters, and responses. -openAPISpec: https://raw.githubusercontent.com/AdobeDocs/pdfservices-api-documentation/main/src/pages/resources/openapi.json +openAPISpec: https://raw.githubusercontent.com/AdobeDocs/pdfservices-api-documentation/pdf2md/src/pages/resources/openapi.json --- -[] diff --git a/src/pages/overview/pdf-services-api/howtos/pdf-accessibility-checker-api.md b/src/pages/overview/pdf-services-api/howtos/pdf-accessibility-checker-api.md index a9fca7714..2422ad1d1 100644 --- a/src/pages/overview/pdf-services-api/howtos/pdf-accessibility-checker-api.md +++ b/src/pages/overview/pdf-services-api/howtos/pdf-accessibility-checker-api.md @@ -3,7 +3,7 @@ title: PDF Accessibility Checker | How Tos | PDF Services API | Adobe PDF Servic --- # PDF Accessibility Checker -The Accessibility Checker API verifies if PDF files meet the machine-verifiable requirements of PDF/UA and WCAG 2.0. It generates a report summarizing the findings of the accessibility checks. Additional human remediation may be required to ensure the reading order of elements is correct and that alternative text tags properly convey the meaning of images. 
The report contains links to documentation that assists in manually fixing problems using Adobe Acrobat Pro. +The Accessibility Checker API verifies if PDF files meet the machine-verifiable requirements of PDF/UA and WCAG. It generates a report summarizing the findings of the accessibility checks. Additional human remediation may be required to ensure the reading order of elements is correct and that alternative text tags properly convey the meaning of images. The report contains links to documentation that assists in manually fixing problems using Adobe Acrobat Pro. ## API Parameters @@ -316,7 +316,6 @@ curl --location --request POST 'https://pdf-services.adobe.io/operation/accessib }' ``` - ## Check accessibility for specified pages The sample below performs an accessibility check operation for specified pages of a given PDF. diff --git a/src/pages/overview/pdf-services-api/howtos/pdf-to-markdown-api.md b/src/pages/overview/pdf-services-api/howtos/pdf-to-markdown-api.md new file mode 100644 index 000000000..0926b0a22 --- /dev/null +++ b/src/pages/overview/pdf-services-api/howtos/pdf-to-markdown-api.md @@ -0,0 +1,126 @@ +--- +title: PDF to Markdown API | Adobe PDF Services +description: Learn about the PDF to Markdown API service that converts PDF documents into well-formatted Markdown text. +--- + +# PDF to Markdown API + +The PDF to Markdown API (included with the PDF Services API) is a cloud-based web service that automatically converts PDF documents – native or scanned – into well-formatted Markdown text. This service preserves the document's structure and formatting while converting it into a format that's widely used for LLM flows, content authoring and documentation. 
+ +## Structured Information Output Format + +The output of a PDF to Markdown operation includes: + +- A primary `.md` file containing the converted Markdown content + +### Output Structure + +The following is a summary of key elements in the converted Markdown: + +#### Elements + +Ordered list of semantic elements converted from the PDF document, preserving the natural reading order and document structure. The conversion handles: + +- Text content with proper Markdown syntax +- Document hierarchy and structure +- Inline formatting and emphasis +- Links and references +- Images and figures +- Tables and complex layouts + +#### Content Types + +The API processes various content types as follows: + +##### Text Elements + +- **Headings**: Converted to appropriate Markdown heading levels (H1-H6) +- **Paragraphs**: Preserved with proper spacing and formatting +- **Lists**: Both ordered and unordered lists with proper nesting +- **Text Emphasis**: Bold, italic, and other text formatting +- **Links**: Preserved with proper Markdown link syntax + +##### Images and Figures + +- Provided as base64-embedded images in the Markdown output +- Referenced correctly in the Markdown output +- Original quality preserved +- Proper alt text and captions maintained + +##### Tables + +- Converted to Markdown table syntax +- Column alignment preserved +- Cell content formatting maintained +- Complex table structures supported + +#### Element Types and Paths + +The API recognizes and converts the following structural elements: + +| Category | Element Type | Description | +| --------- | ----------------- | --------------------------------------------------------- | +| Aside | Aside | Content which is not part of regular content flow | +| Figure | Figure | Non-reflowable constructs like graphs, images, flowcharts | +| Footnote | Footnote | Footnote | +| Headings | H, H1, H2, etc | Heading levels | +| List | L, Li, Lbl, Lbody | List and list item elements | +| Paragraph | P, ParagraphSpan | 
Paragraphs and paragraph segments | +| Reference | Reference | Links | +| Section | Sect | Logical section of the document | +| StyleSpan | StyleSpan | Styling variations within text | +| Table | Table, TD, TH, TR | Table elements | +| Title | Title | Document title | + +### Reading Order + +The reading order in the output Markdown maintains: + +- Natural document flow +- Proper content hierarchy +- Column-based layouts +- Page transitions +- Inline elements and references + +## Use Cases + +The PDF to Markdown API is particularly valuable for: + +- LLM-friendly content ingestion and prompt creation +- Training/fine-tuning LLMs with PDFs +- Content migration from PDF to documentation platforms +- Legacy document conversion +- Content repurposing for modern documentation systems +- Integration with Markdown-based workflows +- Automated document processing pipelines +- Searchable internal knowledge repositories + +## API Limitations + +### File Constraints + +- **File Size**: Maximum of 100 MB per file +- **Page Count**: + - Non-scanned PDFs: Up to 400 pages + - Scanned PDFs: Up to 150 pages +- **Page Dimensions**: Between 6" and 17.5" in either dimension + +### Processing Limits + +- **Rate Limits**: Maximum 25 requests per minute +- **Language Support**: Optimized for English; also supports other Latin-based languages +- **OCR Quality**: Dependent on scan quality (minimum 200 DPI recommended) + +### Document Requirements + +- Files must be unprotected or allow content copying +- No support for: + - Hidden objects (JavaScript, OCG) + - XFA and fillable forms + - Complex annotations + - CAD drawings or vector art + - Password-protected content + +## REST API + +See our public API Reference for [PDF to Markdown API](../../../apis/#tag/PDF-To-Markdown). 
diff --git a/src/pages/resources/openapi.json b/src/pages/resources/openapi.json index 93066ebf8..801cd9cb3 100644 --- a/src/pages/resources/openapi.json +++ b/src/pages/resources/openapi.json @@ -38,6 +38,10 @@ "name": "Extract PDF", "description": "Extract content from PDF documents and output it in a structured JSON format, along with tables and figures" }, + { + "name": "PDF To Markdown", + "description": "Convert PDF documents to Markdown format" + }, { "name": "Html to PDF", "description": "Convert HTML Resources to a PDF File" @@ -92,7 +96,7 @@ }, { "name": "PDF Accessibility Checker", - "description": "Accessibility Checker API will check PDF files to see if they meet the machine-verifiable requirements of PDF/UA and WCAG 2.0." + "description": "Accessibility Checker API will check PDF files to see if they meet the machine-verifiable requirements of PDF/UA and WCAG." }, { "name": "PDF Watermark", @@ -130,6 +134,7 @@ "Export PDF", "Export PDF Form Data", "Extract PDF", + "PDF To Markdown", "Html to PDF", "Import PDF Form Data", "Linearize PDF", @@ -938,14 +943,392 @@ } } }, - "/operation/documentgeneration": { + "/operation/documentgeneration": { + "post": { + "tags": [ + "Document Generation" + ], + "summary": "Merge Word based templates with input JSON data to create Word and PDF documents", + "description": "Merges the input JSON data with Word based templates to create dynamic documents. 
To learn more about document generation and document templates, please see the documentation.", + "operationId": "pdfoperations.documentgeneration", + "parameters": [ + { + "name": "Authorization", + "in": "header", + "description": "Bearer + Token (Learn more about getting the access token)", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "x-api-key", + "in": "header", + "description": "The clientId from the generated credentials", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "description": "Params for Document Generation Operation. Refer to the External Section below for using external storage with Document Generation API", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/DocumentGenerationInternal" + }, + { + "$ref": "#/components/schemas/DocumentGenerationExternal" + } + ] + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "Request creation for the operation and status uri generated, which can be found in the 'location' header.", + "headers": { + "location": { + "description": "Job status URI for polling the results", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + } + }, + "400": { + "description": "Bad Request. The request was invalid or cannot be otherwise served.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. 
If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/400" + } + } + } + }, + "404": { + "description": "Resource Not Found.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/404" + } + } + } + }, + "401": { + "description": "Unauthorized.", + "headers": { + "content-type": { + "description": "The content type of the error JSON response.", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/401" + } + } + } + }, + "429": { + "description": "Caller doesn't have sufficient quota for this operation.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/429" + } + } + } + }, + "500": { + "description": "Internal Server Error. 
The server has encountered an error and is unable to process your request at this time.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/500" + } + } + } + } + }, + "x-codegen-request-body-name": "body" + } + }, + "/operation/documentgeneration/{jobID}/status": { + "get": { + "tags": [ + "Document Generation" + ], + "summary": "Poll the document generation job for completion", + "operationId": "pdfoperations.documentgeneration.jobstatus", + "parameters": [ + { + "name": "jobID", + "in": "path", + "description": "Job ID of the request", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "Authorization", + "in": "header", + "description": "Bearer + Token (Learn more about getting the access token)", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "x-api-key", + "in": "header", + "description": "The clientId from the generated credentials", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "A response with job information.", + "headers": { + "content-type": { + "description": "The content type of the status call response.", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DocumentGenerationJobStatus" + } + } + } + }, + "400": { + "description": "Bad Request. 
The request was invalid or cannot be otherwise served.", + "headers": { + "content-type": { + "description": "The content type of the status call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Status400" + } + } + } + }, + "404": { + "description": "Resource Not Found.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Satus404" + } + } + } + }, + "401": { + "description": "Unauthorized.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-server": { + "description": "The name and version of the server.", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. 
If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Satus401" + } + } + } + }, + "429": { + "description": "Caller doesn't have sufficient quota for this operation.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Satus429" + } + } + } + }, + "500": { + "description": "Internal Server Error. The server has encountered an error and is unable to process your request at this time.", + "headers": { + "content-type": { + "description": "The content type of the POST API call response", + "schema": { + "type": "string" + } + }, + "x-request-id": { + "description": "A unique value to identify this request. If x-request-id is specified in the POST request, this value will be the same as the input.", + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Satus500" + } + } + } + } + } + } + }, + "/operation/pdftomarkdown": { "post": { "tags": [ - "Document Generation" + "PDF To Markdown" ], - "summary": "Merge Word based templates with input JSON data to create Word and PDF documents", - "description": "Merges the input JSON data with Word based templates to create dynamic documents. 
To learn more about document generation and document templates, please see the documentation.", - "operationId": "pdfoperations.documentgeneration", + "summary": "Extract content from PDF documents and output it in a markdown format, along with tables and figures", + "description": "Extract PDF Content, Tables content and Tables/Figures renditions from a PDF document. Various available options are: \n\n1. Extract figures or images in base64 format", + "operationId": "pdfoperations.pdftomarkdown", "parameters": [ { "name": "Authorization", @@ -967,16 +1350,16 @@ } ], "requestBody": { - "description": "Params for Document Generation Operation. Refer to the External Section below for using external storage with Document Generation API", + "description": "Params for PDF to Markdown Operation. Refer to the External Section below for using external storage with PDF to Markdown Operation.", "content": { "application/json": { "schema": { "oneOf": [ { - "$ref": "#/components/schemas/DocumentGenerationInternal" + "$ref": "#/components/schemas/PDFToMarkdownInternal" }, { - "$ref": "#/components/schemas/DocumentGenerationExternal" + "$ref": "#/components/schemas/PDFToMarkdownExternal" } ] } @@ -1059,6 +1442,12 @@ "type": "string" } }, + "x-server": { + "description": "The name and version of the server.", + "schema": { + "type": "string" + } + }, "x-request-id": { "description": "A unique value to identify this request. 
If x-request-id is specified in the POST request, this value will be the same as the input.", "schema": { @@ -1126,13 +1515,13 @@ "x-codegen-request-body-name": "body" } }, - "/operation/documentgeneration/{jobID}/status": { + "/operation/pdftomarkdown/{jobID}/status": { "get": { "tags": [ - "Document Generation" + "PDF To Markdown" ], - "summary": "Poll the document generation job for completion", - "operationId": "pdfoperations.documentgeneration.jobstatus", + "summary": "Poll the PDF to markdown job for completion", + "operationId": "pdfoperations.pdftomarkdown.jobstatus", "parameters": [ { "name": "jobID", @@ -1182,7 +1571,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DocumentGenerationJobStatus" + "$ref": "#/components/schemas/PDFToMarkdownJobStatus" } } } @@ -7388,7 +7777,7 @@ "PDF Accessibility Checker" ], "summary": "PDF Accessibility Checker Operation", - "description": "Accessibility Checker API will check PDF files to see if they meet the machine-verifiable requirements of PDF/UA and WCAG 2.0. It will generate a report that summarizes the findings of the accessibility checks. Additional human remediation may be required to ensure that the reading order of elements is correct and that alternative text tags properly convey the meaning of an image. The report contains links to documentation that assist in manually fixing problems using Adobe Acrobat Pro.", + "description": "Accessibility Checker API will check PDF files to see if they meet the machine-verifiable requirements of PDF/UA and WCAG. It will generate a report that summarizes the findings of the accessibility checks. Additional human remediation may be required to ensure that the reading order of elements is correct and that alternative text tags properly convey the meaning of an image. 
The report contains links to documentation that assists in manually fixing problems using Adobe Acrobat Pro.", "operationId": "pdfoperations.accessibilitychecker", "parameters": [ { @@ -8897,6 +9286,109 @@ }, "components": { "schemas": { + "PDFToMarkdownInternal": { + "title": "Internal", + "description": "Params for PDF to Markdown Operation", + "type": "object", + "required": [ + "assetID" + ], + "properties": { + "assetID": { + "description": "A file assetID. For more details click here .", + "type": "string" + }, + "getFigures": { + "description": "Extract figures or images in base64 format", + "type": "boolean", + "default": "false" + }, + "notifiers": { + "$ref": "#/components/schemas/notifiers" + } + }, + "example": { + "assetID": "urn:aaid:AS:UE1:23c30ee0-2c4d-46d6-87f2-087832fca718", + "getfigures": false, + "notifiers": [ + { + "type": "CALLBACK", + "data": { + "url": "https://dummy.callback.org/", + "headers": { + "x-api-key": "dummykey", + "access-token": "dummytoken" + } + } + } + ] + }, + "additionalProperties": false + }, + "PDFToMarkdownExternal": { + "title": "External", + "description": "Params for PDF to Markdown Operation using external storage.", + "type": "object", + "required": [ + "input" + ], + "properties": { + "input": { + "$ref": "#/components/schemas/ExternalStorageInput" + }, + "output": { + "$ref": "#/components/schemas/ExternalStorageOutputZip" + }, + "params": { + "description": "Params for PDF to Markdown processing", + "type": "object", + "properties": { + "tagEncapsulatedText": { + "description": "List of elements from which to extract encapsulated text content.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "Figure" + ] + }, + "default": [ + "Figure" + ] + } + } + }, + "notifiers": { + "$ref": "#/components/schemas/notifiers" + } + }, + "example": { + "input": { + "uri": 
"https://dcplatformstorageservice-dev-us-east-1.s3-accelerate.amazonaws.com/dc-platformService-automation_dc-platformService-automation%40AdobeID/1c4f4674-ce8d-4b21-a69d-60aeae35bf43?X-Amz-Security-Token=FwoGZXIvYXdzEBkaDK%2By2wxl94khIbkxzCLTAQn6n6Wo0vFSul%2FpXW66aFX4T%2BPxtuOy%2Bz8eTxrnexeJRvMreBHNQm1myLwp20MkE%2Bb0H%2BwYgOhFaepi9AMml1aLNxXn1UPnEWJ7y8llhvsrXHimEfWvb3TMAkZddgUIDBue8oGUYqm4f2s0sMvPWBCxI45zM0%2F37EK%2B4JnIo1SlrKNm0GSZ44AEiOAhXupQ8ih6KoUbUciD3Biile6CwTMVIhME3mJiRSgVK6W91EaDn8%2Ba3mU%2BVvU1K9sgDSPZ%2F81DOpj25pvMW%2B1cMuCtUNsu9KUo7dHvpAYyLYiy%2FPGEmO9EquKjfMPRr17PAjeunD1QdgbRss4ysG%2B6XF2Has8zsGqX1sQalA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230628T081557Z&X-Amz-SignedHeaders=host&X-Amz-Expires=3599&X-Amz-Credential=ASIAU5PA7W47IMX73XEA%2F20230628%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=ac6998566dbbde22509b128fe94d1cb5d3146cd3fb8ba78d7068e10d61302ec2", + "storage": "S3" + }, + "output": { + "uri": "https://dcplatformstorageservice-dev-us-east-1.s3-accelerate.amazonaws.com/dc-platformService-automation_dc-platformService-automation%40AdobeID/f02f9927-4971-4589-8fdf-41ff56c2d520?X-Amz-Security-Token=FwoGZXIvYXdzEBkaDK%2By2wxl94khIbkxzCLTAQn6n6Wo0vFSul%2FpXW66aFX4T%2BPxtuOy%2Bz8eTxrnexeJRvMreBHNQm1myLwp20MkE%2Bb0H%2BwYgOhFaepi9AMml1aLNxXn1UPnEWJ7y8llhvsrXHimEfWvb3TMAkZddgUIDBue8oGUYqm4f2s0sMvPWBCxI45zM0%2F37EK%2B4JnIo1SlrKNm0GSZ44AEiOAhXupQ8ih6KoUbUciD3Biile6CwTMVIhME3mJiRSgVK6W91EaDn8%2Ba3mU%2BVvU1K9sgDSPZ%2F81DOpj25pvMW%2B1cMuCtUNsu9KUo7dHvpAYyLYiy%2FPGEmO9EquKjfMPRr17PAjeunD1QdgbRss4ysG%2B6XF2Has8zsGqX1sQalA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230628T081559Z&X-Amz-SignedHeaders=content-type%3Bhost&X-Amz-Expires=3600&X-Amz-Credential=ASIAU5PA7W47IMX73XEA%2F20230628%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=4f765277eb6e36bd5f7bf9d42be24244440092b5a705eadf178c78a9a9fb5d71", + "storage": "S3" + }, + "params": { + "getfigures": true + }, + "notifiers": [ + { + "type": "CALLBACK", + "data": { + "url": 
"https://dummy.callback.org/", + "headers": { + "x-api-key": "dummykey", + "access-token": "dummytoken" + } + } + } + ] + }, + "additionalProperties": false + }, "Token": { "description": "Params for generating the Access Token.", "type": "object", @@ -13005,6 +13497,19 @@ } ] }, + "PDFToMarkdownJobStatus": { + "oneOf": [ + { + "$ref": "#/components/schemas/inprogress" + }, + { + "$ref": "#/components/schemas/done" + }, + { + "$ref": "#/components/schemas/failed" + } + ] + }, "HtmlToPdfJobStatus": { "oneOf": [ { @@ -14098,6 +14603,17 @@ } ] }, + "PDFToMarkdownDone": { + "title": "done", + "oneOf": [ + { + "$ref": "#/components/schemas/PDFToMarkdownDoneInternal" + }, + { + "$ref": "#/components/schemas/PDFToMarkdownDoneExternal" + } + ] + }, "ExtractPDFDoneInternal": { "title": "Internal", "description": "Response in case of 'done' status", @@ -14193,17 +14709,140 @@ } } }, - "ExtractPDFDoneExternal": { + "PDFToMarkdownDoneInternal": { + "title": "Internal", + "description": "Response in case of 'done' status", + "type": "object", + "properties": { + "status": { + "description": "Job Status", + "type": "string", + "enum": [ + "done" + ] + }, + "content": { + "type": "object", + "description": "Asset of json file containing extracted content of pdf file. For more details click here .", + "properties": { + "metadata": { + "type": "object", + "description": "metadata details of output asset.", + "properties": { + "size": { + "description": "The size of the Resource in bytes. 
This value helps in making range requests.", + "type": "integer" + }, + "type": { + "description": "The media type of the Resource.", + "type": "string", + "enum": [ + "application/json" + ] + } + } + }, + "assetID": { + "description": "An asset ID identifying an asset that is globally unique and never reused.", + "type": "string" + }, + "downloadUri": { + "description": "The URL used to download the Resource directly to the cloud provider.", + "type": "string" + } + } + }, + "resource": { + "type": "object", + "description": "Asset of zip file containing generated resources from extract operation. For more details click here .", + "properties": { + "metadata": { + "type": "object", + "description": "metadata details of output asset.", + "properties": { + "size": { + "description": "The size of the Resource in bytes. This value helps in making range requests.", + "type": "integer" + }, + "type": { + "description": "The media type of the Resource.", + "type": "string", + "enum": [ + "application/json" + ] + } + } + }, + "assetID": { + "description": "An asset ID identifying an asset that is globally unique and never reused.", + "type": "string" + }, + "downloadUri": { + "description": "The URL used to download the Resource directly to the cloud provider.", + "type": "string" + } + } + } + }, + "example": { + "status": "done", + "content": { + "metadata": { + "type": "application/json", + "size": 200791 + }, + "downloadUri": 
"https://dcplatformstorageservice-dev-us-east-1.s3-accelerate.amazonaws.com/6bb12fd8-3233-4340-916b-4835917857be?X-Amz-Security-Token=FwoGZXIvYXdzEE0aDOLg7PFwVB1bjEMxQCLTAe3pjf%2Fgl2Pj%2FcjOaY%2BHfduju9SXTp1Pn8C4GapIXm%2F8tuR4cGPYGC0goU21qZxCq9R%2F8z2bOmB2EL%2BZrhbPLbaNcpzf5Vud%2B3Bmn61MEJiBdU%2BhZqasX5YhVxdnzhfpl5KfKeq2kwROVMqJcyHGdxw5h0%2Bi0sD2I8sqkbPmnBi0WOtYNwz7TQq42oe8W5KYHpq6WMya9OQgx0u7qg0inYwBnQu5UQ9NJJQY2MSU11IuZ0uE%2B%2FNAPuq3VfEIn3txK%2FFfIxGz9%2BInehhHroKjBFULzy0olY2rlwYyLQDwHHr2eyuy%2BaoGVaq%2BSUNH8T0OKIicbbZfQ5wjF0hK2FzVXwfebtDG4qq%2BiA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20220803T190422Z&X-Amz-SignedHeaders=host&X-Amz-Expires=3600&X-Amz-Credential=ASIAU5PA7W47AH3PA2JV%2F20220803%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=5d8c3f421c68c009b4471919a319ba460495c72afe51674d23266bc124fe9a56", + "assetID": "urn:aaid:AS:UE1:23c30ee0-2c4d-46d6-87f2-087832fca718" + }, + "resource": { + "metadata": { + "type": "application/zip", + "size": 200791 + }, + "downloadUri": "https://dcplatformstorageservice-dev-us-east-1.s3-accelerate.amazonaws.com/6bb12fd8-3233-4340-916b-4835917857be?X-Amz-Security-Token=FwoGZXIvYXdzEE0aDOLg7PFwVB1bjEMxQCLTAe3pjf%2Fgl2Pj%2FcjOaY%2BHfduju9SXTp1Pn8C4GapIXm%2F8tuR4cGPYGC0goU21qZxCq9R%2F8z2bOmB2EL%2BZrhbPLbaNcpzf5Vud%2B3Bmn61MEJiBdU%2BhZqasX5YhVxdnzhfpl5KfKeq2kwROVMqJcyHGdxw5h0%2Bi0sD2I8sqkbPmnBi0WOtYNwz7TQq42oe8W5KYHpq6WMya9OQgx0u7qg0inYwBnQu5UQ9NJJQY2MSU11IuZ0uE%2B%2FNAPuq3VfEIn3txK%2FFfIxGz9%2BInehhHroKjBFULzy0olY2rlwYyLQDwHHr2eyuy%2BaoGVaq%2BSUNH8T0OKIicbbZfQ5wjF0hK2FzVXwfebtDG4qq%2BiA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20220803T190422Z&X-Amz-SignedHeaders=host&X-Amz-Expires=3600&X-Amz-Credential=ASIAU5PA7W47AH3PA2JV%2F20220803%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=5d8c3f421c68c009b4471919a319ba460495c72afe51674d23266bc124fe9a56", + "assetID": "urn:aaid:AS:UE1:23c30ee0-32s3-46d6-87f2-087832fca718" + } + } + }, + "PDFToMarkdownDoneExternal": { "title": "External", "oneOf": [ { - "$ref": 
"#/components/schemas/ExtractPDFDoneExternalWithoutOutput" + "$ref": "#/components/schemas/PDFToMarkdownDoneExternalWithoutOutput" }, { "$ref": "#/components/schemas/external" } ] }, + "PDFToMarkdownDoneExternalWithoutOutput": { + "title": "External without an output URI in the request", + "description": "Response in case of 'done' status when ouput uri is not passed in the rquest", + "type": "object", + "properties": { + "status": { + "description": "Job Status", + "type": "string", + "enum": [ + "done" + ] + }, + "resource": { + "$ref": "#/components/schemas/responseAssetZip" + } + }, + "example": { + "status": "done", + "resource": { + "metadata": { + "type": "application/pdf", + "size": 200791 + }, + "downloadUri": "https://dcplatformstorageservice-dev-us-east-1.s3-accelerate.amazonaws.com/6bb12fd8-3233-4340-916b-4835917857be?X-Amz-Security-Token=FwoGZXIvYXdzEE0aDOLg7PFwVB1bjEMxQCLTAe3pjf%2Fgl2Pj%2FcjOaY%2BHfduju9SXTp1Pn8C4GapIXm%2F8tuR4cGPYGC0goU21qZxCq9R%2F8z2bOmB2EL%2BZrhbPLbaNcpzf5Vud%2B3Bmn61MEJiBdU%2BhZqasX5YhVxdnzhfpl5KfKeq2kwROVMqJcyHGdxw5h0%2Bi0sD2I8sqkbPmnBi0WOtYNwz7TQq42oe8W5KYHpq6WMya9OQgx0u7qg0inYwBnQu5UQ9NJJQY2MSU11IuZ0uE%2B%2FNAPuq3VfEIn3txK%2FFfIxGz9%2BInehhHroKjBFULzy0olY2rlwYyLQDwHHr2eyuy%2BaoGVaq%2BSUNH8T0OKIicbbZfQ5wjF0hK2FzVXwfebtDG4qq%2BiA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20220803T190422Z&X-Amz-SignedHeaders=host&X-Amz-Expires=3600&X-Amz-Credential=ASIAU5PA7W47AH3PA2JV%2F20220803%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=5d8c3f421c68c009b4471919a319ba460495c72afe51674d23266bc124fe9a56", + "assetID": "urn:aaid:AS:UE1:23c30ee0-2c4d-46d6-87f2-087832fca718" + } + } + }, "ExtractPDFDoneExternalWithoutOutput" : { "title": "External without an output URI in the request", "description": "Response in case of 'done' status when ouput uri is not passed in the rquest", From bac5919a3a2b405e8559e50227e5a8ea6c4b93ec Mon Sep 17 00:00:00 2001 From: mahour Date: Mon, 22 Sep 2025 12:55:48 +0530 Subject: [PATCH 2/4] added pdf 2 md docs 
--- src/pages/resources/openapi.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages/resources/openapi.json b/src/pages/resources/openapi.json index 801cd9cb3..ebad1ed81 100644 --- a/src/pages/resources/openapi.json +++ b/src/pages/resources/openapi.json @@ -1326,7 +1326,7 @@ "tags": [ "PDF To Markdown" ], - "summary": "Extract content from PDF documents and output it in a markdown format, along with tables and figures", + "summary": "Extract content from PDF documents and output it in a md format, along with tables and figures", "description": "Extract PDF Content, Tables content and Tables/Figures renditions from a PDF document. Various available options are: \n\n1. Extract figures or images in base64 format", "operationId": "pdfoperations.pdftomarkdown", "parameters": [ From 88acf0fc42fbec671b0488e8c649c7e8e899bcab Mon Sep 17 00:00:00 2001 From: mahour Date: Mon, 22 Sep 2025 13:03:52 +0530 Subject: [PATCH 3/4] added pdf 2 md docs --- src/pages/apis/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages/apis/index.md b/src/pages/apis/index.md index 558ac5029..d6835613c 100644 --- a/src/pages/apis/index.md +++ b/src/pages/apis/index.md @@ -1,6 +1,6 @@ --- title: Adobe PDF Services Open API spec description: The OpenAPI spec for Adobe PDF Services API endpoints, parameters, and responses. 
-openAPISpec: https://raw.githubusercontent.com/AdobeDocs/pdfservices-api-documentation/pdf2md/src/pages/resources/openapi.json +openAPISpec: https://raw.githubusercontent.com/AdobeDocs/pdfservices-api-documentation/develop/src/pages/resources/openapi.json --- -[] From 1c5e3bd43d50b8335d8339ad2266160450683ac9 Mon Sep 17 00:00:00 2001 From: mahour Date: Mon, 22 Sep 2025 13:18:46 +0530 Subject: [PATCH 4/4] added pdf 2 md docs --- src/pages/resources/openapi.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages/resources/openapi.json b/src/pages/resources/openapi.json index ebad1ed81..801cd9cb3 100644 --- a/src/pages/resources/openapi.json +++ b/src/pages/resources/openapi.json @@ -1326,7 +1326,7 @@ "tags": [ "PDF To Markdown" ], - "summary": "Extract content from PDF documents and output it in a md format, along with tables and figures", + "summary": "Extract content from PDF documents and output it in a markdown format, along with tables and figures", "description": "Extract PDF Content, Tables content and Tables/Figures renditions from a PDF document. Various available options are: \n\n1. Extract figures or images in base64 format", "operationId": "pdfoperations.pdftomarkdown", "parameters": [