-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathdoc.ts
29 lines (25 loc) · 850 Bytes
/
doc.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
// source/parsers/docx.ts
// The text extractor for DOCX files.
import { type Buffer } from 'buffer/'
import { extractRawText as parseWordFile } from 'mammoth'
import type { TextExtractionMethod } from '../lib.js'
export class DocExtractor implements TextExtractionMethod {
  /**
   * MIME types this extractor accepts. Only the Office Open XML
   * word-processing document type (DOCX) is listed.
   */
  mimes = [
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  ]

  /**
   * Pull the raw text out of a DOCX document.
   *
   * The buffer is handed to mammoth's `extractRawText` (imported here as
   * `parseWordFile`) and the `value` field of its result is returned. The
   * call is awaited without a surrounding try/catch, so a rejection from
   * mammoth propagates to the caller.
   *
   * @param input The DOCX file contents as a buffer.
   * @returns The text extracted from the input.
   */
  apply = async (input: Buffer): Promise<string> => {
    // The directive below must stay directly above the call it suppresses.
    // @ts-expect-error: see feross/buffer#353, the types are incomplete.
    const { value: text } = await parseWordFile({ buffer: input })

    return text
  }
}