diff --git a/.travis.yml b/.travis.yml index 6479235..de72154 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: node_js node_js: - - "12" + - "14" install: - npm install --global codecov diff --git a/src/extractors.spec.ts b/src/extractors.spec.ts index 1bcb286..893a79a 100644 --- a/src/extractors.spec.ts +++ b/src/extractors.spec.ts @@ -1,10 +1,10 @@ import { FieldConfig } from './types'; import { extract } from './extractors'; -import { parseDOM } from 'htmlparser2'; +import { parseDocument } from 'htmlparser2'; describe('Extractors', () => { it('should work when extract = text', () => { - const nodes = parseDOM(`

Title

`); + const nodes = parseDocument(`

Title

`).children; const config = { extractor: { name: 'text', args: [] }, @@ -16,7 +16,7 @@ describe('Extractors', () => { expect(result).toEqual('Title'); }); it('should work when extract = text and when selector do not match anything', () => { - const nodes = parseDOM(`

Title

`); + const nodes = parseDocument(`

Title

`).children; const config = { extractor: { name: 'text', args: [] }, @@ -28,7 +28,7 @@ describe('Extractors', () => { expect(result).toEqual(''); }); it('should work when extract = prop', () => { - const nodes = parseDOM(`Link`); + const nodes = parseDocument(`Link`).children; const config = { extractor: { name: 'prop', args: ['href'] }, @@ -40,9 +40,9 @@ describe('Extractors', () => { expect(result).toEqual('a-super-link'); }); it('should work when extract = html', () => { - const nodes = parseDOM( + const nodes = parseDocument( `
Link
`, - ); + ).children; const config = { extractor: { name: 'html', args: [] }, @@ -54,9 +54,9 @@ describe('Extractors', () => { expect(result).toEqual('Link'); }); it('should work when extract = html and when selector do not match anything', () => { - const nodes = parseDOM( + const nodes = parseDocument( `
Link
`, - ); + ).children; const config = { extractor: { name: 'html', args: [] }, @@ -68,9 +68,9 @@ describe('Extractors', () => { expect(result).toEqual(''); }); it('should work when extract = outerHtml', () => { - const nodes = parseDOM( + const nodes = parseDocument( `
Link
`, - ); + ).children; const config = { extractor: { name: 'outerHtml', args: [] }, @@ -82,9 +82,9 @@ describe('Extractors', () => { expect(result).toEqual('
Link
'); }); it('should work when extract = outerHtml and when selector do not match anything', () => { - const nodes = parseDOM( + const nodes = parseDocument( `
Link
`, - ); + ).children; const config = { extractor: { name: 'outerHtml', args: [] }, @@ -96,7 +96,7 @@ describe('Extractors', () => { expect(result).toEqual(''); }); it('should work when extract = css', () => { - const nodes = parseDOM(`
`); + const nodes = parseDocument(`
`).children; const config = { extractor: { name: 'css', args: ['color'] }, @@ -108,7 +108,7 @@ describe('Extractors', () => { expect(result).toEqual('white'); }); it('should work when extract = css and no style', () => { - const nodes = parseDOM(`
`); + const nodes = parseDocument(`
`).children; const config = { extractor: { name: 'css', args: ['color'] }, @@ -120,7 +120,7 @@ describe('Extractors', () => { expect(result).toEqual(''); }); it('should throw when extract = not existing', () => { - const nodes = parseDOM(`

Title

`); + const nodes = parseDocument(`

Title

`).children; const config = { extractor: { name: 'not exising', args: ['color'] }, diff --git a/src/formators.ts b/src/formators.ts index b4ee5bc..73dde2b 100644 --- a/src/formators.ts +++ b/src/formators.ts @@ -1,7 +1,7 @@ import { FormatTypes, IPipe } from './types'; import { enumAsString, urlJoin } from './utils'; -import { parseDOM } from 'htmlparser2'; -import { getText } from 'domutils'; +import { parseDocument } from 'htmlparser2'; +import { textContent } from 'domutils'; const formattorsMap = { [FormatTypes.STRING]: ignoreUndefined(formatString), @@ -52,7 +52,7 @@ function formatHtmlToText(rawValue: string): string { .replace(/

(.*?)<\/p>/g, (_, match) => `\n${match}\n`) .replace(/

(.*?)<\/div>/g, (_, match) => `\n${match}\n`); - return getText(parseDOM(sanitizedHtml)); + return textContent(parseDocument(sanitizedHtml)); } function formatOneLineString(rawValue: string): string { diff --git a/src/parsers.ts b/src/parsers.ts index 2b9fa33..f3cdf85 100644 --- a/src/parsers.ts +++ b/src/parsers.ts @@ -1,6 +1,6 @@ import { selectAll } from 'css-select'; import { Element, Node, NodeWithChildren } from "domhandler"; -import { parseDOM, ElementType } from 'htmlparser2'; +import { parseDocument, ElementType } from 'htmlparser2'; import { parseConfig } from './config-parsers'; import { extract } from './extractors'; @@ -21,15 +21,15 @@ import { export function parse(html: string, config: T): EbriScrapData { const parsedConfig = parseConfig(config); - const nodes = parseDOM(html, { decodeEntities: true }); - return genericParse(nodes, parsedConfig, null, ''); + const doc = parseDocument(html, { decodeEntities: true }); + return genericParse(doc.children, parsedConfig, null, ''); } export function parseWithDebug(html: string, config: T): EbriscrapDebugResult { const parsedConfig = parseConfig(config); - const nodes = parseDOM(html, { decodeEntities: true }); + const doc = parseDocument(html, { decodeEntities: true }); const debug: DebugStep[] = []; - const result = genericParse(nodes, parsedConfig, debug, ''); + const result = genericParse(doc.children, parsedConfig, debug, ''); return { result, debug: parseDebug(debug) }; }