diff --git a/.travis.yml b/.travis.yml
index 6479235..de72154 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,6 @@
language: node_js
node_js:
- - "12"
+ - "14"
install:
- npm install --global codecov
diff --git a/src/extractors.spec.ts b/src/extractors.spec.ts
index 1bcb286..893a79a 100644
--- a/src/extractors.spec.ts
+++ b/src/extractors.spec.ts
@@ -1,10 +1,10 @@
import { FieldConfig } from './types';
import { extract } from './extractors';
-import { parseDOM } from 'htmlparser2';
+import { parseDocument } from 'htmlparser2';
describe('Extractors', () => {
it('should work when extract = text', () => {
- const nodes = parseDOM(`
Title
`);
+ const nodes = parseDocument(`Title
`).children;
const config = {
extractor: { name: 'text', args: [] },
@@ -16,7 +16,7 @@ describe('Extractors', () => {
expect(result).toEqual('Title');
});
it('should work when extract = text and when selector do not match anything', () => {
- const nodes = parseDOM(`Title
`);
+ const nodes = parseDocument(`Title
`).children;
const config = {
extractor: { name: 'text', args: [] },
@@ -28,7 +28,7 @@ describe('Extractors', () => {
expect(result).toEqual('');
});
it('should work when extract = prop', () => {
- const nodes = parseDOM(`Link`);
+ const nodes = parseDocument(`Link`).children;
const config = {
extractor: { name: 'prop', args: ['href'] },
@@ -40,9 +40,9 @@ describe('Extractors', () => {
expect(result).toEqual('a-super-link');
});
it('should work when extract = html', () => {
- const nodes = parseDOM(
+ const nodes = parseDocument(
``,
- );
+ ).children;
const config = {
extractor: { name: 'html', args: [] },
@@ -54,9 +54,9 @@ describe('Extractors', () => {
expect(result).toEqual('Link');
});
it('should work when extract = html and when selector do not match anything', () => {
- const nodes = parseDOM(
+ const nodes = parseDocument(
``,
- );
+ ).children;
const config = {
extractor: { name: 'html', args: [] },
@@ -68,9 +68,9 @@ describe('Extractors', () => {
expect(result).toEqual('');
});
it('should work when extract = outerHtml', () => {
- const nodes = parseDOM(
+ const nodes = parseDocument(
``,
- );
+ ).children;
const config = {
extractor: { name: 'outerHtml', args: [] },
@@ -82,9 +82,9 @@ describe('Extractors', () => {
expect(result).toEqual('');
});
it('should work when extract = outerHtml and when selector do not match anything', () => {
- const nodes = parseDOM(
+ const nodes = parseDocument(
``,
- );
+ ).children;
const config = {
extractor: { name: 'outerHtml', args: [] },
@@ -96,7 +96,7 @@ describe('Extractors', () => {
expect(result).toEqual('');
});
it('should work when extract = css', () => {
- const nodes = parseDOM(``);
+ const nodes = parseDocument(``).children;
const config = {
extractor: { name: 'css', args: ['color'] },
@@ -108,7 +108,7 @@ describe('Extractors', () => {
expect(result).toEqual('white');
});
it('should work when extract = css and no style', () => {
- const nodes = parseDOM(``);
+ const nodes = parseDocument(``).children;
const config = {
extractor: { name: 'css', args: ['color'] },
@@ -120,7 +120,7 @@ describe('Extractors', () => {
expect(result).toEqual('');
});
it('should throw when extract = not existing', () => {
- const nodes = parseDOM(`Title
`);
+ const nodes = parseDocument(`Title
`).children;
const config = {
extractor: { name: 'not exising', args: ['color'] },
diff --git a/src/formators.ts b/src/formators.ts
index b4ee5bc..73dde2b 100644
--- a/src/formators.ts
+++ b/src/formators.ts
@@ -1,7 +1,7 @@
import { FormatTypes, IPipe } from './types';
import { enumAsString, urlJoin } from './utils';
-import { parseDOM } from 'htmlparser2';
-import { getText } from 'domutils';
+import { parseDocument } from 'htmlparser2';
+import { textContent } from 'domutils';
const formattorsMap = {
[FormatTypes.STRING]: ignoreUndefined(formatString),
@@ -52,7 +52,7 @@ function formatHtmlToText(rawValue: string): string {
.replace(/(.*?)<\/p>/g, (_, match) => `\n${match}\n`)
.replace(/
(.*?)<\/div>/g, (_, match) => `\n${match}\n`);
- return getText(parseDOM(sanitizedHtml));
+ return textContent(parseDocument(sanitizedHtml));
}
function formatOneLineString(rawValue: string): string {
diff --git a/src/parsers.ts b/src/parsers.ts
index 2b9fa33..f3cdf85 100644
--- a/src/parsers.ts
+++ b/src/parsers.ts
@@ -1,6 +1,6 @@
import { selectAll } from 'css-select';
import { Element, Node, NodeWithChildren } from "domhandler";
-import { parseDOM, ElementType } from 'htmlparser2';
+import { parseDocument, ElementType } from 'htmlparser2';
import { parseConfig } from './config-parsers';
import { extract } from './extractors';
@@ -21,15 +21,15 @@ import {
export function parse(html: string, config: T): EbriScrapData {
const parsedConfig = parseConfig(config);
- const nodes = parseDOM(html, { decodeEntities: true });
- return genericParse(nodes, parsedConfig, null, '');
+ const doc = parseDocument(html, { decodeEntities: true });
+ return genericParse(doc.children, parsedConfig, null, '');
}
export function parseWithDebug(html: string, config: T): EbriscrapDebugResult {
const parsedConfig = parseConfig(config);
- const nodes = parseDOM(html, { decodeEntities: true });
+ const doc = parseDocument(html, { decodeEntities: true });
const debug: DebugStep[] = [];
- const result = genericParse(nodes, parsedConfig, debug, '');
+ const result = genericParse(doc.children, parsedConfig, debug, '');
return { result, debug: parseDebug(debug) };
}