Skip to content

Commit

Permalink
Merge pull request #233 from Inist-CNRS/add-importer-corpus
Browse files Browse the repository at this point in the history
[RFR] support .corpus files
  • Loading branch information
djhi committed Apr 4, 2017
2 parents a859cf1 + a722c04 commit 57e29b9
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
3 changes: 2 additions & 1 deletion package.json
Expand Up @@ -43,7 +43,8 @@
"csv-parse": "1.1.10",
"expect": "1.20.2",
"ezs": "1.1.3",
"ezs-basics": "2.2.5",
"ezs-basics": "2.3.0",
"ezs-istex": "2.0.1",
"file-loader": "0.9.0",
"file-saver": "1.3.3",
"html-webpack-plugin": "2.26.0",
Expand Down
22 changes: 22 additions & 0 deletions src/api/loaders/corpus.js
@@ -0,0 +1,22 @@
import ezs from 'ezs';
import ezsBasics from 'ezs-basics';
import ezsISTEX from 'ezs-istex';

// Register the two plugin modules imported above with ezs.
// NOTE(review): presumably these supply the statements referenced by name in
// the loader pipeline ('stringify', 'concat', 'ISTEX*', 'flatten') — confirm
// which package provides which statement.
ezs.use(ezsBasics);
ezs.use(ezsISTEX);

// Comma-separated field list passed as the default `output` parameter of the
// 'ISTEXQuery' statement.
// NOTE(review): presumably the ISTEX document fields to return for each hit —
// confirm against the ISTEX API documentation.
const output = [
    'corpusName',
    'author',
    'language',
    'abstract',
    'title',
    'pmid',
    'genre',
    'host',
    'publicationDate',
    'copyrightDate',
    'id',
    'score',
    'serie',
    'fulltext',
].join(',');


/**
 * Loader factory for `.corpus` files.
 *
 * @param {Object} [config] - Extra query parameters. They are spread after the
 *   default `output` field list, so a `config.output` entry overrides it.
 * @returns {Function} A function that takes a stream, pipes it through the
 *   ezs statements listed below in order, and returns the stream produced by
 *   the last `pipe` call.
 */
export default config => (stream) => {
    // Each entry builds its statement lazily so that a stage is only created
    // at the moment it is attached to the pipeline.
    const stageFactories = [
        () => ezs('stringify'),
        () => ezs('concat'),
        () => ezs('ISTEXCorpus'),
        () => ezs('ISTEXQuery', { params: { output, ...config } }),
        () => ezs('ISTEXHarvest'),
        () => ezs('ISTEXRequest'),
        () => ezs('ISTEXHits'),
        () => ezs('flatten'),
    ];

    return stageFactories.reduce(
        (upstream, createStage) => upstream.pipe(createStage()),
        stream,
    );
};
2 changes: 2 additions & 0 deletions src/api/loaders/index.js
Expand Up @@ -4,6 +4,7 @@ import tsv from './tsv'; // eslint-disable-line
import skos from './skos'; // eslint-disable-line
import json from './json'; // eslint-disable-line
import xml from './xml'; // eslint-disable-line
import corpus from './corpus'; // eslint-disable-line

export default {
CSV,
Expand All @@ -16,4 +17,5 @@ export default {
atom: xml,
mods: xml,
tei: xml,
corpus,
};

0 comments on commit 57e29b9

Please sign in to comment.