From ef7b8ab1a628c3fa8aefb3fd5da2ffc17afe8e53 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Thu, 9 Feb 2023 13:38:25 -0500 Subject: [PATCH 01/13] Created data-search, translating search.sh to JS --- src/data-search.js | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/data-search.js diff --git a/src/data-search.js b/src/data-search.js new file mode 100644 index 0000000..617c8b3 --- /dev/null +++ b/src/data-search.js @@ -0,0 +1,20 @@ +#! /usr/bin/env node + +// here we will convert the search.sh script into JS +import { format, sub } from 'date-fns'; +import fs from 'fs'; +import { download, search } from './cli.js'; + +const CATEGORY_ID = 'alzheimers-disease'; +const DATA_DIRECTORY = '../example-data'; + +const MEDRXIV_SOURCE = 'medrxiv'; +const BIORXIV_SOURCE = 'biorxiv'; + +const now = new Date(); +const startOffset = { months: 1 }; +const START_DATE = format(sub(now, startOffset), 'yyyy-MM-dd'); +const END_DATE = format(now, 'yyyy-MM-dd'); + +const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; +const dataFile = fs.openSync(DATA_FILE, 'w'); From e69d2034e01fa1862b3eb6a8e3e2e3b1fb9d11fc Mon Sep 17 00:00:00 2001 From: Suhyma Date: Thu, 9 Feb 2023 14:38:46 -0500 Subject: [PATCH 02/13] added some minor changes --- src/data-search.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/data-search.js b/src/data-search.js index 617c8b3..f5f4b01 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -16,5 +16,9 @@ const startOffset = { months: 1 }; const START_DATE = format(sub(now, startOffset), 'yyyy-MM-dd'); const END_DATE = format(now, 'yyyy-MM-dd'); +console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); +download(START_DATE, END_DATE, BIORXIV_SOURCE); + +console.log('Combining results...'); const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; const dataFile = fs.openSync(DATA_FILE, 'w'); From 3179ef126af2d379223ba297dbb7fbbf3d5e3f02 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Fri, 10 Feb 2023 16:06:35 -0500 Subject: [PATCH 03/13] environment / script improvements --- .vscode/launch.json | 10 ++++++++++ scripts/date.js | 2 +- src/data-search.js | 17 +++++++++++++++-- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b39990a..6bd55bc 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -52,6 +52,16 @@ "--pretty" ], "console": "integratedTerminal" + }, + { + "type": "node", + "request": "launch", + "name": "DataSearch", + "skipFiles": [ + "/**" + ], + "program": "${workspaceFolder}/src/data-search.js", + "console": "integratedTerminal" } ] } \ No newline at end of file diff --git a/scripts/date.js b/scripts/date.js index cfba5c2..96fb7cd 100644 --- a/scripts/date.js +++ b/scripts/date.js @@ -1,6 +1,6 @@ import { format, sub } from 'date-fns'; -const startOffset = { months: 1 }; +const startOffset = { days: 1 }; const arg = process.argv[2]; const now = new Date(); diff --git a/src/data-search.js b/src/data-search.js index f5f4b01..7152dd6 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -3,6 +3,7 @@ // here we will convert the search.sh script into JS import { format, sub } from 'date-fns'; import fs from 'fs'; +import { options } from 'preact'; import { download, search } from './cli.js'; const CATEGORY_ID = 'alzheimers-disease'; @@ -17,8 +18,20 @@ const START_DATE = format(sub(now, startOffset), 'yyyy-MM-dd'); const END_DATE = format(now, 'yyyy-MM-dd'); console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -download(START_DATE, END_DATE, BIORXIV_SOURCE); +download(START_DATE, END_DATE, BIORXIV_SOURCE); // not sure if the 3rd param is correct here +// is just calling the download function enough? do we add something to account for the --output part in the script + +console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); +download(START_DATE, END_DATE, MEDRXIV_SOURCE); // not sure if the 3rd param is correct here +// is just calling the download function enough? do we add something to account for the --output part in the script console.log('Combining results...'); const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; -const dataFile = fs.openSync(DATA_FILE, 'w'); +const dataFile = fs.openSync(DATA_FILE, 'w'); // unsure about this solution as well to create a new file + +const QUERY = 'alzheimer'; +const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; +console.log(`Searching for ${QUERY}`); +const searchHits = search(QUERY, true); // also unsure about param 2 +const numSearchHits = Object.keys(searchHits).length; +console.log(`Found ${numSearchHits} hits`); From e9f4d1ff9735bc62982d4637a0a9cdb8e61d49c1 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Mon, 13 Feb 2023 18:00:15 -0500 Subject: [PATCH 04/13] edits to cli.js, allows debugging of data-search --- src/cli.js | 6 +++--- src/data-search.js | 34 +++++++++++++++++----------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/cli.js b/src/cli.js index 89a5875..cbf1c58 100755 --- a/src/cli.js +++ b/src/cli.js @@ -17,7 +17,7 @@ const writeText = async (text, file) => await writeFile(file, text); const printText = text => console.log(text); const getPrettyText = (articles, queryString, options) => prettyArticles(articles, queryString, options); -async function search (queryString, options) { +export async function search (queryString, options) { const searcher = new Search(); const articles = await getInput(options); @@ -31,7 +31,7 @@ async function search (queryString, options) { await sendOutput(res, options, queryString); } -async function download (startDate, endDate, options) { +export async function download (startDate, endDate, options) { const source = options.source ?? 'biorxiv'; const res = await performDownload(source, startDate, endDate); @@ -103,4 +103,4 @@ async function main () { await program.parseAsync(); } -main(); +// main(); diff --git a/src/data-search.js b/src/data-search.js index 7152dd6..870c5e1 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -2,9 +2,9 @@ // here we will convert the search.sh script into JS import { format, sub } from 'date-fns'; -import fs from 'fs'; +// import fs from 'fs'; import { options } from 'preact'; -import { download, search } from './cli.js'; +import { download } from './cli.js'; const CATEGORY_ID = 'alzheimers-disease'; const DATA_DIRECTORY = '../example-data'; @@ -13,25 +13,25 @@ const MEDRXIV_SOURCE = 'medrxiv'; const BIORXIV_SOURCE = 'biorxiv'; const now = new Date(); -const startOffset = { months: 1 }; +const startOffset = { days: 1 }; const START_DATE = format(sub(now, startOffset), 'yyyy-MM-dd'); const END_DATE = format(now, 'yyyy-MM-dd'); console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); download(START_DATE, END_DATE, BIORXIV_SOURCE); // not sure if the 3rd param is correct here -// is just calling the download function enough? do we add something to account for the --output part in the script +// // is just calling the download function enough? do we add something to account for the --output part in the script console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -download(START_DATE, END_DATE, MEDRXIV_SOURCE); // not sure if the 3rd param is correct here -// is just calling the download function enough? do we add something to account for the --output part in the script - -console.log('Combining results...'); -const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; -const dataFile = fs.openSync(DATA_FILE, 'w'); // unsure about this solution as well to create a new file - -const QUERY = 'alzheimer'; -const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; -console.log(`Searching for ${QUERY}`); -const searchHits = search(QUERY, true); // also unsure about param 2 -const numSearchHits = Object.keys(searchHits).length; -console.log(`Found ${numSearchHits} hits`); +// download(START_DATE, END_DATE, MEDRXIV_SOURCE); // not sure if the 3rd param is correct here +// // is just calling the download function enough? do we add something to account for the --output part in the script + +// console.log('Combining results...'); +// const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; +// const dataFile = fs.openSync(DATA_FILE, 'w'); // unsure about this solution as well to create a new file + +// const QUERY = 'alzheimer'; +// const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; +// console.log(`Searching for ${QUERY}`); +// const searchHits = search(QUERY, true); // also unsure about param 2 +// const numSearchHits = Object.keys(searchHits).length; +// console.log(`Found ${numSearchHits} hits`); From 0d821c902d5429b138bc142761b011d8b1a6349c Mon Sep 17 00:00:00 2001 From: Suhyma Date: Tue, 14 Feb 2023 18:12:15 -0500 Subject: [PATCH 05/13] Can create & write JSONs for both preprint sources --- src/data-search.js | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/data-search.js b/src/data-search.js index 870c5e1..0563143 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -2,12 +2,11 @@ // here we will convert the search.sh script into JS import { format, sub } from 'date-fns'; -// import fs from 'fs'; -import { options } from 'preact'; +import fs from 'fs'; import { download } from './cli.js'; const CATEGORY_ID = 'alzheimers-disease'; -const DATA_DIRECTORY = '../example-data'; +const DATA_DIRECTORY = 'example-data'; const MEDRXIV_SOURCE = 'medrxiv'; const BIORXIV_SOURCE = 'biorxiv'; @@ -17,17 +16,31 @@ const startOffset = { days: 1 }; const START_DATE = format(sub(now, startOffset), 'yyyy-MM-dd'); const END_DATE = format(now, 'yyyy-MM-dd'); +// Getting latest articles from BiorXiv console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -download(START_DATE, END_DATE, BIORXIV_SOURCE); // not sure if the 3rd param is correct here -// // is just calling the download function enough? do we add something to account for the --output part in the script - +const options = { + source: 'biorxiv', + output: `${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json` +}; +fs.open(options.output, 'w', function (err, file) { // consider changing the callback function if needed + if (err) throw err; + console.log('Saved!'); +}); +download(START_DATE, END_DATE, options); + +// Getting latest articles from MedrXiv +options.source = 'medrxiv'; +options.output = `${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json`; console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -// download(START_DATE, END_DATE, MEDRXIV_SOURCE); // not sure if the 3rd param is correct here -// // is just calling the download function enough? do we add something to account for the --output part in the script +fs.open(options.output, 'w', function (err, file) { // consider changing the callback function if needed + if (err) throw err; + console.log('Saved!'); +}); +download(START_DATE, END_DATE, options); // console.log('Combining results...'); // const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; -// const dataFile = fs.openSync(DATA_FILE, 'w'); // unsure about this solution as well to create a new file +// const dataFile = fs.openSync(DATA_FILE, 'w'); // const QUERY = 'alzheimer'; // const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; From d16ebf5805d7d78fc6b2fc78927a16cdd0b2b055 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Wed, 15 Feb 2023 17:17:35 -0500 Subject: [PATCH 06/13] Both JSON files correctly populated --- src/data-search.js | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/data-search.js b/src/data-search.js index 0563143..b1cfca2 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -16,29 +16,34 @@ const startOffset = { days: 1 }; const START_DATE = format(sub(now, startOffset), 'yyyy-MM-dd'); const END_DATE = format(now, 'yyyy-MM-dd'); +const BIORXIV_FILE = `${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json`; +const MEDRXIV_FILE = `${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json`; + +// Creating files for each data source +fs.open(BIORXIV_FILE, 'w', function (err, file) { // consider changing the callback function if needed + if (err) throw err; + console.log('Saved!'); +}); +fs.open(MEDRXIV_FILE, 'w', function (err, file) { // consider changing the callback function if needed + if (err) throw err; + console.log('Saved!'); +}); + // Getting latest articles from BiorXiv console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); const options = { - source: 'biorxiv', - output: `${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json` + source: BIORXIV_SOURCE, + output: BIORXIV_FILE }; -fs.open(options.output, 'w', function (err, file) { // consider changing the callback function if needed - if (err) throw err; - console.log('Saved!'); -}); -download(START_DATE, END_DATE, options); +await download(START_DATE, END_DATE, options); // Getting latest articles from MedrXiv -options.source = 'medrxiv'; -options.output = `${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json`; +options.source = MEDRXIV_SOURCE; +options.output = MEDRXIV_FILE; console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -fs.open(options.output, 'w', function (err, file) { // consider changing the callback function if needed - if (err) throw err; - console.log('Saved!'); -}); -download(START_DATE, END_DATE, options); +await download(START_DATE, END_DATE, options); -// console.log('Combining results...'); +console.log('Combining results...'); // const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; // const dataFile = fs.openSync(DATA_FILE, 'w'); From 041935603d045e0ce6694ff74f26084d2c7b21fa Mon Sep 17 00:00:00 2001 From: Suhyma Date: Thu, 16 Feb 2023 14:15:42 -0500 Subject: [PATCH 07/13] Added different objects to correct download task --- src/data-search.js | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/data-search.js b/src/data-search.js index b1cfca2..63b68f2 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -18,38 +18,47 @@ const END_DATE = format(now, 'yyyy-MM-dd'); const BIORXIV_FILE = `${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json`; const MEDRXIV_FILE = `${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json`; +const COMBINED_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; +const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; -// Creating files for each data source +// Getting all latest articles from BiorXiv +console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); fs.open(BIORXIV_FILE, 'w', function (err, file) { // consider changing the callback function if needed if (err) throw err; console.log('Saved!'); }); -fs.open(MEDRXIV_FILE, 'w', function (err, file) { // consider changing the callback function if needed - if (err) throw err; - console.log('Saved!'); -}); - -// Getting latest articles from BiorXiv -console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -const options = { +const bioInfo = { source: BIORXIV_SOURCE, output: BIORXIV_FILE }; -await download(START_DATE, END_DATE, options); +download(START_DATE, END_DATE, bioInfo); -// Getting latest articles from MedrXiv -options.source = MEDRXIV_SOURCE; -options.output = MEDRXIV_FILE; +// Getting all latest articles from MedrXiv console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -await download(START_DATE, END_DATE, options); +fs.open(MEDRXIV_FILE, 'w', function (err, file) { + if (err) throw err; + console.log('Saved!'); +}); +const medInfo = { + source: MEDRXIV_SOURCE, + output: MEDRXIV_FILE +}; +download(START_DATE, END_DATE, medInfo); +// Creating a JSON with all the results, both sources combined console.log('Combining results...'); -// const DATA_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; -// const dataFile = fs.openSync(DATA_FILE, 'w'); +fs.open(COMBINED_FILE, 'w', function (err, file) { + if (err) throw err; + console.log('Saved!'); +}); +// Search for the QUERY keyword in all the downloaded articles & compile the related articles // const QUERY = 'alzheimer'; -// const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; // console.log(`Searching for ${QUERY}`); // const searchHits = search(QUERY, true); // also unsure about param 2 // const numSearchHits = Object.keys(searchHits).length; // console.log(`Found ${numSearchHits} hits`); +fs.open(OUTPUT_FILE, 'w', function (err, file) { + if (err) throw err; + console.log('Saved!'); +}); From 8734bb121a26aba7d45992d36267a8b7d7443cc0 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Fri, 17 Feb 2023 19:48:38 -0500 Subject: [PATCH 08/13] Creating file output for combined results --- src/cli.js | 2 +- src/data-search.js | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/cli.js b/src/cli.js index cbf1c58..ae6d66c 100755 --- a/src/cli.js +++ b/src/cli.js @@ -39,7 +39,7 @@ export async function download (startDate, endDate, options) { await sendOutput(res, options); } -async function sendOutput (res, options, queryString) { +export async function sendOutput (res, options, queryString) { if (options.reverse) { res = res.reverse(); } diff --git a/src/data-search.js b/src/data-search.js index 63b68f2..1652042 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -3,7 +3,7 @@ // here we will convert the search.sh script into JS import { format, sub } from 'date-fns'; import fs from 'fs'; -import { download } from './cli.js'; +import { download, sendOutput } from './cli.js'; const CATEGORY_ID = 'alzheimers-disease'; const DATA_DIRECTORY = 'example-data'; @@ -31,7 +31,7 @@ const bioInfo = { source: BIORXIV_SOURCE, output: BIORXIV_FILE }; -download(START_DATE, END_DATE, bioInfo); +const bioData = await download(START_DATE, END_DATE, bioInfo); // Getting all latest articles from MedrXiv console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); @@ -43,7 +43,7 @@ const medInfo = { source: MEDRXIV_SOURCE, output: MEDRXIV_FILE }; -download(START_DATE, END_DATE, medInfo); +const medData = await download(START_DATE, END_DATE, medInfo); // Creating a JSON with all the results, both sources combined console.log('Combining results...'); @@ -51,6 +51,14 @@ fs.open(COMBINED_FILE, 'w', function (err, file) { if (err) throw err; console.log('Saved!'); }); +const combinedData = { + ...bioData, + ...medData +}; +const combinedInfo = { + output: COMBINED_FILE +}; +sendOutput(combinedData, combinedInfo); // Search for the QUERY keyword in all the downloaded articles & compile the related articles // const QUERY = 'alzheimer'; @@ -58,7 +66,7 @@ fs.open(COMBINED_FILE, 'w', function (err, file) { // const searchHits = search(QUERY, true); // also unsure about param 2 // const numSearchHits = Object.keys(searchHits).length; // console.log(`Found ${numSearchHits} hits`); -fs.open(OUTPUT_FILE, 'w', function (err, file) { - if (err) throw err; - console.log('Saved!'); -}); +// fs.open(OUTPUT_FILE, 'w', function (err, file) { +// if (err) throw err; +// console.log('Saved!'); +// }); From d8650829fc712d20bfdf5573dcd8f1c11dfbef36 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Tue, 21 Feb 2023 13:41:12 -0500 Subject: [PATCH 09/13] Correct output for combined file --- src/cli.js | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/cli.js b/src/cli.js index ae6d66c..3f3c0f7 100755 --- a/src/cli.js +++ b/src/cli.js @@ -32,11 +32,18 @@ export async function search (queryString, options) { } export async function download (startDate, endDate, options) { - const source = options.source ?? 'biorxiv'; + try { + const source = options.source ?? 'biorxiv'; - const res = await performDownload(source, startDate, endDate); + const res = await performDownload(source, startDate, endDate); - await sendOutput(res, options); + await sendOutput(res, options); + + return res; + } catch (err) { + console.error(`Error in download: ${err}`); + throw err; + } } export async function sendOutput (res, options, queryString) { From 10eabad0dbeb56f02d8c55bb5131ef258d3656ae Mon Sep 17 00:00:00 2001 From: Suhyma Date: Wed, 22 Feb 2023 12:03:19 -0500 Subject: [PATCH 10/13] Adding keyword search --- src/data-search.js | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/data-search.js b/src/data-search.js index 1652042..da2e7e5 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -3,7 +3,7 @@ // here we will convert the search.sh script into JS import { format, sub } from 'date-fns'; import fs from 'fs'; -import { download, sendOutput } from './cli.js'; +import { download, search, sendOutput } from './cli.js'; const CATEGORY_ID = 'alzheimers-disease'; const DATA_DIRECTORY = 'example-data'; @@ -27,11 +27,11 @@ fs.open(BIORXIV_FILE, 'w', function (err, file) { // consider changing the callb if (err) throw err; console.log('Saved!'); }); -const bioInfo = { +const bioOptions = { source: BIORXIV_SOURCE, output: BIORXIV_FILE }; -const bioData = await download(START_DATE, END_DATE, bioInfo); +const bioData = await download(START_DATE, END_DATE, bioOptions); // Getting all latest articles from MedrXiv console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); @@ -39,11 +39,11 @@ fs.open(MEDRXIV_FILE, 'w', function (err, file) { if (err) throw err; console.log('Saved!'); }); -const medInfo = { +const medOptions = { source: MEDRXIV_SOURCE, output: MEDRXIV_FILE }; -const medData = await download(START_DATE, END_DATE, medInfo); +const medData = await download(START_DATE, END_DATE, medOptions); // Creating a JSON with all the results, both sources combined console.log('Combining results...'); @@ -55,18 +55,22 @@ const combinedData = { ...bioData, ...medData }; -const combinedInfo = { +const combinedOptions = { output: COMBINED_FILE }; -sendOutput(combinedData, combinedInfo); +sendOutput(combinedData, combinedOptions); // Search for the QUERY keyword in all the downloaded articles & compile the related articles -// const QUERY = 'alzheimer'; -// console.log(`Searching for ${QUERY}`); -// const searchHits = search(QUERY, true); // also unsure about param 2 -// const numSearchHits = Object.keys(searchHits).length; -// console.log(`Found ${numSearchHits} hits`); -// fs.open(OUTPUT_FILE, 'w', function (err, file) { -// if (err) throw err; -// console.log('Saved!'); -// }); +const QUERY = 'alzheimer'; +fs.open(OUTPUT_FILE, 'w', function (err, file) { + if (err) throw err; + console.log('Saved!'); +}); +const outputOptions = { + input: COMBINED_FILE, + output: OUTPUT_FILE +}; +console.log(`Searching for ${QUERY}`); +const searchHits = await search(QUERY, outputOptions); +const numSearchHits = Object.keys(searchHits).length; +console.log(`Found ${numSearchHits} hits`); From 8240f494ee0559265f95b28be01a98ebbf20409d Mon Sep 17 00:00:00 2001 From: Suhyma Date: Wed, 22 Feb 2023 13:25:20 -0500 Subject: [PATCH 11/13] Fixed JSON.parse() error --- src/data-search.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data-search.js b/src/data-search.js index da2e7e5..3ade0ca 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -58,7 +58,7 @@ const combinedData = { const combinedOptions = { output: COMBINED_FILE }; -sendOutput(combinedData, combinedOptions); +await sendOutput(combinedData, combinedOptions); // Search for the QUERY keyword in all the downloaded articles & compile the related articles const QUERY = 'alzheimer'; From 1e9ea5a6841691f91336b157f9c942f05fe62269 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Wed, 22 Feb 2023 18:01:03 -0500 Subject: [PATCH 12/13] Completed data-search.js --- src/cli.js | 21 ++++++++++++++------- src/data-search.js | 7 ++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/cli.js b/src/cli.js index 3f3c0f7..0f66e01 100755 --- a/src/cli.js +++ b/src/cli.js @@ -18,17 +18,24 @@ const printText = text => console.log(text); const getPrettyText = (articles, queryString, options) => prettyArticles(articles, queryString, options); export async function search (queryString, options) { - const searcher = new Search(); + try { + const searcher = new Search(); + + const articles = await getInput(options); - const articles = await getInput(options); + await searcher.articles(articles); - await searcher.articles(articles); + const res = await searcher.search(queryString, { + combineWith: options.strict ? 'AND' : 'OR' + }); - const res = await searcher.search(queryString, { - combineWith: options.strict ? 'AND' : 'OR' - }); + await sendOutput(res, options, queryString); - await sendOutput(res, options, queryString); + return res; + } catch (err) { + console.error(`Error in search: ${err}`); + throw err; + } } export async function download (startDate, endDate, options) { diff --git a/src/data-search.js b/src/data-search.js index 3ade0ca..af8d687 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -51,10 +51,7 @@ fs.open(COMBINED_FILE, 'w', function (err, file) { if (err) throw err; console.log('Saved!'); }); -const combinedData = { - ...bioData, - ...medData -}; +const combinedData = bioData.concat(medData); const combinedOptions = { output: COMBINED_FILE }; @@ -72,5 +69,5 @@ const outputOptions = { }; console.log(`Searching for ${QUERY}`); const searchHits = await search(QUERY, outputOptions); -const numSearchHits = Object.keys(searchHits).length; +const numSearchHits = searchHits.length; console.log(`Found ${numSearchHits} hits`); From 775d08a301566bc43329502936e07c7b9c905092 Mon Sep 17 00:00:00 2001 From: Suhyma Date: Fri, 24 Feb 2023 18:15:47 -0500 Subject: [PATCH 13/13] Using promises to open new files --- src/data-search.js | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/data-search.js b/src/data-search.js index af8d687..eaf4f14 100644 --- a/src/data-search.js +++ b/src/data-search.js @@ -2,7 +2,7 @@ // here we will convert the search.sh script into JS import { format, sub } from 'date-fns'; -import fs from 'fs'; +import fs from 'fs/promises'; import { download, search, sendOutput } from './cli.js'; const CATEGORY_ID = 'alzheimers-disease'; @@ -23,10 +23,7 @@ const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; // Getting all latest articles from BiorXiv console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -fs.open(BIORXIV_FILE, 'w', function (err, file) { // consider changing the callback function if needed - if (err) throw err; - console.log('Saved!'); -}); +fs.open(BIORXIV_FILE, 'w'); const bioOptions = { source: BIORXIV_SOURCE, output: BIORXIV_FILE @@ -35,10 +32,7 @@ const bioData = await download(START_DATE, END_DATE, bioOptions); // Getting all latest articles from MedrXiv console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); -fs.open(MEDRXIV_FILE, 'w', function (err, file) { - if (err) throw err; - console.log('Saved!'); -}); +fs.open(MEDRXIV_FILE, 'w'); const medOptions = { source: MEDRXIV_SOURCE, output: MEDRXIV_FILE @@ -47,10 +41,7 @@ const medData = await download(START_DATE, END_DATE, medOptions); // Creating a JSON with all the results, both sources combined console.log('Combining results...'); -fs.open(COMBINED_FILE, 'w', function (err, file) { - if (err) throw err; - console.log('Saved!'); -}); +fs.open(COMBINED_FILE, 'w'); const combinedData = bioData.concat(medData); const combinedOptions = { output: COMBINED_FILE @@ -59,10 +50,7 @@ await sendOutput(combinedData, combinedOptions); // Search for the QUERY keyword in all the downloaded articles & compile the related articles const QUERY = 'alzheimer'; -fs.open(OUTPUT_FILE, 'w', function (err, file) { - if (err) throw err; - console.log('Saved!'); -}); +fs.open(OUTPUT_FILE, 'w'); const outputOptions = { input: COMBINED_FILE, output: OUTPUT_FILE