Add flag for cleaning up language data files; update README

Signed-off-by: Daniel Hsing <hsing.daniel@gmail.com>
Arthelon · May 13, 2017 · c8c93f7 · c8c93f7
1 parent fb944e4
commit c8c93f7
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -1,14 +1,16 @@
 # imgclip
 
-Command line utility that extracts text from an image into the system clipboard. Uses the [tesseract](https://github.com/naptha/tesseract.js) OCR
+Command line utility that extracts text from an image into the system clipboard. Uses the [tesseract.js](https://github.com/naptha/tesseract.js) OCR wrapper
 
 [![asciicast](https://asciinema.org/a/1n7wfprarthnh9htkavu3trkl.png)](https://asciinema.org/a/1n7wfprarthnh9htkavu3trkl)
 
 ### Installation
 
     npm install -g imgclip
 
-NOTE: Compatible only with node v6.8.0+
+### Notes
+- Only compatible with Node v6.8.0+
+- Downloads a `lang`.traineddata file, which is needed to perform the image recognition, into the current working directory (use the `--clean-up` flag to remove it after execution).
 
 ### Usage
 
@@ -19,6 +21,7 @@ NOTE: Compatible only with node v6.8.0+
     -h, --help             output usage information
     -V, --version          output the version number
     -l, --lang [language]  language of the text in the image.
+    -c, --clean-up         removes the generated language data file (.traineddata) after the image recognition job has finished
     -p, --print            prints out the text in the image.
 
 Full language list can be found [here](https://github.com/naptha/tesseract.js/blob/master/docs/tesseract_lang_list.md)
diff --git a/index.js b/index.js
@@ -12,6 +12,7 @@ program
 	.description(PkgJson.description)
 	.version(PkgJson.version)
 	.option("-l, --lang [language]", "language of the text in the image.")
+	.option("-c, --clean-up", "removes the generated language data file (.traineddata) after the image recognition job has finished")	
 	.option("-p, --print", "prints out the text in the image.\n\nFull language list can be found here: \nhttps://github.com/naptha/tesseract.js/blob/master/docs/tesseract_lang_list.md")
 	.parse(process.argv)
 
@@ -21,24 +22,29 @@ if (errorMessage) {
 	program.help()
 	return
 }
-recognize(program.args[0], program.lang, program.print)
+recognize({
+	imagePath: program.args[0], // file path
+	lang: program.lang,
+	printResult: program.print,
+	cleanup: program.cleanUp,
+})
 
 function validateArgs(args) {
 	if (!args.args.length || !args.args[0]){
 		return "No Path Specified"
 	}
 
-	if (args.lang && langs.indexOf(args.lang) === -1) {
-		return "Invalid Language!"
-	}
-
 	if (!fs.existsSync(args.args[0])) {
 		return `File path not found: ${args.args[0]}`;
 	}
+
+	if (args.lang && langs.indexOf(args.lang) === -1) {
+		return "Invalid Language!"
+	}
 	return null;
 }
 
-function recognize(imagePath, lang = 'eng', printResult = false) {
+function recognize({ imagePath, lang = 'eng', printResult = false, cleanup = false }) {
 	const bar = new Progress("recognizing [:bar] :percent :elapseds", {total: 100})
 	let prev = 0
 	Tesseract.recognize(imagePath, {
@@ -59,6 +65,9 @@ function recognize(imagePath, lang = 'eng', printResult = false) {
 		if (prev < 100) {
 			bar.tick(100 - prev)
 		}
+		if (cleanup) {
+			fs.unlinkSync(`${lang}.traineddata`)
+		}
 		copyPaste.copy(result.text, () => {
 			if(printResult) {
 				console.log("\nResult:\n" + result.text.slice(0, result.text.length - 1))