Skip to content

Commit

Permalink
add flag for cleaning up language data files, updated readme
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Hsing <hsing.daniel@gmail.com>
  • Loading branch information
Arthelon committed May 13, 2017
1 parent fb944e4 commit c8c93f7
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 8 deletions.
7 changes: 5 additions & 2 deletions README.md
@@ -1,14 +1,16 @@
# imgclip

Command line utility that extracts text from an image into the system clipboard. Uses the [tesseract](https://github.com/naptha/tesseract.js) OCR
Command line utility that extracts text from an image into the system clipboard. Uses the [tesseract.js](https://github.com/naptha/tesseract.js) OCR wrapper

[![asciicast](https://asciinema.org/a/1n7wfprarthnh9htkavu3trkl.png)](https://asciinema.org/a/1n7wfprarthnh9htkavu3trkl)

### Installation

npm install -g imgclip

NOTE: Compatible only with node v6.8.0+
### Notes
- Only compatible with Node v6.8.0+
- Downloads the `<lang>.traineddata` file needed for image recognition into the current working directory (use the `--clean-up` flag to remove it after execution).

### Usage

Expand All @@ -19,6 +21,7 @@ NOTE: Compatible only with node v6.8.0+
-h, --help output usage information
-V, --version output the version number
-l, --lang [language] language of the text in the image.
-c, --clean-up removes the generated language data file (.traineddata) after the image recognition job has finished
-p, --print prints out the text in the image.

Full language list can be found [here](https://github.com/naptha/tesseract.js/blob/master/docs/tesseract_lang_list.md)
21 changes: 15 additions & 6 deletions index.js
Expand Up @@ -12,6 +12,7 @@ program
.description(PkgJson.description)
.version(PkgJson.version)
.option("-l, --lang [language]", "language of the text in the image.")
.option("-c, --clean-up", "removes the generated language data file (.traineddata) after the image recognition job has finished")
.option("-p, --print", "prints out the text in the image.\n\nFull language list can be found here: \nhttps://github.com/naptha/tesseract.js/blob/master/docs/tesseract_lang_list.md")
.parse(process.argv)

Expand All @@ -21,24 +22,29 @@ if (errorMessage) {
program.help()
return
}
recognize(program.args[0], program.lang, program.print)
recognize({
imagePath: program.args[0], // file path
lang: program.lang,
printResult: program.print,
cleanup: program.cleanUp,
})

/**
 * Validates the parsed command-line arguments before recognition starts.
 *
 * Checks run in order and the first failure wins:
 *   1. an image path must be given as the first positional argument,
 *   2. if a language was given, it must be present in `langs`,
 *   3. the image path must exist on disk.
 *
 * @param {Object} args - parsed arguments; reads `args.args` (positional
 *   arguments, the first being the image path) and the optional `args.lang`.
 * @returns {?string} the error message for the first failed check, or `null`
 *   when every check passes.
 */
function validateArgs(args) {
  const imagePath = args.args[0]

  if (!args.args.length || !imagePath) {
    return "No Path Specified"
  }

  // `langs` is defined elsewhere in this file; presumably the list of
  // language codes supported by tesseract.js -- confirm against its definition.
  // The check is skipped entirely when no language was requested.
  if (args.lang && langs.indexOf(args.lang) === -1) {
    return "Invalid Language!"
  }

  if (!fs.existsSync(imagePath)) {
    return `File path not found: ${imagePath}`
  }

  return null
}

function recognize(imagePath, lang = 'eng', printResult = false) {
function recognize({ imagePath, lang = 'eng', printResult = false, cleanup = false }) {
const bar = new Progress("recognizing [:bar] :percent :elapseds", {total: 100})
let prev = 0
Tesseract.recognize(imagePath, {
Expand All @@ -59,6 +65,9 @@ function recognize(imagePath, lang = 'eng', printResult = false) {
if (prev < 100) {
bar.tick(100 - prev)
}
if (cleanup) {
fs.unlinkSync(`${lang}.traineddata`)
}
copyPaste.copy(result.text, () => {
if(printResult) {
console.log("\nResult:\n" + result.text.slice(0, result.text.length - 1))
Expand Down

0 comments on commit c8c93f7

Please sign in to comment.