Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

update to new version, minor fixes.

  • Loading branch information...
commit b66884ca66b6c435278f128aa8d235a270be16a8 1 parent 0f8207a
@KirillTemnov authored
Showing with 167 additions and 32 deletions.
  1. +3 −3 Makefile
  2. +11 −4 Readme.md
  3. +134 −25 lib/app.coffee
  4. +19 −0 package.json
View
6 Makefile
@@ -1,7 +1,7 @@
-.PHONY: all
+.PHONY: all clean
all:
- coffee -b -c lib/*.coffee
+ coffee -b -c lib/*.coffee
clean:
- rm lib/*.js
+ rm lib/*.js
View
15 Readme.md
@@ -4,6 +4,7 @@ node-ocr обертка для ABBYY Cloud API ( [http://ocrsdk.com/](http://ocr
# Установка
Работает с nodejs 0.6.x.
+
```
npm install node-ocr
@@ -13,10 +14,14 @@ node-ocr обертка для ABBYY Cloud API ( [http://ocrsdk.com/](http://ocr
## Создать объект для доступа к api
+
```coffee-script
- ocr = require("node-ocr").createWrapper()
+ ocr = require("node-ocr").createWrapper(appId, appPwd)
```
+`appId` и `appPwd` - опциональные параметы, если не заданы, идентификатор и пароль
+беруться из переменных окружения `ABBYY_APPID` и `ABBYY_PWD` соответственно.
+
## Получить список заданий
@@ -91,7 +96,7 @@ node-ocr обертка для ABBYY Cloud API ( [http://ocrsdk.com/](http://ocr
```coffee-script
sys = require "util"
- ocr = reqire("node-ocr").createWrapper()
+ ocr = require("node-ocr").createWrapper()
opts = outputFormat: "xml"
@@ -120,7 +125,7 @@ node-ocr обертка для ABBYY Cloud API ( [http://ocrsdk.com/](http://ocr
```javascript
sys = require("util")
- ocr = reqire("node-ocr").createWrapper()
+ ocr = require("node-ocr").createWrapper()
opts = {outputFormat: "txt"} # с русским текстом пока проблемы
filename = "/tmp/filewithtext.png"
@@ -142,11 +147,13 @@ node-ocr обертка для ABBYY Cloud API ( [http://ocrsdk.com/](http://ocr
console.log("error :\n#" + sys.inspect(err));
}
})
-
```
# Changelog
+## v 0.1.1
+ - Ключи API можно изменить при вызове `createWrapper`
+
## v 0.1.0
- Загрузка изображений
View
159 lib/app.coffee
@@ -3,8 +3,8 @@ Main wrapper module
###
OcrHost = "cloud.ocrsdk.com"
-AppId = process.env.ABBYY_APPID || "app-id-here"
-AppPass = process.env.ABBYY_PWD || "app-passwd-here"
+AppId = process.env.ABBYY_APPID
+AppPass = process.env.ABBYY_PWD
Port = "443"
http = require "http"
@@ -12,10 +12,25 @@ fs = require "fs"
sys = require "util"
xml2json = require "xml2json"
url = require "url"
+#iconv = require "iconv"
-
+###
+Class that implement api calls to abbyy cloud api.
+###
class OCR
+ constructor: (@appId=AppId, @appPass=AppPass) ->
+ @version = "0.1.1"
+ ###
+ Create options, that passed to request object.
+
+ @param {String} path Relative path, starting from "/"
+ @param {"POST"|"GET"|"PUT"} method Http method
+ @param {Object} headers Client headers, authorization will be added forcibly.
+ @param {String} port Port :default "80"
+ @param {String} host Hotsname :default "cloud.ocrsdk.com"
+ @return {Object} options Options dict
+ ###
_createOptions: (path, method, headers={}, port="80", host=OcrHost) ->
opts =
host : host
@@ -25,21 +40,58 @@ class OCR
headers : headers
unless opts.headers.Authorization?
- opts.headers.Authorization = "Basic " + new Buffer("#{AppId}:#{AppPass}").toString "base64"
+ opts.headers.Authorization = "Basic " + new Buffer("#{@appId}:#{@appPass}").toString "base64"
opts
- _createOtionsFromUrl: (fullpath, method, headers) ->
+ ###
+ Create options, that passed to request object.
+ @see `_createOptions`
+
+ @param {String} path Full path, starting from "http"
+ @param {"POST"|"GET"|"PUT"} method Http method
+ @param {Object} headers Client headers, authorization will be added forcibly.
+ @return {Object} options Options dict
+ ###
+ _createOptionsFromUrl: (fullpath, method, headers) ->
method ||= "GET"
parsedUrl = url.parse fullpath
port = if parsedUrl.protocol is "https" then "443" else "80"
@_createOptions parsedUrl.path, method, headers, port, parsedUrl.host
+ ###
+ Get binary data from server. This method is buggy.
+
+ @param {Object} opts Request options, @see _createOptionsFromUrl
+ @param {Function} fn Callback function, that accept 1) error (or null) 2) data.
+ ###
+ _getServerBinaryAnswer: (opts, fn) ->
+ resData = new Buffer 10485760
+ ind = 0
+ req = http.request opts, (res) ->
+
+ res.setEncoding "binary"
+ res.on "data", (chunk) ->
+ ind += resData.write(chunk, ind)
+ res.on "end", ->
+ fn null, resData
+ res.on "error", (err) -> fn err
+ req.end()
+
+
+ ###
+ Post/get data to server and retrieve answer.
+ If opts method set to "GET", request will be sended authomatically, othervise, you need
+ to forse request after calling this method by applying `res.end()`.
+
+ @param {Object} opts Request options, @see _createOptionsFromUrl
+ @param {Function} fn Callback function, that accept 1) error (or null) 2) data.
+ @return {Object} req Request object, that can be used to post data and perform "post"/"put" requests.
+ ###
_getServerAnswer: (opts, fn) ->
resData = ""
req = http.request opts, (res) ->
- unless opts.noEncoding
- res.setEncoding "utf8"
+ res.setEncoding "utf8"
res.on "data", (chunk) ->
resData += chunk
res.on "end", ->
@@ -49,14 +101,23 @@ class OCR
req.end()
req
+ ###
+ For post purposes.
+ ###
_generateBoundary: ->
"--------------------------------------------------#{Date.now()}--"
# --------------------------------------------------------------------------------
- # api wrapper
+ # public API
# --------------------------------------------------------------------------------
+ ###
+ Get task status by task id.
+
+ @param {String} taskId Task id
+ @param {Function} fn Callback function, accept 1) error, 2) task status in json format
+ ###
getTaskStatus: (taskId, fn) ->
getOpts = @_createOptions "/getTaskStatus?taskId=#{taskId}", "get"
@_getServerAnswer getOpts, (err, data) ->
@@ -69,7 +130,11 @@ class OCR
fn msg: "server error"
+ ###
+ Get list of tasks.
+ @param {Function} fn Callback function, accept 1) error, 2) list of tasks in json format
+ ###
listTasks: (fn) ->
getOpts = @_createOptions "/listTasks", "get"
@_getServerAnswer getOpts, (err, data) ->
@@ -82,13 +147,37 @@ class OCR
fn msg: "server error"
- applyToFile: (filename, opts={}, fn) ->
+ ###
+ Apply ocr to file on local filesystem.
+
+ @param {String} filename Absolute/relative path to file
+ @param {Object|Function} opts Options, of function, see next param.
+ opts.outputFormat - one of "txt", "rtf", "pdfSearchable", "docx", "xml"
+ opts.lang (String, Array of strings) - language, russian, english and maybe others
+ @param {Function} fn Callback function, accept 1) error, 2) new task id
+ ###
+ applyToFile: (filename, opts, fn) ->
+ if "function" is typeof opts
+ fn = opts
+ opts = null
+
+ opts ||= {}
+
try
buf = fs.readFileSync filename
@applyToBuffer buf, opts, fn
catch e
fn msg: "can't read file #{filename}"
+ ###
+ Apply OCR conversion to buffer, containing image file.
+
+ @param {Buffer} buffer Buffer with image
+ @param {Object|Function} opts Options, of function, see next param.
+ opts.outputFormat - one of "txt", "rtf", "pdfSearchable", "docx", "xml"
+ opts.lang (String, Array of strings) - language, russian, english and maybe others
+ @param {Function} fn Callback function, accept 1) error, 2) new task id
+ ###
applyToBuffer: (buffer, opts={}, fn) ->
opts.outputFormat ||= "txt"
timeout = opts.requestTimeout || 1000 # 1 second
@@ -119,6 +208,40 @@ class OCR
postReq.write boundary
postReq.end()
+
+ ###
+ Get text content after OCR from specified url.
+
+ @param {String} srcUrl Url with text content.
+ @param {Function} fn Callback function, accept 1) error 2) dictionary with 2 fields: `resultUrl` and `text`
+ ###
+ getTextFromUrl: (srcUrl, fn) ->
+ getOpts = @_createOptionsFromUrl srcUrl, "GET", no
+ delete getOpts.headers.Authorization
+
+ @_getServerBinaryAnswer getOpts, (err, data) ->
+ unless err
+ try
+ # conv = new iconv.Iconv "CP1251", "UTF8//IGNORE"
+ # text = conv.convert(data).toString()
+ fn null, resultUrl: srcUrl, text: data #text
+ catch e
+ fn msg: "error", resultUrl: srcUrl, error: e
+ else
+ fn msg: "error downloading file", resultUrl: srcUrl
+
+
+ ###
+ Wait till task end, then get url and content (for text data).
+
+ @param {String} taskId Task id
+ @param {Object|Function} opts Options, of function, see next param.
+ opts.outputFormat - one of "txt", "rtf", "pdfSearchable", "docx", "xml"
+ opts.lang (String, Array of strings) - language, russian, english and maybe others
+ @param {Function} fn Callback function, accept 1) error, 2) task object,
+ contain resultUrl and (if opts.outputFormat is set to "txt") text fields.
+
+ ###
waitTaskEnd: (taskId, opts, fn) ->
if "function" is typeof opts
fn = opts
@@ -141,20 +264,7 @@ class OCR
if resultUrl
# if textual format, download txt or return url
if format is "txt"
- getOpts = @_createOtionsFromUrl resultUrl, "GET", no
- delete getOpts.headers.Authorization
- getOpts.noEncoding = yes
-# console.log "\n\nGO = #{sys.inspect getOpts}"
- @_getServerAnswer getOpts, (err, data) ->
- unless err
- try
- conv = new iconv.Iconv "windows-1251", "utf8"
- body = conv.convert(new Buffer(data, 'binary')).toString()
- fn null, resultUrl: resultUrl, text: body
- catch e
- fn msg: "error", resultUrl: resultUrl
- else
- fn msg: "error downloading file", resultUrl: resultUrl
+ @getTextFromUrl resultUrl, fn
else
fn null, {resultUrl: resultUrl}
else if status?.task?.status?.toLowerCase() in ["queued" , "inprogress"]
@@ -168,5 +278,4 @@ class OCR
nextTick()
-exports.createWrapper = -> new OCR()
-
+exports.createWrapper = (appId, appPass) -> new OCR appId, appPass
View
19 package.json
@@ -0,0 +1,19 @@
+{
+ "name": "node-ocr",
+ "version": "0.1.1",
+ "description": "wrapper for abbyy finereader cloud api (now in beta).",
+ "engines": {
+ "node": ">= 0.4.0 < 0.7.0"
+ },
+ "dependencies": {"coffee-script": ">= 1.1.1", "xml2json": ">= 0.2.x"},
+ "scripts": {"postinstall": "make all"},
+ "main": "./lib/app",
+ "keywords": ["ocr", "text processing"],
+ "author": "Temnov Kirill <allselead@gmail.com>"
+}
+
+
+
+
+
+
Please sign in to comment.
Something went wrong with that request. Please try again.