Skip to content

Commit

Permalink
tesseract: 3.02.02 -> 3.04.01
Browse files Browse the repository at this point in the history
From the upstream changelog:

 * Tesseract development is now done with Git and hosted at github.com
   (Previously we used Subversion as a VCS and code.google.com for
   hosting).

So let's move over to the GitHub repository, where the organisation also
includes a full repository for tessdata, so we no longer need to fetch
each language's data file one by one.

The build also got significantly simpler, because we no longer need to
run autoconf, nor do we need to patch the configure script for
Leptonica headers.

This also has the advantage that we don't need to use the
enableLanguages attribute for the test runner anymore.

Full upstream changelog can be found at:

https://github.com/tesseract-ocr/tesseract/blob/c4d273d33cc36e/ChangeLog

Tested against all NixOS tests with enabled OCR (chromium, emacs-daemon,
installer.luksroot and lightdm).

Signed-off-by: aszlig <aszlig@redmoonstudios.org>
Cc: @viric
  • Loading branch information
aszlig committed Dec 19, 2016
1 parent f805209 commit 68bc260
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 41 deletions.
2 changes: 1 addition & 1 deletion nixos/lib/testing.nix
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ rec {

vms = map (m: m.config.system.build.vm) (lib.attrValues nodes);

ocrProg = tesseract.override { enableLanguages = [ "eng" ]; };
ocrProg = tesseract;

# Generate convenience wrappers for running the test driver
# interactively with the specified network, and for starting the
Expand Down
58 changes: 18 additions & 40 deletions pkgs/applications/graphics/tesseract/default.nix
Original file line number Diff line number Diff line change
@@ -1,53 +1,31 @@
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff
, enableLanguages ? null
{ stdenv, fetchFromGitHub, pkgconfig, leptonica, libpng, libtiff
, icu, pango, opencl-headers
}:

with stdenv.lib;

let
majVersion = "3.02";
version = "${majVersion}.02";

mkLang = lang: sha256: let
src = fetchurl {
url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
inherit sha256;
};
in "tar xfvz ${src} -C $out/share/ --strip=1";

wantLang = name: const (enableLanguages == null || elem name enableLanguages);

extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang {
cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9";
rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709";
spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l";
nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy";
eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461";
slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr";
jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9";
});
in

stdenv.mkDerivation rec {
name = "tesseract-${version}";
version = "3.04.01";

src = fetchurl {
url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${version}.tar.gz";
sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96";
src = fetchFromGitHub {
owner = "tesseract-ocr";
repo = "tesseract";
rev = version;
sha256 = "0h1x4z1h86n2gwknd0wck6gykkp99bmm02lg4a47a698g4az6ybv";
};

buildInputs = [ autoconf automake libtool leptonica libpng libtiff ];
tessdata = fetchFromGitHub {
owner = "tesseract-ocr";
repo = "tessdata";
rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
};

hardeningDisable = [ "format" ];
nativeBuildInputs = [ pkgconfig ];
buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];

preConfigure = ''
./autogen.sh
substituteInPlace "configure" \
--replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \
'LIBLEPT_HEADERSDIR=${leptonica}/include'
'';
LIBLEPT_HEADERSDIR = "${leptonica}/include";

postInstall = concatStringsSep "; " extraLanguages;
postInstall = "cp -Rt \"$out/share/tessdata\" \"$tessdata/\"*";

meta = {
description = "OCR engine";
Expand Down

0 comments on commit 68bc260

Please sign in to comment.