/
tesseract.rb
92 lines (76 loc) 路 3.69 KB
/
tesseract.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
class Tesseract < Formula
desc "OCR (Optical Character Recognition) engine"
homepage "https://github.com/tesseract-ocr/"
url "https://github.com/tesseract-ocr/tesseract/archive/refs/tags/5.3.4.tar.gz"
sha256 "141afc12b34a14bb691a939b4b122db0d51bd38feda7f41696822bacea7710c7"
license "Apache-2.0"
revision 1
head "https://github.com/tesseract-ocr/tesseract.git", branch: "main"
livecheck do
url :stable
regex(/^v?(\d+(?:\.\d+)+)$/i)
end
bottle do
sha256 cellar: :any, arm64_sonoma: "6025ab29df5d7c96b4402964c393f02a3c8b12a1cced36d985ad26a0739c18d9"
sha256 cellar: :any, arm64_ventura: "f3781bcc17a408b0b595864460fbfb72f5c8792766c324fcfadf4300693eaade"
sha256 cellar: :any, arm64_monterey: "293eb4c6ae34966a4cbd869fa1ee4e8542d9c11e4daaf7249b1f13c82890d5b6"
sha256 cellar: :any, sonoma: "767999fd87af693c0505bd06e26507c15b063d03a852c68da74974fccf7c15b8"
sha256 cellar: :any, ventura: "80ce438228ee16bc9693b9b5d7eb1eb67b3385e9f6b256941539737ce9a75670"
sha256 cellar: :any, monterey: "11977a77350497b9b260966fa0b2f746e2f3e27646fe2896d87c97a1f84f2c9f"
sha256 cellar: :any_skip_relocation, x86_64_linux: "5cac1de32545da7765422c9b9bcef0bb9e8b4eb9f719ec52fe01fc68807a77af"
end
depends_on "autoconf" => :build
depends_on "automake" => :build
depends_on "libtool" => :build
depends_on "pkg-config" => :build
depends_on "cairo"
depends_on "icu4c"
depends_on "leptonica"
depends_on "libarchive"
depends_on "pango"
fails_with gcc: "5"
resource "eng" do
url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.1.0/eng.traineddata"
sha256 "7d4322bd2a7749724879683fc3912cb542f19906c83bcc1a52132556427170b2"
end
resource "osd" do
url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.1.0/osd.traineddata"
sha256 "9cf5d576fcc47564f11265841e5ca839001e7e6f38ff7f7aacf46d15a96b00ff"
end
resource "snum" do
url "https://github.com/USCDataScience/counterfeit-electronics-tesseract/raw/319a6eeacff181dad5c02f3e7a3aff804eaadeca/Training%20Tesseract/snum.traineddata"
sha256 "36f772980ff17c66a767f584a0d80bf2302a1afa585c01a226c1863afcea1392"
end
resource "test_resource" do
url "https://raw.githubusercontent.com/tesseract-ocr/test/6dd816cdaf3e76153271daf773e562e24c928bf5/testing/eurotext.tif"
sha256 "7b9bd14aba7d5e30df686fbb6f71782a97f48f81b32dc201a1b75afe6de747d6"
end
def install
# explicitly state leptonica header location, as the makefile defaults to /usr/local/include,
# which doesn't work for non-default homebrew location
ENV["LIBLEPT_HEADERSDIR"] = HOMEBREW_PREFIX/"include"
ENV.cxx11
system "./autogen.sh"
system "./configure", "--prefix=#{prefix}",
"--disable-dependency-tracking",
"--datarootdir=#{HOMEBREW_PREFIX}/share"
system "make", "training"
# make install in the local share folder to avoid permission errors
system "make", "install", "training-install", "datarootdir=#{share}"
resource("snum").stage { mv "snum.traineddata", share/"tessdata" }
resource("eng").stage { mv "eng.traineddata", share/"tessdata" }
resource("osd").stage { mv "osd.traineddata", share/"tessdata" }
end
def caveats
<<~EOS
This formula contains only the "eng", "osd", and "snum" language data files.
If you need any other supported languages, run `brew install tesseract-lang`.
EOS
end
test do
resource("test_resource").stage do
system bin/"tesseract", "./eurotext.tif", "./output", "-l", "eng"
assert_match "The (quick) [brown] {fox} jumps!\n", File.read("output.txt")
end
end
end