Skip to content

Commit

Permalink
Merge pull request pulibrary#398 from pulibrary/ocr_language
Browse files Browse the repository at this point in the history
Set OCR Language
  • Loading branch information
escowles committed Feb 1, 2016
2 parents f7a12fc + c725c60 commit 0a74ffa
Show file tree
Hide file tree
Showing 28 changed files with 300 additions and 71 deletions.
4 changes: 3 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,11 @@ gem "omniauth-cas"
gem 'ezid-client'
gem 'sprockets-es6'
gem 'sprockets-rails', '~> 2.3.3'
gem 'browse-everything', github: 'projecthydra-labs/browse-everything'
gem 'browse-everything', github: 'projecthydra-labs/browse-everything', branch: 'bootstrap-sprockets'
gem 'aasm'
gem 'newrelic_rpm'
gem 'iso-639'
source 'https://rails-assets.org' do
gem 'rails-assets-babel-polyfill'
gem 'rails-assets-bootstrap-select', '1.9.4'
end
58 changes: 26 additions & 32 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ GIT

GIT
remote: git://github.com/projecthydra-labs/browse-everything.git
revision: 7f547a23d8fc20bd249b8f607e009b03aa112c5d
revision: 3c3f60472c2b31a3d5a07e8f2bf50f09f8220ed5
branch: bootstrap-sprockets
specs:
browse-everything (0.9.1)
bootstrap-sass
Expand Down Expand Up @@ -325,34 +326,26 @@ GEM
faraday (0.9.2)
multipart-post (>= 1.2, < 3)
ffi (1.9.10)
font-awesome-rails (4.4.0.0)
font-awesome-rails (4.5.0.0)
railties (>= 3.2, < 5.0)
globalid (0.3.6)
activesupport (>= 4.1.0)
google-api-client (0.8.6)
activesupport (>= 3.2)
addressable (~> 2.3)
autoparse (~> 0.3)
extlib (~> 0.9)
faraday (~> 0.9)
googleauth (~> 0.3)
launchy (~> 2.4)
multi_json (~> 1.10)
retriable (~> 1.4)
signet (~> 0.6)
google_drive (1.0.2)
google-api-client (0.7.1)
addressable (>= 2.3.2)
autoparse (>= 0.3.3)
extlib (>= 0.9.15)
faraday (>= 0.9.0)
jwt (>= 0.1.5)
launchy (>= 2.1.1)
multi_json (>= 1.0.0)
retriable (>= 1.4)
signet (>= 0.5.0)
uuidtools (>= 2.1.0)
google_drive (1.0.5)
google-api-client (>= 0.7.0, < 0.9)
minitest (>= 5.1.0)
nokogiri (>= 1.4.4, != 1.5.2, != 1.5.1)
oauth (>= 0.3.6)
oauth2 (>= 0.5.0)
googleauth (0.4.2)
faraday (~> 0.9)
jwt (~> 1.4)
logging (~> 2.0)
memoist (~> 0.12)
multi_json (~> 1.11)
signet (~> 0.6)
haml (4.0.7)
tilt
hashdiff (0.2.2)
Expand Down Expand Up @@ -401,6 +394,7 @@ GEM
activesupport (>= 3.2.18)
faraday (~> 0.9.0)
json
iso-639 (0.2.5)
jasmine-core (2.3.4)
jasmine-jquery-rails (2.0.3)
jasmine-rails (0.12.2)
Expand All @@ -417,7 +411,7 @@ GEM
i18n
logger
rubyzip
jquery-rails (4.0.5)
jquery-rails (4.1.0)
rails-dom-testing (~> 1.0)
railties (>= 4.2.0)
thor (>= 0.14, < 2.0)
Expand Down Expand Up @@ -460,22 +454,17 @@ GEM
rdf-xsd (~> 1.1, >= 1.1.5)
sparql (~> 1.99)
sparql-client (~> 1.99)
little-plugger (1.1.4)
logger (1.2.8)
logging (2.0.0)
little-plugger (~> 1.1)
multi_json (~> 1.10)
loofah (2.0.3)
nokogiri (>= 1.5.9)
mail (2.6.3)
mime-types (>= 1.16, < 3)
marc (1.0.0)
scrub_rb (>= 1.0.1, < 2)
unf
memoist (0.12.0)
method_source (0.8.2)
mime-types (2.99)
mimemagic (0.3.0)
mimemagic (0.3.1)
mini_magick (4.3.6)
mini_portile (0.6.2)
minitest (5.8.3)
Expand Down Expand Up @@ -563,6 +552,9 @@ GEM
railties (= 4.2.4)
sprockets-rails
rails-assets-babel-polyfill (0.0.1)
rails-assets-bootstrap-select (1.9.4)
rails-assets-jquery (>= 1.8)
rails-assets-jquery (2.2.0)
rails-deprecated_sanitizer (1.0.3)
activesupport (>= 4.2.0.alpha)
rails-dom-testing (1.0.7)
Expand Down Expand Up @@ -652,7 +644,7 @@ GEM
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 3.0)
netrc (~> 0.7)
retriable (1.4.1)
retriable (2.1.0)
rsolr (1.0.13)
builder (>= 2.1.2)
rspec-core (3.3.2)
Expand Down Expand Up @@ -705,9 +697,8 @@ GEM
json (~> 1.0)
redis (~> 3.2, >= 3.2.1)
redis-namespace (~> 1.5, >= 1.5.2)
signet (0.6.1)
signet (0.7.2)
addressable (~> 2.3)
extlib (~> 0.9)
faraday (~> 0.9)
jwt (~> 1.5)
multi_json (~> 1.10)
Expand Down Expand Up @@ -782,6 +773,7 @@ GEM
unf (0.1.4)
unf_ext
unf_ext (0.0.7.1)
uuidtools (2.1.5)
vcr (2.9.3)
vegas (0.1.11)
rack (>= 1.0.0)
Expand Down Expand Up @@ -825,6 +817,7 @@ DEPENDENCIES
hydra-role-management (= 0.1.0)
hydra-works!
iiif-presentation
iso-639
jasmine-jquery-rails
jasmine-rails
jbuilder (~> 2.0)
Expand All @@ -845,6 +838,7 @@ DEPENDENCIES
pul_metadata_services!
rails (= 4.2.4)
rails-assets-babel-polyfill!
rails-assets-bootstrap-select (= 1.9.4)!
rsolr (~> 1.0.6)
rspec-rails
rubocop
Expand Down
1 change: 1 addition & 0 deletions app/assets/javascripts/application.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
//= require blacklight/blacklight
//= require browse_everything
//= require nestedSortable/jquery.mjs.nestedSortable
//= require bootstrap-select

//= require_tree .

Expand Down
4 changes: 4 additions & 0 deletions app/assets/javascripts/bootstrap_select_initialize.es6
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// On DOM ready, hand every multi-select on the page to the bootstrap-select
// plugin (selectpicker) using its default options.
jQuery(() => {
  const multiSelects = $("select[multiple='multiple']")
  multiSelects.selectpicker({})
})
2 changes: 1 addition & 1 deletion app/assets/javascripts/bulk_label.es6
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class BulkLabeler {
}
track_resource_form() {
let master = this
this.element.find("input.resource-radio-button").change(function() {
this.element.find(".resource-radio-button").change(function() {
$("#resource-form").attr("changed", "true")
master.check_save_button()
})
Expand Down
1 change: 1 addition & 0 deletions app/assets/stylesheets/application.css.scss
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@
@import "browse_everything";

@import "components/breadcrumb";
@import "bootstrap-select";
20 changes: 5 additions & 15 deletions app/controllers/concerns/curation_concerns/remote_metadata.rb
Original file line number Diff line number Diff line change
@@ -1,24 +1,14 @@
module CurationConcerns::RemoteMetadata
extend ActiveSupport::Concern

included do
def curation_concern
if wants_to_update_remote_metadata?
decorated_concern
else
@curation_concern
end
end
end

private

def decorated_concern
decorator.new(@curation_concern)
end

def decorator
UpdatesMetadata
if wants_to_update_remote_metadata?
CompositeDecorator.new(UpdatesMetadata, super)
else
super
end
end

def wants_to_update_remote_metadata?
Expand Down
8 changes: 8 additions & 0 deletions app/controllers/concerns/curation_concerns/update_ocr.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Concern that supplies the decorator class used for OCR updates.
module CurationConcerns
module UpdateOCR
extend ActiveSupport::Concern
# Returns the decorator class to wrap the curation concern with.
# The leading :: forces lookup of the top-level UpdatesOCR constant rather
# than anything nested under CurationConcerns.
def decorator
::UpdatesOCR
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ class CurationConcerns::CurationConcernsController < ApplicationController
include CurationConcerns::Collectible
include CurationConcerns::Manifest
include CurationConcerns::MemberManagement
include CurationConcerns::UpdateOCR
include CurationConcerns::RemoteMetadata

def curation_concern_name
curation_concern.class.name.underscore
end

def update
authorize!(:complete, @curation_concern, message: 'Unable to mark resource complete') if @curation_concern.state != 'complete' && params[curation_concern_name][:state] == 'complete'
authorize!(:complete, curation_concern, message: 'Unable to mark resource complete') if curation_concern.state != 'complete' && params[curation_concern_name][:state] == 'complete'
add_to_collections(params[curation_concern_name].delete(:collection_ids))
super
end
Expand Down Expand Up @@ -41,6 +42,17 @@ def browse_everything_files

private

def curation_concern
@decorated_concern ||=
begin
@curation_concern = decorator.new(@curation_concern)
end
end

def decorator
CompositeDecorator.new(super, NullDecorator)
end

def selected_files_params
params[:selected_files]
end
Expand Down
13 changes: 13 additions & 0 deletions app/decorators/composite_decorator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Chains several decorators behind the single +new(obj)+ entry point that an
# individual decorator class exposes, so a composite can be used wherever one
# decorator is expected.
class CompositeDecorator
  # @return [Array] the decorators, in the order they will be applied
  attr_reader :decorators

  # @param decorators [Array] decorators (anything responding to +new(obj)+);
  #   nested arrays are flattened and nil entries are dropped
  def initialize(*decorators)
    @decorators = decorators.flatten.compact
  end

  # Wraps +obj+ with each decorator in turn; the first decorator is applied
  # first, so the last one ends up outermost.
  #
  # @param obj [Object] the object to decorate
  # @return [Object] the result of the final decorator, or +obj+ itself when
  #   there are no decorators
  def new(obj)
    decorators.reduce(obj) { |wrapped, decorator| decorator.new(wrapped) }
  end
end
3 changes: 3 additions & 0 deletions app/decorators/decorator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Base class for decorators: a SimpleDelegator that also answers the common
# type-identity questions as the wrapped object would, so a decorated object
# reports the wrapped object's class rather than the decorator's.
class Decorator < SimpleDelegator
  # kind_of? is listed next to is_a? because Ruby defines them as two separate
  # methods; delegating only one would make the two checks disagree on a
  # decorated object.
  delegate :class, :is_a?, :kind_of?, :instance_of?, to: :__getobj__
end
7 changes: 7 additions & 0 deletions app/decorators/null_decorator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# No-op decorator: offers the same +new(obj)+ entry point as a real decorator
# class but hands the object back untouched.
class NullDecorator
  # @param obj [Object] the object that would otherwise be decorated
  # @return [Object] +obj+ itself, unwrapped
  def self.new(obj)
    obj
  end
end
4 changes: 1 addition & 3 deletions app/decorators/updates_metadata.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
class UpdatesMetadata < SimpleDelegator
delegate :class, to: :__getobj__

class UpdatesMetadata < Decorator
def save
apply_remote_metadata
super
Expand Down
18 changes: 18 additions & 0 deletions app/decorators/updates_ocr.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Decorator that enqueues a RunOCRJob for each of the wrapped object's file
# sets when a save changes its ocr_language.
#
# Type-identity delegation (+class+ etc.) is inherited from Decorator, so it
# is not repeated here.
class UpdatesOCR < Decorator
  # Saves the wrapped object, then enqueues OCR jobs if ocr_language was dirty
  # before the save and the save returned a truthy result.
  #
  # @return the wrapped object's +save+ result, unchanged
  def save
    # Read the dirty flag before saving.
    # NOTE(review): presumably the save clears dirty tracking - confirm.
    language_changed = ocr_language_changed?
    super.tap do |saved|
      # A falsy save result means nothing was persisted, so there is nothing
      # new to OCR against.
      regenerate_derivatives if saved && language_changed
    end
  end

  private

  # Enqueues one RunOCRJob per id in the wrapped object's file_set_ids.
  def regenerate_derivatives
    file_set_ids.each do |file_set_id|
      RunOCRJob.perform_later(file_set_id)
    end
  end
end
2 changes: 1 addition & 1 deletion app/forms/curation_concerns/curation_concerns_form.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module CurationConcerns
class CurationConcernsForm < CurationConcerns::Forms::WorkForm
self.terms += [:access_policy, :holding_location, :rights_statement, :rights_note, :source_metadata_identifier, :portion_note, :description, :state, :workflow_note, :collection_ids]
self.terms += [:access_policy, :holding_location, :rights_statement, :rights_note, :source_metadata_identifier, :portion_note, :description, :state, :workflow_note, :collection_ids, :ocr_language]

def notable_rights_statement?
RightsStatementService.notable?(model.rights_statement)
Expand Down
8 changes: 8 additions & 0 deletions app/jobs/run_ocr_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Background job (default queue) that hands one file set to OCRRunner.
class RunOCRJob < ActiveJob::Base
  queue_as :default

  # Looks up the FileSet and calls OCRRunner#from_datastream on it.
  #
  # @param file_set_id the id passed to FileSet.find
  def perform(file_set_id)
    OCRRunner.new(FileSet.find(file_set_id)).from_datastream
  end
end
1 change: 1 addition & 0 deletions app/models/concerns/common_metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module CommonMetadata
property :holding_location, predicate: ::RDF::Vocab::Bibframe.heldBy, multiple: false do |index|
index.as :stored_searchable
end
property :ocr_language, predicate: ::PULTerms.ocr_language

# IIIF
apply_schema IIIFBookSchema, ActiveFedora::SchemaIndexingStrategy.new(
Expand Down
21 changes: 5 additions & 16 deletions app/models/file_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,7 @@ def create_derivatives(filename)
url: derivative_url('intermediate_file')
]
)
OCRCreator.create(
filename,
outputs: [
label: 'ocr',
url: ocr_file,
format: :hocr
]
)
OCRRunner.new(self).from_file(filename)
end
super
end
Expand All @@ -52,14 +45,10 @@ def ocr_file
end

def ocr_text
@ocr_text ||=
begin
if persisted? && File.exist?(ocr_file.gsub("file:", ""))
file = File.open(ocr_file.gsub("file:", ""))
ocr_doc = HOCRDocument.new(file)
ocr_doc.text.strip
end
end
return unless persisted? && File.exist?(ocr_file.gsub("file:", ""))
file = File.open(ocr_file.gsub("file:", ""))
ocr_doc = HOCRDocument.new(file)
ocr_doc.text.strip
end

# The destination_name parameter has to match up with the file parameter
Expand Down
1 change: 1 addition & 0 deletions app/models/vocab/pul_terms.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ class PULTerms < RDF::StrictVocabulary('http://library.princeton.edu/terms/')
term :exhibit_id, label: 'Exhibit ID'.freeze, type: 'rdf:Property'.freeze
term :metadata_id, label: 'Metadata ID'.freeze, type: 'rdf:Property'.freeze
term :source_metadata, label: 'Source Metadata'.freeze, type: 'rdf:Property'.freeze
term :ocr_language, label: "OCR Language".freeze, type: 'rdf:Property'.freeze
end
Loading

0 comments on commit 0a74ffa

Please sign in to comment.