Skip to content

Commit

Permalink
Merge branch 'master' of github.com:GlobalNamesArchitecture/dwc-archive
Browse files Browse the repository at this point in the history
  • Loading branch information
dimus committed Dec 31, 2013
2 parents cdcb855 + 22be407 commit 318f307
Show file tree
Hide file tree
Showing 13 changed files with 265 additions and 134 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
rvm:
- 1.9.3-p448
- 1.9.3-p484
- 2.0.0-p353
before_install:
- sudo apt-get update
Expand Down
1 change: 1 addition & 0 deletions lib/dwc-archive.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
require 'digest'
require 'csv'
require 'logger'
require 'nokogiri'
require_relative 'dwc-archive/xml_reader'
require_relative 'dwc-archive/ingester'
require_relative 'dwc-archive/errors'
Expand Down
2 changes: 1 addition & 1 deletion lib/dwc-archive/archive.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require 'nokogiri'
class DarwinCore
class Archive
attr_reader :meta, :eml

def initialize(archive_path, tmp_dir)
@archive_path = archive_path
@tmp_dir = tmp_dir
Expand Down
23 changes: 11 additions & 12 deletions lib/dwc-archive/classification_normalizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,21 +72,11 @@ def add_vernacular_name_string(name_string)
end

# Returns the collected name strings held in @name_strings.
#
# opts - options hash forwarded unchanged to process_strings:
#        :with_hash - when truthy the @name_strings hash itself is returned,
#                     otherwise only its keys (default behaviour).
#
# The selection logic lives in process_strings; the inline if/else that used
# to precede the call computed the same value and then discarded it.
def name_strings(opts = {})
  process_strings(@name_strings, opts)
end

# Returns the collected vernacular name strings held in
# @vernacular_name_strings.
#
# opts - options hash forwarded unchanged to process_strings:
#        :with_hash - when truthy the @vernacular_name_strings hash itself is
#                     returned, otherwise only its keys (default behaviour).
#
# The selection logic lives in process_strings; the inline if/else that used
# to precede the call computed the same value and then discarded it.
def vernacular_name_strings(opts = {})
  process_strings(@vernacular_name_strings, opts)
end

def normalize(opts = {})
Expand All @@ -110,6 +100,15 @@ def normalize(opts = {})

private

# Shared selector behind name_strings and vernacular_name_strings.
#
# strings - hash-like collection responding to #keys.
# opts    - options hash; :with_hash defaults to false when absent.
#
# Returns +strings+ itself when :with_hash is truthy, otherwise strings.keys.
def process_strings(strings, opts)
  settings = { with_hash: false }.merge(opts)
  return strings if settings[:with_hash]

  strings.keys
end

def get_canonical_name(a_scientific_name)
if @with_canonical_names
canonical_name = @parser.parse(a_scientific_name,
Expand Down
4 changes: 3 additions & 1 deletion lib/dwc-archive/core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@ class DarwinCore
class Core
include DarwinCore::Ingester
attr_reader :id

# Sets up the core-file wrapper from a DarwinCore object.
#
# dwc - object whose #archive exposes #files_path and #meta; the first key of
#       meta is treated as the root element and its :core entry describes
#       the core file.
#
# Raises DarwinCore::CoreFileError when no core description can be found,
# including the case where meta has no root element at all.
def initialize(dwc)
  @dwc = dwc
  @archive = @dwc.archive
  @path = @archive.files_path
  root_key = @archive.meta.keys[0]
  root = @archive.meta[root_key]
  # A missing root element must surface as CoreFileError rather than a
  # NoMethodError raised by indexing nil.
  @data = root[:core] if root
  unless @data
    raise DarwinCore::CoreFileError,
          'Cannot find core in meta.xml, is meta.xml valid?'
  end
  @id = @data[:id][:attributes]
  get_attributes(DarwinCore::CoreFileError)
end

end
end
1 change: 1 addition & 0 deletions lib/dwc-archive/expander.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class DarwinCore
class Expander

def initialize(archive_path, tmp_dir)
@archive_path = archive_path
@tmp_dir = tmp_dir
Expand Down
25 changes: 13 additions & 12 deletions lib/dwc-archive/generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,7 @@ def clean
def add_core(data, file_name, keep_headers = true)
c = CSV.open(File.join(@path,file_name), @write)
header = data.shift
fields = header.map do |f|
f.strip!
err = 'No header in core data, or header fields are not urls'
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
f.split('/')[-1]
end
fields = get_fields(header, 'core')
data.unshift(fields) if keep_headers
ignore_header_lines = keep_headers ? 1 : 0
@meta_xml_data[:core] = { fields: header,
Expand All @@ -41,12 +36,7 @@ def add_extension(data, file_name,
row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
c = CSV.open(File.join(@path,file_name), @write)
header = data.shift
fields = header.map do |f|
f.strip!
err = 'No header in core data, or header fields are not urls'
raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
f.split('/')[-1]
end
fields = get_fields(header, 'extension')
data.unshift(fields) if keep_headers
ignore_header_lines = keep_headers ? 1 : 0
@meta_xml_data[:extensions] << { fields: header,
Expand Down Expand Up @@ -81,5 +71,16 @@ def pack
a = "cd #{@path}; tar -zcf #{@dwc_path} *"
system(a)
end

private

# Converts a header row of term URLs into short field names.
#
# header    - array of strings, each expected to be an http:// URL. Every
#             string is stripped IN PLACE, because callers store the same
#             header array afterwards.
# file_type - label interpolated into the error message ('core',
#             'extension').
#
# Returns an array with the last path segment of every URL.
# Raises DarwinCore::GeneratorError when the header is missing, a cell is not
# a string, or a cell does not start with http://.
def get_fields(header, file_type)
  err = "No header in %s data, or header fields are not urls" % file_type
  # An empty data set hands us nil here; report it as the documented error
  # instead of failing with NoMethodError.
  raise DarwinCore::GeneratorError, err unless header.respond_to?(:map)

  header.map do |field|
    raise DarwinCore::GeneratorError, err unless field.respond_to?(:strip!)

    field.strip!
    # \A anchors at the start of the string; ^ would also accept a URL that
    # merely starts a later line of a multi-line cell.
    raise DarwinCore::GeneratorError, err unless field.match(%r{\Ahttp://})

    field.split('/')[-1]
  end
end
end
end
113 changes: 68 additions & 45 deletions lib/dwc-archive/generator_eml_xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,51 +22,9 @@ def create
:'xmlns:res' => 'eml://ecoinformatics.org/resource-2.1.1',
:'xmlns:dc' => 'http://purl.org/dc/terms/',
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
:'xsi:schemaLocation' => 'eml_uri') do
xml.dataset(id: @data[:id]) do
xml.title(@data[:title])
xml.license(@data[:license])
contacts = []
@data[:authors].each_with_index do |a, i|
creator_id = i + 1
contacts << creator_id
xml.creator(id: creator_id, scope: 'document') do
xml.individualName do
xml.givenName(a[:first_name])
xml.surName(a[:last_name])
end
xml.organizationName(a[:organization]) if a[:organization]
xml.positionName(a[:position]) if a[:position]
xml.onlineUrl(a[:url]) if a[:url]
xml.electronicMailAddress(a[:email])
end
end
@data[:metadata_providers].each_with_index do |a, i|
xml.metadataProvider do
xml.individualName do
xml.givenName(a[:first_name])
xml.surName(a[:last_name])
end
xml.organizationName(a[:organization]) if a[:organization]
xml.positionName(a[:position]) if a[:position]
xml.onlineUrl(a[:url]) if a[:url]
xml.electronicMailAddress(a[:email])
end
end if @data[:metadata_providers]
xml.pubDate(Time.now.to_s)
xml.abstract() do
xml.para(@data[:abstract])
end
contacts.each do |contact|
xml.contact { xml.references(contact) }
end
end
xml.additionalMetadata do
xml.metadata do
xml.citation(@data[:citation])
xml.resourceLogoUrl(@data[:logo_url]) if @data[:logo_url]
end
end
:'xsi:schemaLocation' => 'eml_uri') do
build_dataset(xml)
build_additional_metadata(xml)
xml.parent.namespace = xml.parent.namespace_definitions.first
end
end
Expand All @@ -77,6 +35,71 @@ def create
end

private

# Writes the <dataset> element and its children in document order: title,
# license, creators, metadata providers, pubDate, abstract and contacts.
#
# xml - XML builder the elements are emitted on.
#
# The creator ids collected by build_authors are replayed by build_contacts,
# so the same array is handed to both.
def build_dataset(xml)
  xml.dataset(id: @data[:id]) do
    xml.title(@data[:title])
    xml.license(@data[:license])
    creator_ids = []
    build_authors(xml, creator_ids)
    build_metadata_providers(xml)
    xml.pubDate(Time.now.to_s)
    build_abstract(xml)
    build_contacts(xml, creator_ids)
  end
end

# Writes an <abstract> element holding a single <para> with @data[:abstract].
#
# xml - XML builder the elements are emitted on.
def build_abstract(xml)
  xml.abstract { xml.para(@data[:abstract]) }
end

# Writes one <contact> element per entry of +contacts+, each containing a
# <references> element with that entry as its value.
#
# xml      - XML builder the elements are emitted on.
# contacts - enumerable of ids (build_authors fills it with creator ids).
def build_contacts(xml, contacts)
  contacts.each do |creator_id|
    xml.contact do
      xml.references(creator_id)
    end
  end
end

# Writes one <metadataProvider> element per entry of
# @data[:metadata_providers]; the person details are filled in by
# build_person. Does nothing when no providers were supplied.
#
# xml - XML builder the elements are emitted on.
def build_metadata_providers(xml)
  providers = @data[:metadata_providers]
  return unless providers

  providers.each do |provider|
    xml.metadataProvider { build_person(xml, provider) }
  end
end

# Writes one <creator> element per entry of @data[:authors], numbering them
# from 1, and appends every assigned id to +contacts+.
#
# xml      - XML builder the elements are emitted on.
# contacts - array that receives the creator ids (mutated in place).
def build_authors(xml, contacts)
  @data[:authors].each.with_index(1) do |author, creator_id|
    contacts << creator_id
    xml.creator(id: creator_id, scope: 'document') { build_person(xml, author) }
  end
end

# Writes <additionalMetadata><metadata> with the citation and, when
# @data[:logo_url] is set, the resource logo URL.
#
# xml - XML builder the elements are emitted on.
def build_additional_metadata(xml)
  xml.additionalMetadata do
    xml.metadata do
      xml.citation(@data[:citation])
      logo_url = @data[:logo_url]
      xml.resourceLogoUrl(logo_url) if logo_url
    end
  end
end

# Writes the elements describing one person: individualName (given name and
# surname), then organization, position and URL when present, and finally
# the e-mail address, which is always emitted.
#
# xml  - XML builder the elements are emitted on.
# data - hash-like record read via :first_name, :last_name, :organization,
#        :position, :url and :email.
def build_person(xml, data)
  xml.individualName do
    xml.givenName(data[:first_name])
    xml.surName(data[:last_name])
  end
  if data[:organization]
    xml.organizationName(data[:organization])
  end
  if data[:position]
    xml.positionName(data[:position])
  end
  if data[:url]
    xml.onlineUrl(data[:url])
  end
  xml.electronicMailAddress(data[:email])
end

def timestamp
t = Time.now.getutc.to_a[0..5].reverse
t[0..2].join('-') + '::' + t[-3..-1].join(':')
Expand Down
53 changes: 33 additions & 20 deletions lib/dwc-archive/generator_meta_xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,7 @@ def create
fieldsEnclosedBy: '"',
linesTerminatedBy: "\n",
rowType: 'http://rs.tdwg.org/dwc/terms/Taxon' }
xml.archive(xmlns: 'http://rs.tdwg.org/dwc/text/',
:'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
:'xsi:schemaLocation' => schema_uri) do
xml.core(opts.merge(ignoreHeaderLines:
@data[:core][:ignoreHeaderLines])) do
xml.files { xml.location(@data[:core][:location]) }
taxon_id, fields = find_taxon_id(@data[:core][:fields])
xml.id_(index: taxon_id[1])
fields.each { |f| xml.field(term: f[0], index: f[1]) }
end
@data[:extensions].each do |e|
xml.extension(opts.merge(ignoreHeaderLines: e[:ignoreHeaderLines],
rowType: e[:rowType])) do
xml.files { xml.location(e[:location]) }
taxon_id, fields = find_taxon_id(e[:fields])
xml.coreid(index: taxon_id[1])
fields.each { |f| xml.field(term: f[0], index: f[1]) }
end
end
end
build_archive(xml, opts, schema_uri)
end
meta_xml_data = builder.to_xml
meta_file = open(File.join(@path, 'meta.xml'), @write)
Expand All @@ -44,6 +25,38 @@ def create
end

private

# Writes the root <archive> element with its namespace attributes, then the
# core and extension descriptions inside it.
#
# xml        - XML builder the elements are emitted on.
# opts       - attribute hash passed on to build_core and build_extensions.
# schema_uri - value of the xsi:schemaLocation attribute.
def build_archive(xml, opts, schema_uri)
  root_attributes = {
    xmlns: 'http://rs.tdwg.org/dwc/text/',
    :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
    :'xsi:schemaLocation' => schema_uri
  }
  xml.archive(root_attributes) do
    build_core(xml, opts)
    build_extensions(xml, opts)
  end
end

# Writes the <core> element: file location, the <id> index and one <field>
# per entry returned by find_taxon_id.
#
# xml  - XML builder the elements are emitted on.
# opts - base attribute hash; ignoreHeaderLines from @data[:core] is merged in.
#
# NOTE(review): find_taxon_id is assumed to return
# [[term, index], [[term, index], ...]] - confirm against its definition.
def build_core(xml, opts)
  core = @data[:core]
  attributes = opts.merge(ignoreHeaderLines: core[:ignoreHeaderLines])
  xml.core(attributes) do
    xml.files { xml.location(@data[:core][:location]) }
    taxon_id, fields = find_taxon_id(@data[:core][:fields])
    xml.id_(index: taxon_id[1])
    fields.each do |field|
      xml.field(term: field[0], index: field[1])
    end
  end
end

# Writes one <extension> element per entry of @data[:extensions]: file
# location, the <coreid> index and one <field> per entry returned by
# find_taxon_id.
#
# xml  - XML builder the elements are emitted on.
# opts - base attribute hash; each extension's ignoreHeaderLines and rowType
#        are merged in (overriding any values already in opts).
#
# NOTE(review): find_taxon_id is assumed to return
# [[term, index], [[term, index], ...]] - confirm against its definition.
def build_extensions(xml, opts)
  @data[:extensions].each do |extension|
    attributes = opts.merge(ignoreHeaderLines: extension[:ignoreHeaderLines],
                            rowType: extension[:rowType])
    xml.extension(attributes) do
      xml.files { xml.location(extension[:location]) }
      taxon_id, fields = find_taxon_id(extension[:fields])
      xml.coreid(index: taxon_id[1])
      fields.each do |field|
        xml.field(term: field[0], index: field[1])
      end
    end
  end
end

def find_taxon_id(data)
fields = []
data.each_with_index { |f, i| fields << [f.strip, i] }
Expand Down
Loading

0 comments on commit 318f307

Please sign in to comment.