Skip to content

Commit

Permalink
improve slugification logic
Browse files Browse the repository at this point in the history
  • Loading branch information
Darren Hardy committed Dec 14, 2017
1 parent 234b20d commit 75f722e
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 23 deletions.
3 changes: 2 additions & 1 deletion Gemfile
Expand Up @@ -3,4 +3,5 @@ source 'https://rubygems.org'
# Specify your gem's dependencies in geo_combine.gemspec
gemspec

gem 'coveralls', require: false
gem 'coveralls', require: false
gem 'byebug'
7 changes: 4 additions & 3 deletions lib/geo_combine/formatting.rb
Expand Up @@ -15,7 +15,7 @@ def sanitize(text)
# @param [String] text
# @return [String]
def remove_lines(text)
text.gsub(/\n/, '')
text.delete("\n")
end

##
Expand All @@ -26,8 +26,9 @@ def sanitize_and_remove_lines(text)
remove_lines(sanitize(text))
end

def sluggify(text)
URI.encode(text.tr('.', '-').tr('_', '-').downcase)
# slugs are lowercase and contain only a-z, 0-9, and - (any other character becomes -, runs of - collapse to one)
def sluggify(slug)
slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/[\-]+/, '-').downcase
end
end
end
59 changes: 52 additions & 7 deletions lib/geo_combine/ogp.rb
@@ -1,15 +1,35 @@
module GeoCombine
# Data model for OpenGeoPortal metadata
class OGP
class InvalidMetadata < RuntimeError; end
include GeoCombine::Formatting
attr_reader :metadata

##
# Initializes an OGP object for parsing
# @param [String] metadata a valid serialized JSON string from an ESRI Open
# Data portal
# @param [String] metadata a valid serialized JSON string from OGP instance
# @raise [InvalidMetadata]
def initialize(metadata)
  # Parse first: the validity check reads the parsed document via `metadata`.
  @metadata = JSON.parse(metadata)
  return if valid?

  raise InvalidMetadata
end

# Keys of the parsed OGP record that `valid?` checks with `present?`;
# a record missing any of them is rejected.
OGP_REQUIRED_FIELDS = %w[
  Access Institution LayerDisplayName LayerId
  MaxX MaxY MinX MinY
  Name
].freeze

##
# Checks that every key listed in OGP_REQUIRED_FIELDS has a value in the
# parsed metadata that answers true to `present?`.
# @return [Boolean] false as soon as one required field fails the check
def valid?
  OGP_REQUIRED_FIELDS.all? do |field|
    metadata[field].present?
  end
end

##
Expand Down Expand Up @@ -103,16 +123,17 @@ def envelope
raise ArgumentError unless west >= -180 && west <= 180 &&
east >= -180 && east <= 180 &&
north >= -90 && north <= 90 &&
south >= -90 && south <= 90
south >= -90 && south <= 90 &&
west <= east && south <= north
"ENVELOPE(#{west}, #{east}, #{north}, #{south})"
end

def subjects
fgdc.metadata.xpath('//themekey').map { |k| k.text } if fgdc
fgdc.metadata.xpath('//themekey').map(&:text) if fgdc
end

def placenames
fgdc.metadata.xpath('//placekey').map { |k| k.text } if fgdc
fgdc.metadata.xpath('//placekey').map(&:text) if fgdc
end

def fgdc
Expand Down Expand Up @@ -161,11 +182,35 @@ def institution
end

def identifier
URI.encode(metadata['LayerId'])
CGI.escape(metadata['LayerId']) # TODO: why are we using CGI.escape?
end

def slug
sluggify(metadata['LayerId'])
name = metadata['LayerId'] || metadata['Name'] || ''
name = [institution, name].join('-') if institution.present? &&
!name.downcase.start_with?(institution.downcase)
sluggify(filter_name(name))
end

# Substrings that `filter_name` removes from a layer name before it is
# slugged (schema and user names, per the comment in that method).
SLUG_BLACKLIST = %w[SDE_DATA. SDE. SDE2. GISPORTAL.GISOWNER01. GISDATA. MORIS.].freeze

##
# Removes the first occurrence of each SLUG_BLACKLIST entry (schema and
# user names) from a layer name. If more than one character remains, that
# remainder is returned; otherwise the first word of the record's
# LayerDisplayName is used instead.
# @param [String] name layer name to clean; never modified
# @return [String]
def filter_name(name)
  # Non-destructive `sub`: the caller may hand in the very string stored in
  # `metadata` (or a frozen one), so it must not be edited in place.
  filtered = SLUG_BLACKLIST.reduce(name) do |remaining, blacklisted|
    remaining.sub(blacklisted, '')
  end
  return filtered if filtered.size > 1

  # name is empty (or a single character): use the first word of the title
  metadata['LayerDisplayName'].split.first
end
end
end
10 changes: 2 additions & 8 deletions spec/lib/geo_combine/formatting_spec.rb
Expand Up @@ -21,14 +21,8 @@
end
describe '#sluggify' do
let(:preslug) { 'HARVARD...Co_0l' }
it 'replaces . and _ for -' do
expect(subject.sluggify(preslug)).to_not include '.'
expect(subject.sluggify(preslug)).to_not include '_'
expect(subject.sluggify(preslug)).to include '---'
expect(subject.sluggify(preslug)).to include 'co-0l'
end
it 'downcases everything' do
expect(subject.sluggify(preslug)).to_not include 'HARVARD', 'C'
it 'handles multiple . and _ and uppercase' do
expect(subject.sluggify(preslug)).to eq 'harvard-co-0l'
end
end
end
8 changes: 4 additions & 4 deletions spec/lib/geo_combine/ogp_spec.rb
Expand Up @@ -23,10 +23,10 @@
describe '#geoblacklight_terms' do
describe 'builds a hash which maps metadata' do
it 'with dc_identifier_s' do
expect(ogp.geoblacklight_terms).to include(dc_identifier_s: metadata['LayerId'])
expect(ogp.geoblacklight_terms).to include(dc_identifier_s: 'HARVARD.SDE2.G1059_W57_1654_PF_SH1')
end
it 'with dc_title_s' do
expect(ogp.geoblacklight_terms).to include(dc_title_s: metadata['LayerDisplayName'])
expect(ogp.geoblacklight_terms).to include(dc_title_s: 'World Map, 1654 (Raster Image)')
end
it 'with dc_description_s sanitized' do
expect(ogp.geoblacklight_terms).to include(dc_description_s: metadata['Abstract'])
Expand All @@ -35,7 +35,7 @@
expect(ogp.geoblacklight_terms).to include(dc_rights_s: 'Public')
end
it 'with dct_provenance_s' do
expect(ogp.geoblacklight_terms).to include(dct_provenance_s: metadata['Institution'])
expect(ogp.geoblacklight_terms).to include(dct_provenance_s: 'Harvard')
end
it 'with dct_references_s' do
expect(ogp.geoblacklight_terms).to include(:dct_references_s)
Expand All @@ -49,7 +49,7 @@
end
it 'with layer_slug_s' do
expect(ogp.geoblacklight_terms)
.to include(layer_slug_s: 'harvard-sde2-g1059-w57-1654-pf-sh1')
.to include(layer_slug_s: 'harvard-g1059-w57-1654-pf-sh1')
end
it 'with solr_geom' do
expect(ogp.geoblacklight_terms).to include(:solr_geom)
Expand Down
1 change: 1 addition & 0 deletions spec/spec_helper.rb
Expand Up @@ -24,6 +24,7 @@
require 'fixtures/json_docs'
require 'helpers'
require 'rspec-html-matchers'
require 'byebug'

RSpec.configure do |config|
config.include Helpers
Expand Down

0 comments on commit 75f722e

Please sign in to comment.