Skip to content

Commit

Permalink
SRCH-2300 Handle non-ASCII characters in URLs (#752)
Browse files Browse the repository at this point in the history
SRCH-2300 Handle non-ASCII characters in URLs
* Set encoding on tempfile.
* Overhauled specs and features a little.
  • Loading branch information
jmax-fearless authored Aug 6, 2021
1 parent 83ba83a commit b369917
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 104 deletions.
2 changes: 1 addition & 1 deletion app/services/bulk_url_uploader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def self.create_job(uploaded_file, user)
SearchgovUrlBulkUploaderJob.perform_later(
user,
uploaded_file.original_filename,
uploaded_file.tempfile.readlines
uploaded_file.tempfile.set_encoding('UTF-8').readlines
)
end

Expand Down
11 changes: 0 additions & 11 deletions spec/fixtures/searchgov_domain.yml

This file was deleted.

13 changes: 13 additions & 0 deletions spec/fixtures/searchgov_domains.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,16 @@ DEFAULTS: &DEFAULTS
basic_domain:
<<: *DEFAULTS
domain: foo.gov

agency_gov:
<<: *DEFAULTS
domain: agency.gov

www_agency_gov:
domain: www.agency.gov

www_consumerfinance_gov:
domain: www.consumerfinance.gov

www_medicare_gov:
domain: www.medicare.gov
2 changes: 2 additions & 0 deletions spec/fixtures/txt/non_ascii_urls.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
https://agency.gov/foo § 208.pdf?open
https://agency.gov/Asesoría.pdf
161 changes: 69 additions & 92 deletions spec/system/admin/bulk_url_upload_spec.rb
Original file line number Diff line number Diff line change
@@ -1,130 +1,107 @@
# frozen_string_literal: true

shared_examples 'a failed bulk upload with error' do |error_message|
it 'sends us back to the bulk upload page' do
do_bulk_upload
expect(page).to have_text('Bulk Search.gov URL Upload')
end

it 'shows an error message' do
do_bulk_upload
expect(page).to have_text(error_message)
end
end

describe 'Bulk URL upload' do
include ActiveJob::TestHelper

subject(:bulk_upload) do
subject(:do_bulk_upload) do
perform_enqueued_jobs do
visit url
attach_file('bulk_upload_urls', url_file)
attach_file('bulk_upload_urls', upload_file)
click_button('Upload')
end
end

let(:url) { '/admin/bulk_url_upload' }
let(:url_filedir) { 'txt' }
let(:url_filename) { 'good_url_file.txt' }
let(:url_file) { file_fixture("#{url_filedir}/#{url_filename}") }
let(:urls) { File.open(url_file).readlines.map(&:strip) }
let(:searchgov_domains) do
urls.reduce(Set.new) do |searchgov_domains, raw_url|
parsed_url = URI(raw_url)
domain = parsed_url.host
searchgov_domain = SearchgovDomain.find_by(domain: domain)
searchgov_domains << searchgov_domain if searchgov_domain
end
end
let(:upload_file) { file_fixture("txt/#{upload_filename}") }
let(:urls) { File.open(upload_file).readlines.map(&:strip) }
let(:searchgov_domain) { searchgov_domains(:agency_gov) }

before do
@reindexed_domains = Set.new
allow_any_instance_of(SearchgovDomain).to receive(:index_urls) do |searchgov_domain|
@reindexed_domains << searchgov_domain
end
end
before { allow(searchgov_domain).to receive(:index_urls) }

it_behaves_like 'a page restricted to super admins'

describe 'bulk uploading a file of URLs' do
context 'when a super admin is logged in' do
include_context 'log in super admin'

it 'sends us back to the bulk upload page' do
bulk_upload
expect(page).to have_text('Bulk Search.gov URL Upload')
end

it 'shows a confirmation message' do
bulk_upload
expect(page).to have_text(
<<~CONFIRMATION_MESSAGE
Successfully uploaded #{url_filename} for processing.
The results will be emailed to you.
CONFIRMATION_MESSAGE
)
end
describe 'bulk uploading a file of URLs' do
let(:upload_filename) { 'good_url_file.txt' }

it 'creates the URLs' do
bulk_upload
urls.each do |url|
expect(SearchgovUrl.find_by(url: url)).not_to be_blank
it 'sends us back to the bulk upload page' do
do_bulk_upload
expect(page).to have_text('Bulk Search.gov URL Upload')
end
end

it 're-indexes the domains for the URLs' do
bulk_upload
expect(@reindexed_domains).to eq(searchgov_domains)
end
end

describe 'trying to bulk upload a file of URLs when there is no file attached' do
include_context 'log in super admin'
it 'shows a confirmation message' do
do_bulk_upload
expect(page).to have_text(
<<~CONFIRMATION_MESSAGE
Successfully uploaded #{upload_filename} for processing.
The results will be emailed to you.
CONFIRMATION_MESSAGE
)
end

subject(:bulk_upload) do
visit url
click_button('Upload')
end
it 'creates the URLs' do
do_bulk_upload

it 'sends us back to the bulk upload page' do
bulk_upload
expect(page).to have_text('Bulk Search.gov URL Upload')
end
urls.each do |url|
expect(SearchgovUrl.find_by(url: url)).not_to be_blank
end
end

it 'shows an error message' do
bulk_upload
expect(page).to have_text(
<<~ERROR_MESSAGE
Please choose a file to upload.
ERROR_MESSAGE
)
it 're-indexes the domains for the URLs' do
reindexed_domains = Set.new
allow_any_instance_of(SearchgovDomain).to receive(:index_urls) do |searchgov_domain|
reindexed_domains << searchgov_domain
end
do_bulk_upload
expect(reindexed_domains).to eq(Set[searchgov_domain])
end
end
end

describe 'trying to bulk upload a file of URLs that is not a text file' do
include_context 'log in super admin'
context 'when the URLs contain non-ASCII characters' do
let(:upload_filename) { 'non_ascii_urls.txt' }

let(:url_file) { file_fixture('word/bogus_url_file.docx') }
it 'saves the encoded URLs' do
do_bulk_upload

it 'sends us back to the bulk upload page' do
bulk_upload
expect(page).to have_text('Bulk Search.gov URL Upload')
expect(SearchgovUrl.pluck(:url)).to include(
'https://agency.gov/foo%20%C2%A7%20208.pdf?open',
'https://agency.gov/Asesor%E2%88%9A%E2%89%A0a.pdf'
)
end
end

it 'shows an error message' do
bulk_upload
expect(page).to have_text(
<<~ERROR_MESSAGE
Files of type application/vnd.openxmlformats-officedocument.wordprocessingml.document are not supported
ERROR_MESSAGE
)
end
end
describe 'trying to bulk upload a file of URLs when there is no file attached' do
let(:upload_file) { nil }

describe 'trying to bulk upload a file of URLs that is too big' do
include_context 'log in super admin'
it_behaves_like 'a failed bulk upload with error', 'Please choose a file to upload'
end

let(:url_filename) { 'too_big_url_file.txt' }
describe 'trying to bulk upload a file of URLs that is not a text file' do
let(:upload_filename) { 'bogus_url_file.docx' }
let(:upload_file) { file_fixture('word/bogus_url_file.docx') }

it 'sends us back to the bulk upload page' do
bulk_upload
expect(page).to have_text('Bulk Search.gov URL Upload')
it_behaves_like 'a failed bulk upload with error',
'Files of type application/vnd.openxmlformats-officedocument.wordprocessingml.document are not supported'
end

it 'shows an error message' do
bulk_upload
expect(page).to have_text(
<<~ERROR_MESSAGE
#{url_filename} is too big; please split it.
ERROR_MESSAGE
)
describe 'trying to bulk upload a file of URLs that is too big' do
let(:upload_filename) { 'too_big_url_file.txt' }

it_behaves_like 'a failed bulk upload with error', 'too_big_url_file.txt is too big; please split it'
end
end
end

0 comments on commit b369917

Please sign in to comment.