-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SRCH-2300 Handle non-ASCII characters in URLs (#752)
SRCH-2300 Handle non-ASCII characters in URLs * Set encoding on tempfile. * Overhauled specs and features a little.
- Loading branch information
1 parent
83ba83a
commit b369917
Showing
5 changed files
with
85 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
https://agency.gov/foo § 208.pdf?open | ||
https://agency.gov/Asesoría.pdf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,130 +1,107 @@ | ||
# frozen_string_literal: true | ||
|
||
shared_examples 'a failed bulk upload with error' do |error_message| | ||
it 'sends us back to the bulk upload page' do | ||
do_bulk_upload | ||
expect(page).to have_text('Bulk Search.gov URL Upload') | ||
end | ||
|
||
it 'shows an error message' do | ||
do_bulk_upload | ||
expect(page).to have_text(error_message) | ||
end | ||
end | ||
|
||
describe 'Bulk URL upload' do | ||
include ActiveJob::TestHelper | ||
|
||
subject(:bulk_upload) do | ||
subject(:do_bulk_upload) do | ||
perform_enqueued_jobs do | ||
visit url | ||
attach_file('bulk_upload_urls', url_file) | ||
attach_file('bulk_upload_urls', upload_file) | ||
click_button('Upload') | ||
end | ||
end | ||
|
||
let(:url) { '/admin/bulk_url_upload' } | ||
let(:url_filedir) { 'txt' } | ||
let(:url_filename) { 'good_url_file.txt' } | ||
let(:url_file) { file_fixture("#{url_filedir}/#{url_filename}") } | ||
let(:urls) { File.open(url_file).readlines.map(&:strip) } | ||
let(:searchgov_domains) do | ||
urls.reduce(Set.new) do |searchgov_domains, raw_url| | ||
parsed_url = URI(raw_url) | ||
domain = parsed_url.host | ||
searchgov_domain = SearchgovDomain.find_by(domain: domain) | ||
searchgov_domains << searchgov_domain if searchgov_domain | ||
end | ||
end | ||
let(:upload_file) { file_fixture("txt/#{upload_filename}") } | ||
let(:urls) { File.open(upload_file).readlines.map(&:strip) } | ||
let(:searchgov_domain) { searchgov_domains(:agency_gov) } | ||
|
||
before do | ||
@reindexed_domains = Set.new | ||
allow_any_instance_of(SearchgovDomain).to receive(:index_urls) do |searchgov_domain| | ||
@reindexed_domains << searchgov_domain | ||
end | ||
end | ||
before { allow(searchgov_domain).to receive(:index_urls) } | ||
|
||
it_behaves_like 'a page restricted to super admins' | ||
|
||
describe 'bulk uploading a file of URLs' do | ||
context 'when a super admin is logged in' do | ||
include_context 'log in super admin' | ||
|
||
it 'sends us back to the bulk upload page' do | ||
bulk_upload | ||
expect(page).to have_text('Bulk Search.gov URL Upload') | ||
end | ||
|
||
it 'shows a confirmation message' do | ||
bulk_upload | ||
expect(page).to have_text( | ||
<<~CONFIRMATION_MESSAGE | ||
Successfully uploaded #{url_filename} for processing. | ||
The results will be emailed to you. | ||
CONFIRMATION_MESSAGE | ||
) | ||
end | ||
describe 'bulk uploading a file of URLs' do | ||
let(:upload_filename) { 'good_url_file.txt' } | ||
|
||
it 'creates the URLs' do | ||
bulk_upload | ||
urls.each do |url| | ||
expect(SearchgovUrl.find_by(url: url)).not_to be_blank | ||
it 'sends us back to the bulk upload page' do | ||
do_bulk_upload | ||
expect(page).to have_text('Bulk Search.gov URL Upload') | ||
end | ||
end | ||
|
||
it 're-indexes the domains for the URLs' do | ||
bulk_upload | ||
expect(@reindexed_domains).to eq(searchgov_domains) | ||
end | ||
end | ||
|
||
describe 'trying to bulk upload a file of URLs when there is no file attached' do | ||
include_context 'log in super admin' | ||
it 'shows a confirmation message' do | ||
do_bulk_upload | ||
expect(page).to have_text( | ||
<<~CONFIRMATION_MESSAGE | ||
Successfully uploaded #{upload_filename} for processing. | ||
The results will be emailed to you. | ||
CONFIRMATION_MESSAGE | ||
) | ||
end | ||
|
||
subject(:bulk_upload) do | ||
visit url | ||
click_button('Upload') | ||
end | ||
it 'creates the URLs' do | ||
do_bulk_upload | ||
|
||
it 'sends us back to the bulk upload page' do | ||
bulk_upload | ||
expect(page).to have_text('Bulk Search.gov URL Upload') | ||
end | ||
urls.each do |url| | ||
expect(SearchgovUrl.find_by(url: url)).not_to be_blank | ||
end | ||
end | ||
|
||
it 'shows an error message' do | ||
bulk_upload | ||
expect(page).to have_text( | ||
<<~ERROR_MESSAGE | ||
Please choose a file to upload. | ||
ERROR_MESSAGE | ||
) | ||
it 're-indexes the domains for the URLs' do | ||
reindexed_domains = Set.new | ||
allow_any_instance_of(SearchgovDomain).to receive(:index_urls) do |searchgov_domain| | ||
reindexed_domains << searchgov_domain | ||
end | ||
do_bulk_upload | ||
expect(reindexed_domains).to eq(Set[searchgov_domain]) | ||
end | ||
end | ||
end | ||
|
||
describe 'trying to bulk upload a file of URLs that is not a text file' do | ||
include_context 'log in super admin' | ||
context 'when the URLs contain non-ASCII characters' do | ||
let(:upload_filename) { 'non_ascii_urls.txt' } | ||
|
||
let(:url_file) { file_fixture('word/bogus_url_file.docx') } | ||
it 'saves the encoded URLs' do | ||
do_bulk_upload | ||
|
||
it 'sends us back to the bulk upload page' do | ||
bulk_upload | ||
expect(page).to have_text('Bulk Search.gov URL Upload') | ||
expect(SearchgovUrl.pluck(:url)).to include( | ||
'https://agency.gov/foo%20%C2%A7%20208.pdf?open', | ||
'https://agency.gov/Asesor%E2%88%9A%E2%89%A0a.pdf' | ||
) | ||
end | ||
end | ||
|
||
it 'shows an error message' do | ||
bulk_upload | ||
expect(page).to have_text( | ||
<<~ERROR_MESSAGE | ||
Files of type application/vnd.openxmlformats-officedocument.wordprocessingml.document are not supported | ||
ERROR_MESSAGE | ||
) | ||
end | ||
end | ||
describe 'trying to bulk upload a file of URLs when there is no file attached' do | ||
let(:upload_file) { nil } | ||
|
||
describe 'trying to bulk upload a file of URLs that is too big' do | ||
include_context 'log in super admin' | ||
it_behaves_like 'a failed bulk upload with error', 'Please choose a file to upload' | ||
end | ||
|
||
let(:url_filename) { 'too_big_url_file.txt' } | ||
describe 'trying to bulk upload a file of URLs that is not a text file' do | ||
let(:upload_filename) { 'bogus_url_file.docx' } | ||
let(:upload_file) { file_fixture('word/bogus_url_file.docx') } | ||
|
||
it 'sends us back to the bulk upload page' do | ||
bulk_upload | ||
expect(page).to have_text('Bulk Search.gov URL Upload') | ||
it_behaves_like 'a failed bulk upload with error', | ||
'Files of type application/vnd.openxmlformats-officedocument.wordprocessingml.document are not supported' | ||
end | ||
|
||
it 'shows an error message' do | ||
bulk_upload | ||
expect(page).to have_text( | ||
<<~ERROR_MESSAGE | ||
#{url_filename} is too big; please split it. | ||
ERROR_MESSAGE | ||
) | ||
describe 'trying to bulk upload a file of URLs that is too big' do | ||
let(:upload_filename) { 'too_big_url_file.txt' } | ||
|
||
it_behaves_like 'a failed bulk upload with error', 'too_big_url_file.txt is too big; please split it' | ||
end | ||
end | ||
end |