Skip to content

Commit

Permalink
SRCH-38 index TXT files (#189)
Browse files Browse the repository at this point in the history
  • Loading branch information
MothOnMars committed Nov 28, 2018
1 parent b6eaa75 commit 731dd7a
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 4 deletions.
1 change: 0 additions & 1 deletion app/concerns/fetchable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ module Fetchable
swf
tar
tgz
txt
wav
wmv
wsdl
Expand Down
7 changes: 4 additions & 3 deletions app/models/searchgov_url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ class SearchgovUrl < ActiveRecord::Base
include ActionView::Helpers::NumberHelper

MAX_DOC_SIZE = 15.megabytes
SUPPORTED_CONTENT_TYPES = %w(
SUPPORTED_CONTENT_TYPES = %w[
text/html
text/plain
application/msword
application/pdf
application/vnd.ms-excel
application/vnd.openxmlformats-officedocument.wordprocessingml.document
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
)
]

attr_accessible :last_crawl_status, :last_crawled_at, :url, :lastmod
attr_reader :response, :document, :tempfile
Expand Down Expand Up @@ -167,7 +168,7 @@ def url_without_protocol

def parse_document
Rails.logger.info "[SearchgovUrl] Parsing document for #{url}"
if /^application/ === response.content_type.mime_type
if /^application|text\/plain/ === response.content_type.mime_type
ApplicationDocument.new(document: download.open, url: url)
else
HtmlDocument.new(document: response.to_s, url: url)
Expand Down
25 changes: 25 additions & 0 deletions spec/models/searchgov_url_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@

# Shared setup for the examples in this group.
before do
# Make searchgov_url.searchgov_domain return the searchgov_domain test object.
allow(searchgov_url).to receive(:searchgov_domain).and_return(searchgov_domain)
# Stub I14yDocument.create so calls to it do not run the real implementation;
# individual examples can still set `expect(...).to receive(:create)` on top of this.
allow(I14yDocument).to receive(:create)
end

context 'when the fetch is successful' do
Expand Down Expand Up @@ -404,6 +405,30 @@
end
end

# A URL served as text/plain is expected to be fetched and handed to
# I14yDocument.create with the attributes listed below.
context 'when the url points to a TXT doc (.txt)' do
  let(:url) { 'https://www.irs.gov/test.txt' }

  before do
    # Serve a small plain-text body for the URL under test.
    stub_request(:get, url).to_return(
      status: 200,
      body: 'This is my text content.',
      headers: { content_type: 'text/plain' }
    )
  end

  it 'fetches and indexes the document' do
    # Attributes that must be present in the hash passed to I14yDocument.create.
    indexed_attributes = {
      handle: 'searchgov',
      path: 'https://www.irs.gov/test.txt',
      title: 'test.txt',
      description: nil,
      content: 'This is my text content.',
      language: 'en'
    }

    expect(I14yDocument).to receive(:create).with(hash_including(indexed_attributes))
    fetch
  end
end

context 'when the request fails' do
before do
stub_request(:get, url).to_raise(StandardError.new('faaaaail'))
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 731dd7a

Please sign in to comment.