Permalink
Browse files

[Fix #49842141] Update URLs for CPSC recalls

- try to determine CPSC URL
- also update NHTSA URL
  • Loading branch information...
1 parent c7dcba9 commit 74ff28198b2e09418d9d0826fffdcb99bd7fd22f David Pranata committed May 15, 2013
@@ -1,10 +1,11 @@
class Recall < ActiveRecord::Base
- attr_accessible :organization, :recall_number, :recalled_on, :y2k
+ attr_accessible :organization, :recall_number, :recalled_on, :y2k, :url
has_many :recall_details, dependent: :destroy
has_many :auto_recalls, dependent: :destroy
has_one :food_recall, dependent: :destroy
validates_presence_of :organization, :recall_number
+ validates_presence_of :url, if: :cpsc?
FDA = 'FDA'.freeze
USDA = 'USDA'.freeze
@@ -238,9 +239,9 @@ def recall_url
when food_or_drug?
food_recall.url
when cpsc?
- "http://www.cpsc.gov/cpscpub/prerel/prhtml#{self.recall_number.to_s[0..1]}/#{self.recall_number}.html" unless self.recall_number.blank?
+ url
when nhtsa?
- "http://www-odi.nhtsa.dot.gov/recalls/recallresults.cfm?start=1&SearchType=QuickSearch&rcl_ID=#{self.recall_number}&summary=true&PrintVersion=YES"
+ "http://www-odi.nhtsa.dot.gov/owners/SearchResults?searchType=ID&targetCategory=R&searchCriteria.nhtsa_ids=#{recall_number}"
end
end
@@ -271,7 +272,7 @@ def description
def recall_details_hash
@recall_details_hash ||= begin
recall_details_hash = {}
- recall_details.order(:id).each do |rd|
+ recall_details.each do |rd|
key = rd.detail_type.underscore.to_sym
if recall_details_hash[key]
recall_details_hash[key] << rd.detail_value
@@ -0,0 +1,5 @@
+# Migration that adds a `url` string column to the `recalls` table.
+class AddUrlToRecalls < ActiveRecord::Migration
+ # Declared via `change`, so the add_column call is the entire schema change.
+ def change
+ # null: true -- the column accepts NULL, so rows without a URL remain valid at the DB level.
+ add_column :recalls, :url, :string, null: true
+ end
+end
View
@@ -11,7 +11,7 @@
#
# It's strongly recommended to check this file into your version control system.
-ActiveRecord::Schema.define(:version => 20121124052409) do
+ActiveRecord::Schema.define(:version => 20130515210134) do
create_table "auto_recalls", :force => true do |t|
t.integer "recall_id"
@@ -58,6 +58,7 @@
t.string "organization", :limit => 10
t.datetime "created_at", :null => false
t.datetime "updated_at", :null => false
+ t.string "url"
end
add_index "recalls", ["recall_number"], :name => "index_recalls_on_recall_number"
@@ -1,4 +1,6 @@
-class CdcData
+module CdcData
+ extend Importer
+
def self.import_from_rss_feed(url, food_type, authoritative_source = false)
require 'rss/2.0'
begin
@@ -31,14 +33,6 @@ def self.fetch_source_url(url)
source_url
end
- def self.get_url_from_redirect(uri)
- res = Net::HTTP.get_response(uri)
- if res.code == '302'
- doc = Nokogiri::HTML(res.body)
- doc.css('a').first.attr(:href) if doc.css('a').present?
- end
- end
-
def self.extract_organization(url)
URI(url).host.upcase.scan(/([[:alnum:]]+)\.gov$/i).flatten.first
end
@@ -1,6 +1,8 @@
require 'rexml/document'
-class CpscData
+module CpscData
+ extend Importer
+
def self.import_from_xml_feed(url)
begin
REXML::Document.new(Net::HTTP.get(URI(url))).elements.each('message/results/result') do |element|
@@ -10,6 +12,8 @@ def self.import_from_xml_feed(url)
recall = Recall.where(organization: 'CPSC', recall_number: recall_number).first_or_initialize
recall.y2k = element.attributes['y2k']
recall.recalled_on = Date.parse(element.attributes['recDate']) rescue nil
+ recall_url = element.attributes['recallURL'].strip
+ recall.url = get_cpsc_url(recall_number, URI(recall_url)) || recall_url
attributes = {
manufacturer: element.attributes['manufacturer'],
@@ -39,4 +43,15 @@ def self.import_from_xml_feed(url)
Rails.logger.error(e.message)
end
end
+
+ def self.get_cpsc_url(recall_number, recall_url)
+ cpsc_url = get_url_from_redirect(URI(recall_url))
+ unless cpsc_url
+ legacy_url = "http://www.cpsc.gov/cpscpub/prerel/prhtml#{recall_number[0..1]}/#{recall_number}.html"
+ params = {query: legacy_url, OldURL: true, autodisplay: true }
+ search_url = "http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?#{params.to_param}"
+ cpsc_url = get_url_from_redirect(URI(search_url))
+ end
+ cpsc_url
+ end
end
@@ -0,0 +1,7 @@
+module Importer
+ def get_url_from_redirect(uri)
+ res = Net::HTTP.get_response(uri)
+ location = %w(301 302).include?(res.code) ? (res.get_fields('Location') || []) : []
+ location.first
+ end
+end
@@ -1,4 +1,4 @@
-class NhtsaData
+module NhtsaData
def self.import_from_tab_delimited_feed(url)
begin
file = Tempfile.new("nhtsa")
@@ -45,6 +45,7 @@
Recall.create!(organization: 'CPSC',
recall_number: '12345',
y2k: 12345,
+ url: 'http://www.cpsc.gov/en/Recalls/2010/Violation-of-Federal-Mattress-Flammability-Standard-Prompts-Recall-of-Ottoman-Bed-Mattresses-by-PBteen/',
recalled_on: Date.parse('2010-03-01')) do |r|
r.recall_details.build(detail_type: 'Manufacturer', detail_value: 'Acme Corp')
r.recall_details.build(detail_type: 'ProductType', detail_value: 'Dangerous Stuff')
@@ -57,6 +58,7 @@
Recall.create!(organization: 'CPSC',
recall_number: '10187',
y2k: 110187,
+ url: 'http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/',
recalled_on: Date.parse('2010-04-01')) do |r|
r.recall_details.build(detail_type: 'Manufacturer', detail_value: 'Crate & Barrel')
r.recall_details.build(detail_type: 'ProductType', detail_value: 'Bottles (Sports/Water/Thermos)')
@@ -157,7 +159,7 @@
item.should == { organization: 'CPSC',
recall_number: '10187',
recall_date: '2010-04-01',
- recall_url: 'http://www.cpsc.gov/cpscpub/prerel/prhtml10/10187.html',
+ recall_url: 'http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/',
manufacturers: ['Crate & Barrel'],
product_types: ['Bottles (Sports/Water/Thermos)'],
descriptions: ['Glass Water Bottles'],
@@ -169,7 +171,7 @@
item.should == { organization: 'CPSC',
recall_number: '12345',
recall_date: '2010-03-01',
- recall_url: 'http://www.cpsc.gov/cpscpub/prerel/prhtml12/12345.html',
+ recall_url: 'http://www.cpsc.gov/en/Recalls/2010/Violation-of-Federal-Mattress-Flammability-Standard-Prompts-Recall-of-Ottoman-Bed-Mattresses-by-PBteen/',
manufacturers: ['Acme Corp'],
product_types: ['Dangerous Stuff'],
descriptions: ['Baby Stroller can be dangerous to children'],
@@ -182,7 +184,7 @@
organization: Recall::NHTSA,
recall_number: '123456',
recall_date: '2010-01-01',
- recall_url: 'http://www-odi.nhtsa.dot.gov/recalls/recallresults.cfm?start=1&SearchType=QuickSearch&rcl_ID=123456&summary=true&PrintVersion=YES',
+ recall_url: 'http://www-odi.nhtsa.dot.gov/owners/SearchResults?searchType=ID&targetCategory=R&searchCriteria.nhtsa_ids=123456',
records: [{ component_description: 'comp desc1',
make: 'automaker1',
manufacturer: 'manufacturer1',
@@ -267,23 +269,23 @@ def parse_items(feed)
item = items[3]
item[:title].should == 'Glass Water Bottles'
item[:description].should == 'Bottles (Sports/Water/Thermos)'
- item[:link].should == 'http://www.cpsc.gov/cpscpub/prerel/prhtml10/10187.html'
+ item[:link].should == 'http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/'
item[:pub_date].should == 'Thu, 01 Apr 2010 00:00:00 +0000'
- item[:guid].should == 'http://www.cpsc.gov/cpscpub/prerel/prhtml10/10187.html'
+ item[:guid].should == 'http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/'
item = items[4]
item[:title].should == 'Baby Stroller can be dangerous to children'
item[:description].should == 'Dangerous Stuff'
- item[:link].should == 'http://www.cpsc.gov/cpscpub/prerel/prhtml12/12345.html'
+ item[:link].should == 'http://www.cpsc.gov/en/Recalls/2010/Violation-of-Federal-Mattress-Flammability-Standard-Prompts-Recall-of-Ottoman-Bed-Mattresses-by-PBteen/'
item[:pub_date].should == 'Mon, 01 Mar 2010 00:00:00 +0000'
- item[:guid].should == 'http://www.cpsc.gov/cpscpub/prerel/prhtml12/12345.html'
+ item[:guid].should == 'http://www.cpsc.gov/en/Recalls/2010/Violation-of-Federal-Mattress-Flammability-Standard-Prompts-Recall-of-Ottoman-Bed-Mattresses-by-PBteen/'
item = items[5]
item[:title].should == 'FUEL SYSTEM, GASOLINE:DELIVERY:FUEL PUMP compound FROM MONACO COACH CORPORATION'
item[:description].should == 'Recalls for: automaker1 / model1, automaker2 / model2'
- item[:link].should == 'http://www-odi.nhtsa.dot.gov/recalls/recallresults.cfm?start=1&SearchType=QuickSearch&rcl_ID=123456&summary=true&PrintVersion=YES'
+ item[:link].should == 'http://www-odi.nhtsa.dot.gov/owners/SearchResults?searchType=ID&targetCategory=R&searchCriteria.nhtsa_ids=123456'
item[:pub_date].should == 'Fri, 01 Jan 2010 00:00:00 +0000'
- item[:guid].should == 'http://www-odi.nhtsa.dot.gov/recalls/recallresults.cfm?start=1&SearchType=QuickSearch&rcl_ID=123456&summary=true&PrintVersion=YES'
+ item[:guid].should == 'http://www-odi.nhtsa.dot.gov/owners/SearchResults?searchType=ID&targetCategory=R&searchCriteria.nhtsa_ids=123456'
end
end
end
@@ -375,7 +377,7 @@ def parse_items(feed)
item.should == { organization: 'CPSC',
recall_number: '12345',
recall_date: '2010-03-01',
- recall_url: 'http://www.cpsc.gov/cpscpub/prerel/prhtml12/12345.html',
+ recall_url: 'http://www.cpsc.gov/en/Recalls/2010/Violation-of-Federal-Mattress-Flammability-Standard-Prompts-Recall-of-Ottoman-Bed-Mattresses-by-PBteen/',
manufacturers: ['Acme Corp'],
product_types: ['Dangerous Stuff'],
descriptions: ['Baby Stroller can be dangerous to children'],
@@ -396,7 +398,7 @@ def parse_items(feed)
item.should == { organization: 'CPSC',
recall_number: '12345',
recall_date: '2010-03-01',
- recall_url: 'http://www.cpsc.gov/cpscpub/prerel/prhtml12/12345.html',
+ recall_url: 'http://www.cpsc.gov/en/Recalls/2010/Violation-of-Federal-Mattress-Flammability-Standard-Prompts-Recall-of-Ottoman-Bed-Mattresses-by-PBteen/',
manufacturers: ['Acme Corp'],
product_types: ['Dangerous Stuff'],
descriptions: ["Baby \uE000Stroller\uE001 can be dangerous to children"],
@@ -426,7 +428,7 @@ def parse_items(feed)
organization: Recall::NHTSA,
recall_number: '123456',
recall_date: '2010-01-01',
- recall_url: 'http://www-odi.nhtsa.dot.gov/recalls/recallresults.cfm?start=1&SearchType=QuickSearch&rcl_ID=123456&summary=true&PrintVersion=YES',
+ recall_url: 'http://www-odi.nhtsa.dot.gov/owners/SearchResults?searchType=ID&targetCategory=R&searchCriteria.nhtsa_ids=123456',
records: [{ component_description: 'comp desc1',
make: 'automaker1',
manufacturer: 'manufacturer1',
@@ -473,7 +475,7 @@ def parse_items(feed)
organization: Recall::NHTSA,
recall_number: '123456',
recall_date: '2010-01-01',
- recall_url: 'http://www-odi.nhtsa.dot.gov/recalls/recallresults.cfm?start=1&SearchType=QuickSearch&rcl_ID=123456&summary=true&PrintVersion=YES',
+ recall_url: 'http://www-odi.nhtsa.dot.gov/owners/SearchResults?searchType=ID&targetCategory=R&searchCriteria.nhtsa_ids=123456',
records: [{ component_description: 'comp desc1',
make: 'automaker1',
manufacturer: 'manufacturer1',
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<message outcome="success" transactionID="9BB7C3D1-108E-4C57-A08B-0A020889FAED">
<results>
- <result UPC="987654321" recallNo="10187" recallURL="http://www.cpsc.gov/cpscpub/prerel/prhtml10/10187.html" recDate="2010-04-01" y2k="110187" manufacturer="Crate &amp; Barrel" type="Bottles (Sports/Water/Thermos)" prname="Glass Water Bottles" hazard="Laceration" country_mfg="China" />
- <result UPC="876543219" recallNo="10187" recallURL="http://www.cpsc.gov/cpscpub/prerel/prhtml10/10187.html" recDate="2010-04-01" y2k="110187" manufacturer="Crate &amp; Barrel" type="Bottles (Sports/Water/Thermos)" prname="Glass Water Bottles" hazard="Laceration" country_mfg="China" />
- <result UPC="" recallNo="10727" recallURL="http://www.cpsc.gov/cpscpub/prerel/prhtml10/10727.html" recDate="2010-04-01" y2k="110187" manufacturer="PBteen" type="Beds/Cots" prname="PBteen Ottoman Beds" hazard="Fire &amp; Fire-Related Burn" country_mfg="Taiwan" />
+ <result UPC="987654321" recallNo="10187" recallURL="http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&amp;category=995,1098,990,991,992,993,994,1031&amp;autodisplay=true&amp;query=10187" recDate="2010-04-01" y2k="110187" manufacturer="Crate &amp; Barrel" type="Bottles (Sports/Water/Thermos)" prname="Glass Water Bottles" hazard="Laceration" country_mfg="China" />
+ <result UPC="876543219" recallNo="10187" recallURL="http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&amp;category=995,1098,990,991,992,993,994,1031&amp;autodisplay=true&amp;query=10187" recDate="2010-04-01" y2k="110187" manufacturer="Crate &amp; Barrel" type="Bottles (Sports/Water/Thermos)" prname="Glass Water Bottles" hazard="Laceration" country_mfg="China" />
+ <result UPC="" recallNo="10727" recallURL="http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&amp;category=995,1098,990,991,992,993,994,1031&amp;autodisplay=true&amp;query=10727" recDate="2010-04-01" y2k="110187" manufacturer="PBteen" type="Beds/Cots" prname="PBteen Ottoman Beds" hazard="Fire &amp; Fire-Related Burn" country_mfg="Taiwan" />
</results>
</message>
@@ -3,6 +3,7 @@
describe CdcData do
disconnect_sunspot
+
describe '.import_from_rss_feed' do
before { Recall.destroy_all }
@@ -17,11 +18,10 @@
with(URI('http://www2c.cdc.gov/podcasts/createrss.asp?c=146')).
and_return(feed_content)
- response = mock(Net::HTTPFound, code: '302', body: redirect_content)
- Net::HTTP.should_receive(:get_response).
+ CdcData.should_receive(:get_url_from_redirect).
at_least(:once).
with(URI('http://www2c.cdc.gov/podcasts/download.asp?af=h&f=8625997')).
- and_return(response)
+ and_return('http://www.fsis.usda.gov/fsis_recalls/RNR_067_2012/index.asp')
end
it 'should persist food recalls' do
@@ -111,14 +111,5 @@
end
end
- describe '.get_url_from_redirect' do
- context 'when the response body is blank' do
- it 'should return nil' do
- uri = URI('http://www2c.cdc.gov/podcasts/download.asp?af=h&f=8625997')
- response = mock(Net::HTTPFound, code: '302', body: '')
- Net::HTTP.should_receive(:get_response).with(uri).and_return(response)
- CdcData.get_url_from_redirect(uri).should be_nil
- end
- end
- end
+
end
@@ -2,6 +2,7 @@
describe CpscData do
disconnect_sunspot
+
describe '.import_from_xml_feed' do
let(:url) { 'http://www.cpsc.gov/cgibin/CPSCUpcWS/CPSCUpcSvc.asmx/getRecallByDate?endDate=2012-04-01&password=&startDate=2010-04-01&userId='.freeze }
@@ -14,6 +15,13 @@
at_least(:once).
with(URI(url)).
and_return(content)
+ CpscData.should_receive(:get_cpsc_url).
+ at_least(:once).
+ with('10187', URI('http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&category=995,1098,990,991,992,993,994,1031&autodisplay=true&query=10187')).
+ and_return('http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/')
+ CpscData.should_receive(:get_cpsc_url).
+ with('10727', URI('http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&category=995,1098,990,991,992,993,994,1031&autodisplay=true&query=10727')).
+ and_return(nil)
end
it 'should persist CPSC data' do
@@ -23,6 +31,7 @@
first_recall = Recall.find_by_recall_number('10187')
first_recall.y2k.should == 110187
first_recall.recalled_on.to_s(:db).should == '2010-04-01'
+ first_recall.url.should == 'http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/'
first_recall.recall_details.count.should == 7
recall_details = {}
@@ -44,6 +53,7 @@
recall = Recall.find_by_recall_number('10727')
recall.y2k.should == 110187
recall.recalled_on.to_s(:db).should == '2010-04-01'
+ recall.url.should == 'http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&category=995,1098,990,991,992,993,994,1031&autodisplay=true&query=10727'
recall_details = {}
recall.recall_details.each do |rd|
@@ -70,4 +80,20 @@
end
end
end
+
+ # Covers the fallback path of CpscData.get_cpsc_url: when the supplied
+ # recall URL yields no redirect, a second lookup is made with the legacy
+ # press-release URL passed as the search query.
+ describe '.get_cpsc_url' do
+ context 'when recall_url does not redirect' do
+ it 'should try get_url_from_redirect using the old URL format' do
+ # First lookup: the supplied search URL produces no redirect target.
+ CpscData.should_receive(:get_url_from_redirect).
+ with(URI('http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&category=995,1098,990,991,992,993,994,1031&autodisplay=true&query=10187')).
+ and_return(nil)
+ # Second lookup: search URL with OldURL=true and the URL-encoded legacy
+ # prhtml10/10187.html address as the query; this one redirects.
+ CpscData.should_receive(:get_url_from_redirect).
+ with(URI('http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?OldURL=true&autodisplay=true&query=http%3A%2F%2Fwww.cpsc.gov%2Fcpscpub%2Fprerel%2Fprhtml10%2F10187.html')).
+ and_return('http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/')
+
+ # The recall URL is passed as a String here; the result must be the
+ # target returned by the second lookup.
+ CpscData.get_cpsc_url('10187', 'http://cs.cpsc.gov/ConceptDemo/SearchCPSC.aspx?SearchCategory=Recalls%20News%20Releases&category=995,1098,990,991,992,993,994,1031&autodisplay=true&query=10187').
+ should == 'http://www.cpsc.gov/en/Recalls/2010/Crate-and-Barrel-Recalls-Glass-Water-Bottles-Due-to-Laceration-Hazard/'
+ end
+ end
+ end
end
Oops, something went wrong.

0 comments on commit 74ff281

Please sign in to comment.