diff --git a/app/models/concerns/episode_ready.rb.bk b/app/models/concerns/episode_ready.rb.bk new file mode 100644 index 000000000..5a19d328c --- /dev/null +++ b/app/models/concerns/episode_ready.rb.bk @@ -0,0 +1,50 @@ +require "active_support/concern" + +module EpisodeReady + extend ActiveSupport::Concern + + included do + scope :with_category, ->(cats) do + + + end + + scope :for_feed, ->(feed) do + return all unless feed.present? + + # TODO: episode has_and_belongs_to_many feeds + + where(podcast_id: feed.podcast_id) + + # published date + published_by(feed.episode_offset_seconds.to_i) + + # include/exclude tags + + tags = match_tags.map { |cat| normalize_category(cat) } + cats = (ep || []).categories.map { |cat| normalize_category(cat) } + (tags & cats).length > 0 + + # normalize + cat.to_s.downcase.gsub(/[^ a-z0-9_-]/, "").gsub(/\s+/, " ").strip + + # limit + order(published_at: :desc).limit(feed.display_episodes_count.to_i) + + end + + # episodes (in a feed) with their media versions already cut + scope :feed_ready, ->(feed = nil) do + scope = feed ? + + end + + scope :feed_unready, ->(feed = nil) do + + end + end + + def is_feed?(feed) + end + +end diff --git a/app/models/concerns/text_sanitizer.rb b/app/models/concerns/text_sanitizer.rb index 8d6adfbe0..edaccec11 100644 --- a/app/models/concerns/text_sanitizer.rb +++ b/app/models/concerns/text_sanitizer.rb @@ -22,4 +22,16 @@ def sanitize_text_only(text) return nil if text.blank? Loofah.fragment(text).scrub!(:prune).text(encode_special_chars: false) end + + def sanitize_keywords(kws, strict) + Array(kws).map { |kw| sanitize_keyword(kw, kw.length, strict) }.uniq.reject(&:blank?) + end + + def sanitize_keyword(kw, max_length, strict) + if strict + kw.to_s.downcase.gsub(/[^ a-z0-9_-]/, "").gsub(/\s+/, " ").strip.slice(0, max_length) + else + kw.strip.slice(0, max_length) + end + end end diff --git a/app/models/imports/episode_rss_import.rb b/app/models/imports/episode_rss_import.rb index 8aeb3bb04..f97ab2846 100644 --- a/app/models/imports/episode_rss_import.rb +++ b/app/models/imports/episode_rss_import.rb @@ -81,7 +81,6 @@ def update_episode_with_entry! episode.published_at = entry[:published] episode.season_number = entry[:itunes_season] episode.subtitle = clean_string(episode_short_desc(entry)) - episode.categories = Array(entry[:categories]).map(&:strip).reject(&:blank?) episode.title = clean_title(entry[:title]) if entry[:itunes_summary] && entry_description_attribute(entry) != :itunes_summary @@ -93,11 +92,15 @@ def update_episode_with_entry! episode.original_guid = clean_string(entry[:entry_id]) episode.is_closed_captioned = closed_captioned?(entry) episode.is_perma_link = entry[:is_perma_link] - episode.keywords = (entry[:itunes_keywords] || "").split(",").map(&:strip) episode.position = entry[:itunes_order] episode.url = episode_url(entry) episode.itunes_type = entry[:itunes_episode_type] unless entry[:itunes_episode_type].blank? + # categories setter does the work of sanitizing these + cats = Array(entry[:categories]) + keys = (entry[:itunes_keywords] || "").split(",") + episode.categories = cats + keys + episode end diff --git a/app/models/imports/podcast_rss_import.rb b/app/models/imports/podcast_rss_import.rb index 1eb92eb02..03e3137c6 100644 --- a/app/models/imports/podcast_rss_import.rb +++ b/app/models/imports/podcast_rss_import.rb @@ -177,10 +177,8 @@ def build_podcast_attributes podcast_attributes[:owner_name] = owner[:name] podcast_attributes[:owner_email] = owner[:email] - podcast_attributes[:categories] = parse_categories(feed) podcast_attributes[:complete] = (clean_string(feed.itunes_complete) == "yes") podcast_attributes[:copyright] ||= clean_string(feed.media_copyright) - podcast_attributes[:keywords] = parse_keywords(feed) podcast_attributes[:serial_order] = feed.itunes_type && !!feed.itunes_type.match(/serial/i) podcast_attributes[:locked] = true # won't publish feed until this is set to false @@ -188,6 +186,12 @@ def build_podcast_attributes podcast_attributes[:subtitle] = clean_string(podcast_short_desc(feed)) podcast_attributes[:description] = feed_description(feed) + # categories setter does the work of sanitizing these + cats = Array(feed.categories) + ikeys = (feed.itunes_keywords || "").split(",") + mkeys = (feed.media_keywords || "").split(",") + podcast_attributes[:categories] = cats + ikeys + mkeys + podcast_attributes end @@ -248,18 +252,6 @@ def parse_itunes_categories(feed) [itunes_cats.keys.map { |n| ITunesCategory.new(name: n, subcategories: itunes_cats[n]) }.first].compact end - def parse_categories(feed) - mcat = Array(feed.media_categories).map(&:strip) - rcat = Array(feed.categories).map(&:strip) - (mcat + rcat).compact.uniq - end - - def parse_keywords(feed) - ikey = Array(feed.itunes_keywords).map(&:strip) - mkey = Array(feed.media_keywords).map(&:strip) - (ikey + mkey).compact.uniq - end - def podcast_short_desc(item) [item.itunes_subtitle, item.description, item.title].find do |field| !field.blank? && field.split.length < 50 diff --git a/test/fixtures/transistor_two.xml b/test/fixtures/transistor_two.xml index 461d1d4b5..ebd42623f 100644 --- a/test/fixtures/transistor_two.xml +++ b/test/fixtures/transistor_two.xml @@ -45,6 +45,11 @@ + + + + keyword1, keyword two + media one, keyword two @@ -69,6 +74,7 @@ + keyword1, architecture For the next few episodes, we’re featuring the Smithsonian’s new series, Sidedoor, about where science, art, history, and humanity unexpectedly overlap — just like in their museums. In this episode: an astronomer has turned the night sky into a symphony; an architecture firm has radically re-thought police stations; and an audiophile builds a successful record … <a href="https://transistor.prx.org/2017/01/sidedoor-from-the-smithsonian-shake-it-up/" class="more-link">Continue reading <span class="screen-reader-text">Sidedoor from the Smithsonian: Shake it Up</span></a> For the next few episodes, we’re featuring the Smithsonian’s new series, Sidedoor, about where science, art, history, and humanity unexpectedly overlap — just like in their museums.

diff --git a/test/models/imports/episode_rss_import_test.rb b/test/models/imports/episode_rss_import_test.rb index e1731660c..d15e6213f 100644 --- a/test/models/imports/episode_rss_import_test.rb +++ b/test/models/imports/episode_rss_import_test.rb @@ -43,16 +43,13 @@ f = episode_import.episode _(f.description).must_match(/For the next few episodes/) _(f.description).wont_match(/feedburner/) - _(f.categories).must_include "Indie Features" - f.categories.each do |tag| - _(tag).wont_match(/\n/) - _(tag).wont_be :blank? - end - _(f.categories).wont_include '\t' _(f.clean_title).must_equal "Sidedoor iTunes title" _(f.season_number).must_equal 2 _(f.episode_number).must_equal 4 + # categories and itunes:keywords are combined + _(f.categories).must_equal ["Indie Features", "science", "architecture", "keyword1"] + # It has the podcast set and the published_at date _(f.podcast_id).must_equal podcast.id _(f.published_at).must_equal Time.zone.parse("2017-01-20 03:04:12") diff --git a/test/models/imports/podcast_rss_import_test.rb b/test/models/imports/podcast_rss_import_test.rb index 9905fed08..8363d1388 100644 --- a/test/models/imports/podcast_rss_import_test.rb +++ b/test/models/imports/podcast_rss_import_test.rb @@ -66,6 +66,9 @@ _(importer.podcast.managing_editor_name).must_equal "PRX" _(importer.podcast.managing_editor_email).must_equal "prxwpadmin@prx.org" + # categories, itunes:keywords and media:keywords are combined + _(importer.podcast.categories).must_equal ["Some Category", "keyword1", "keyword two", "media one"] + _(sns.messages.count).must_equal 2 _(sns.messages.map { |m| m["Job"]["Tasks"].length }).must_equal [2, 2] _(sns.messages.map { |m| m["Job"]["Tasks"].map { |t| t["Type"] } }).must_equal [["Inspect", "Copy"], ["Inspect", "Copy"]]