# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [2]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [6]:
data = feedparser.parse(url)
data

{'feed': {'title': 'Radar',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Radar'},
  'links': [{'rel': 'alternate',
    'type': 'text/html',
    'href': 'https://www.oreilly.com/radar'},
   {'rel': 'self',
    'type': 'application/rss+xml',
    'href': 'http://feeds.feedburner.com/oreilly/radar/atom'},
   {'rel': 'hub',
    'href': 'http://pubsubhubbub.appspot.com/',
    'type': 'text/html'}],
  'link': 'https://www.oreilly.com/radar',
  'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
  'subtitle_detail': {'type': 'text/html',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
  'updated': 'Wed, 02 Oct 2019 13:14:37 +0000',
  'updated_parsed': time.struct_time(tm_year=2019, tm_mon=10, tm_m

### 2. Obtain a list of components (keys) that are available for this feed.

In [10]:
data.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [11]:
data.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [18]:
title = data.feed.title
subtitle = data.feed.subtitle 
link = data.feed.link

[title, subtitle, link]

['Radar',
 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
 'https://www.oreilly.com/radar']

### 5. Count the number of entries that are contained in this RSS feed.

In [22]:
len(list(data.keys()))

12

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [62]:
index = list(data.keys())
index


['feed',
 'entries',
 'bozo',
 'headers',
 'etag',
 'updated',
 'updated_parsed',
 'href',
 'status',
 'encoding',
 'version',
 'namespaces']

In [63]:
lists_from_index = [type(data[ind]) for ind in index]
lists_from_index

[feedparser.FeedParserDict,
 list,
 int,
 dict,
 str,
 str,
 time.struct_time,
 str,
 int,
 str,
 str,
 dict]

In [54]:
data['feed'].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

In [59]:
list(data.headers.keys())

['Content-Type',
 'ETag',
 'Last-Modified',
 'Content-Encoding',
 'Transfer-Encoding',
 'Date',
 'Expires',
 'Cache-Control',
 'X-Content-Type-Options',
 'X-XSS-Protection',
 'Server',
 'Connection']

### 7. Extract a list of entry titles.

In [66]:
data.entries

[{'title': 'Four short links: 2 October 2019',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Four short links: 2 October 2019'},
  'links': [{'rel': 'alternate',
    'type': 'text/html',
    'href': 'http://feedproxy.google.com/~r/oreilly/radar/atom/~3/iA2TwNaZER0/'}],
  'link': 'http://feedproxy.google.com/~r/oreilly/radar/atom/~3/iA2TwNaZER0/',
  'comments': 'https://www.oreilly.com/radar/four-short-links-2-october-2019/#respond',
  'published': 'Wed, 02 Oct 2019 04:01:09 +0000',
  'published_parsed': time.struct_time(tm_year=2019, tm_mon=10, tm_mday=2, tm_hour=4, tm_min=1, tm_sec=9, tm_wday=2, tm_yday=275, tm_isdst=0),
  'authors': [{'email': 'jwebb@oreilly.com'}],
  'author': 'jwebb@oreilly.com',
  'author_detail': {'email': 'jwebb@oreilly.com'},
  'tags': [{'term': 'Four Short Links', 'scheme': None, 'label': None},
   {'term': 'Signals', 'scheme': None, 'label': None}],
  'id': 'https://www.

In [69]:
list_of_entry_titles = [element.title for element in data.entries]
list_of_entry_titles

['Four short links: 2 October 2019',
 'Four short links: 1 October 2019',
 'TinyML: The challenges and opportunities of low-power ML applications',
 'Four short links: 30 September 2019',
 'Highlights from the Strata Data Conference in New York 2019',
 'Four short links: 27 September 2019',
 'Data Science Pioneers: Conquering the next frontier, a documentary investigating the future of data science',
 'Postrevolutionary big data: Promoting the general welfare',
 'Say what? The ethical challenges of designing for humanlike interaction',
 'RL in real life: Bringing reinforcement learning to the enterprise',
 'Strata Data Awards winners 2019',
 'Staying safe in the AI era',
 'Data sonification: Making music from the yield curve',
 'Unlocking the value of your data',
 'Four Short Links: 26 September 2019',
 'The future of Google Cloud data processing',
 'Interactive sports analytics',
 'AI isn’t magic. It’s computer science.']

### 8. Calculate the percentage of "Four short links" entry titles.

In [73]:
selection = [element for element in list_of_entry_titles if "Four short links" in str(element)]

f'{len(selection)/len(list_of_entry_titles)*100}%'

'22.22222222222222%'

### 9. Create a Pandas data frame from the feed's entries.

In [74]:
import pandas as pd

In [120]:
df = pd.DataFrame(data.entries)
df.head(10)

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},[{'email': 'jwebb@oreilly.com'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9832,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 02 Oct 2019 04:01:09 +0000","(2019, 10, 2, 4, 1, 9, 2, 275, 0)",0,Data Fallacies to Avoid &#8212; nifty infograp...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 2 October 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
1,jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},[{'email': 'jwebb@oreilly.com'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9792,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 01 Oct 2019 04:05:11 +0000","(2019, 10, 1, 4, 5, 11, 1, 274, 0)",0,Just Enough Research &#8212; a book that comes...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 1 October 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
2,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/tinyml-the-chall...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/tinyml-the-chall...,False,https://www.oreilly.com/radar/?p=9378,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 01 Oct 2019 04:01:53 +0000","(2019, 10, 1, 4, 1, 53, 1, 274, 0)",0,Pete Warden has an ambitious goal: he wants to...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': '~home', 'scheme': None, 'label': No...",TinyML: The challenges and opportunities of lo...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/tinyml-the-chall...
3,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9768,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 30 Sep 2019 04:01:40 +0000","(2019, 9, 30, 4, 1, 40, 0, 273, 0)",0,Stamos on CLOUD Act — cogent and informative s...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 30 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
4,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/highlights-from-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,False,https://www.oreilly.com/radar/?p=9487,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 12:00:52 +0000","(2019, 9, 27, 12, 0, 52, 4, 270, 0)",0,People from across the data world came togethe...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Highlights from the Strata Data Conference in ...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...
5,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9715,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 04:01:35 +0000","(2019, 9, 27, 4, 1, 35, 4, 270, 0)",0,"Intro to Creative Coding — this is the repo, a...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 27 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
6,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/data-science-pio...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/data-science-pio...,False,https://www.oreilly.com/radar/?p=9479,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 00:00:43 +0000","(2019, 9, 27, 0, 0, 43, 4, 270, 0)",0,This is a keynote from the Strata Data Confere...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Data Science Pioneers: Conquering the next fro...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/data-science-pio...
7,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/postrevolutionar...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/postrevolutionar...,False,https://www.oreilly.com/radar/?p=9457,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 00:00:39 +0000","(2019, 9, 27, 0, 0, 39, 4, 270, 0)",0,This is a keynote from the Strata Data Confere...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Postrevolutionary big data: Promoting the gene...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/postrevolutionar...
8,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/say-what-the-eth...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/say-what-the-eth...,False,https://www.oreilly.com/radar/?p=9475,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 00:00:38 +0000","(2019, 9, 27, 0, 0, 38, 4, 270, 0)",0,This is a keynote highlight from the Strata Da...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Say what? The ethical challenges of designing ...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/say-what-the-eth...
9,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/rl-in-real-life-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/rl-in-real-life-...,False,https://www.oreilly.com/radar/?p=9463,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 00:00:19 +0000","(2019, 9, 27, 0, 0, 19, 4, 270, 0)",0,This is a keynote from the Strata Data Confere...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",RL in real life: Bringing reinforcement learni...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/rl-in-real-life-...


### 10. Count the number of entries per author and sort them in descending order.

In [121]:
df['author'].value_counts()

Mac Slocum           16
jwebb@oreilly.com     2
Name: author, dtype: int64

In [122]:
df.sort_values(by="author", inplace=True)
df.head(2)

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
8,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/say-what-the-eth...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/say-what-the-eth...,False,https://www.oreilly.com/radar/?p=9475,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 00:00:38 +0000","(2019, 9, 27, 0, 0, 38, 4, 270, 0)",0,This is a keynote highlight from the Strata Da...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Say what? The ethical challenges of designing ...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/say-what-the-eth...
15,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/the-future-of-go...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-future-of-go...,False,https://www.oreilly.com/radar/?p=9402,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 26 Sep 2019 00:00:45 +0000","(2019, 9, 26, 0, 0, 45, 3, 269, 0)",0,This is a keynote from the Strata Data Confere...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",The future of Google Cloud data processing,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/the-future-of-go...


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [97]:
df['length'] = df['title'].apply(lambda x: len(x))

In [104]:
df2 = df[['title', 'author', 'length']]
df2

Unnamed: 0,title,author,length
8,Say what? The ethical challenges of designing ...,Mac Slocum,71
16,Interactive sports analytics,Mac Slocum,28
2,TinyML: The challenges and opportunities of lo...,Mac Slocum,69
3,Four short links: 30 September 2019,Mac Slocum,35
4,Highlights from the Strata Data Conference in ...,Mac Slocum,59
5,Four short links: 27 September 2019,Mac Slocum,35
6,Data Science Pioneers: Conquering the next fro...,Mac Slocum,107
7,Postrevolutionary big data: Promoting the gene...,Mac Slocum,57
17,AI isn’t magic. It’s computer science.,Mac Slocum,38
9,RL in real life: Bringing reinforcement learni...,Mac Slocum,66


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [126]:
import numpy as np
df3 = df2['title'].apply(lambda x: x if "ml" in str(x).lower() else np.NaN)
df3.dropna(inplace=True)
df3

2    TinyML: The challenges and opportunities of lo...
Name: title, dtype: object

In [127]:
import numpy as np
df3 = df2['title'].apply(lambda x: x if "machine learning" in str(x).lower() else np.NaN)
df3.dropna(inplace=True)
df3

Series([], Name: title, dtype: float64)

In [129]:
import numpy as np
df3 = df2['title'].apply(lambda x: x if "google cloud" in str(x).lower() else np.NaN)
df3.dropna(inplace=True)
df3

15    The future of Google Cloud data processing
Name: title, dtype: object

In [130]:
import numpy as np
df3 = df2['title'].apply(lambda x: x if "of" in str(x).lower() else np.NaN)
df3.dropna(inplace=True)
df3

8     Say what? The ethical challenges of designing ...
2     TinyML: The challenges and opportunities of lo...
6     Data Science Pioneers: Conquering the next fro...
13                     Unlocking the value of your data
15           The future of Google Cloud data processing
Name: title, dtype: object