# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
%pip install feedparser

Note: you may need to restart the kernel to use updated packages.


In [2]:
import feedparser

In [3]:
import pandas as pd

### 1. Use feedparser to parse the following RSS feed URL.

In [4]:
url = feedparser.parse('http://feeds.feedburner.com/oreilly/radar/atom')

In [5]:
type(url)

feedparser.util.FeedParserDict

### 2. Obtain a list of components (keys) that are available for this feed.

In [6]:
url.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [7]:
lst = url['feed'].keys()
print(lst)

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])


### 4. Extract and print the feed title, subtitle, author, and link.

In [8]:
url['feed']

{'title': 'Radar',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Radar'},
 'links': [{'href': 'https://www.oreilly.com/radar/feed/',
   'rel': 'self',
   'type': 'application/rss+xml'},
  {'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://www.oreilly.com/radar'}],
 'link': 'https://www.oreilly.com/radar',
 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
 'subtitle_detail': {'type': 'text/html',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
 'updated': 'Tue, 07 Feb 2023 11:18:48 +0000',
 'updated_parsed': time.struct_time(tm_year=2023, tm_mon=2, tm_mday=7, tm_hour=11, tm_min=18, tm_sec=48, tm_wday=1, tm_yday=38, tm_isdst=0),
 'language': 'en-US',
 'sy_updateperiod': 'hourly',
 '

### 5. Count the number of entries that are contained in this RSS feed.

In [9]:
len(url['feed'])

13

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [10]:
url['entries'][1].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

### 7. Extract a list of entry titles.

In [21]:
[url.entries[i].title for i in range(len(url.entries))]

['Radar Trends to Watch: February 2023',
 'Automating the Automators: Shift Change in the Robot Factory',
 'Digesting 2022',
 'Radar Trends to Watch: January 2023',
 'What Does Copyright Say about Generative Models?',
 'Radar Trends to Watch: December 2022',
 'AI’s ‘SolarWinds Moment’ Will Occur; It’s Just a Matter of When',
 'Technical Health Isn’t Optional',
 'Healthy Data',
 'Formal Informal Languages',
 'Radar Trends to Watch: November 2022',
 'What We Learned Auditing Sophisticated AI for Bias',
 'The Collaborative Metaverse',
 'What Is Hyperautomation?',
 'Radar Trends to Watch: October 2022']

### 8. Calculate the percentage of "Four short links" entry titles.

### 9. Create a Pandas data frame from the feed's entries.

In [12]:
df = pd.DataFrame(url['entries'])
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments
0,Radar Trends to Watch: February 2023,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 07 Feb 2023 11:18:47 +0000","(2023, 2, 7, 11, 18, 47, 1, 38, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14852,False,This month’s news seems to have been derailed ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
1,Automating the Automators: Shift Change in the...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/automating-the-a...,https://www.oreilly.com/radar/automating-the-a...,"Tue, 17 Jan 2023 11:33:31 +0000","(2023, 1, 17, 11, 33, 31, 1, 17, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14841,False,What would you say is the job of a software de...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/automating-the-a...,0
2,Digesting 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/digesting-2022/,https://www.oreilly.com/radar/digesting-2022/#...,"Tue, 10 Jan 2023 13:37:13 +0000","(2023, 1, 10, 13, 37, 13, 1, 10, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14837,False,Although I don’t subscribe to the idea that hi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/digesting-2022/f...,0
3,Radar Trends to Watch: January 2023,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Wed, 04 Jan 2023 11:53:08 +0000","(2023, 1, 4, 11, 53, 8, 2, 4, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14826,False,"Perhaps unsurprisingly, December was a slow mo...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
4,What Does Copyright Say about Generative Models?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/what-does-copyri...,https://www.oreilly.com/radar/what-does-copyri...,"Tue, 13 Dec 2022 12:22:38 +0000","(2022, 12, 13, 12, 22, 38, 1, 347, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Artificial Intelligence', 'scheme':...",https://www.oreilly.com/radar/?p=14806,False,The current generation of flashy AI applicatio...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/what-does-copyri...,0


### 10. Count the number of entries per author and sort them in descending order.

In [15]:
df.author.value_counts().sort_values(ascending=False)

Mike Loukides    12
Q McCallum        1
Mike Barlow       1
Patrick Hall      1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [22]:
df['title_length'] = [len(url.entries[i].title) for i in range(len(url.entries))]

df[['title', 'author', 'title_length']].sort_values(by='title_length', ascending=False)

Unnamed: 0,title,author,title_length
6,AI’s ‘SolarWinds Moment’ Will Occur; It’s Just...,Mike Barlow,63
1,Automating the Automators: Shift Change in the...,Q McCallum,60
11,What We Learned Auditing Sophisticated AI for ...,Patrick Hall,50
4,What Does Copyright Say about Generative Models?,Mike Loukides,48
0,Radar Trends to Watch: February 2023,Mike Loukides,36
5,Radar Trends to Watch: December 2022,Mike Loukides,36
10,Radar Trends to Watch: November 2022,Mike Loukides,36
3,Radar Trends to Watch: January 2023,Mike Loukides,35
14,Radar Trends to Watch: October 2022,Mike Loukides,35
7,Technical Health Isn’t Optional,Mike Loukides,31


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."