# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
pip install feedparser

Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install xmltodict

Note: you may need to restart the kernel to use updated packages.


In [3]:
import feedparser
import requests as request
import xmltodict

### 1. Use feedparser to parse the following RSS feed URL.

In [4]:
# Fetch and parse the O'Reilly Radar feed; `feed` is reused by every cell below.
FEED_URL = 'http://feeds.feedburner.com/oreilly/radar/atom'
feed = feedparser.parse(FEED_URL)

In [5]:
type(feed)

feedparser.util.FeedParserDict

### 2. Obtain a list of components (keys) that are available for this feed.

In [6]:
feed.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

In [7]:
list(feed.keys())

['bozo',
 'entries',
 'feed',
 'headers',
 'updated',
 'updated_parsed',
 'href',
 'status',
 'encoding',
 'version',
 'namespaces']

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [8]:
feed['feed'].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])

In [9]:
list(feed['feed'].keys())

['title',
 'title_detail',
 'links',
 'link',
 'subtitle',
 'subtitle_detail',
 'updated',
 'updated_parsed',
 'language',
 'sy_updateperiod',
 'sy_updatefrequency',
 'generator_detail',
 'generator']

### 4. Extract and print the feed title, subtitle, author, and link.

In [10]:
feed['feed']['link']

'https://www.oreilly.com/radar'

In [11]:
feed['feed']['subtitle']

'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'

In [12]:
feed['feed']['title']

'Radar'

### 5. Count the number of entries that are contained in this RSS feed.

In [13]:
len(feed['entries'])

15

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [14]:
feed['entries'][0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

In [15]:
list(feed['entries'][0].keys())

['title',
 'title_detail',
 'links',
 'link',
 'comments',
 'published',
 'published_parsed',
 'authors',
 'author',
 'author_detail',
 'tags',
 'id',
 'guidislink',
 'summary',
 'summary_detail',
 'content',
 'wfw_commentrss',
 'slash_comments']

### 7. Extract a list of entry titles.

In [16]:
feed['entries'][0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

In [17]:
# Collect the title of every entry, in feed order.
# (The former first line, a bare `feed['entries'][0]['title']`, was a dead
# expression: it was neither displayed nor assigned, so it has been dropped.)
lst = [entry['title'] for entry in feed['entries']]
     

In [18]:
lst

['Radar Trends to Watch: February 2023',
 'Automating the Automators: Shift Change in the Robot Factory',
 'Digesting 2022',
 'Radar Trends to Watch: January 2023',
 'What Does Copyright Say about Generative Models?',
 'Radar Trends to Watch: December 2022',
 'AI’s ‘SolarWinds Moment’ Will Occur; It’s Just a Matter of When',
 'Technical Health Isn’t Optional',
 'Healthy Data',
 'Formal Informal Languages',
 'Radar Trends to Watch: November 2022',
 'What We Learned Auditing Sophisticated AI for Bias',
 'The Collaborative Metaverse',
 'What Is Hyperautomation?',
 'Radar Trends to Watch: October 2022']

### 9. Create a Pandas data frame from the feed's entries.

In [19]:
import pandas as pd

In [20]:
feed2 = pd.DataFrame(feed['entries'])

In [21]:
feed2.head(5)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments
0,Radar Trends to Watch: February 2023,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 07 Feb 2023 11:18:47 +0000","(2023, 2, 7, 11, 18, 47, 1, 38, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14852,False,This month’s news seems to have been derailed ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
1,Automating the Automators: Shift Change in the...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/automating-the-a...,https://www.oreilly.com/radar/automating-the-a...,"Tue, 17 Jan 2023 11:33:31 +0000","(2023, 1, 17, 11, 33, 31, 1, 17, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14841,False,What would you say is the job of a software de...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/automating-the-a...,0
2,Digesting 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/digesting-2022/,https://www.oreilly.com/radar/digesting-2022/#...,"Tue, 10 Jan 2023 13:37:13 +0000","(2023, 1, 10, 13, 37, 13, 1, 10, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14837,False,Although I don’t subscribe to the idea that hi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/digesting-2022/f...,0
3,Radar Trends to Watch: January 2023,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Wed, 04 Jan 2023 11:53:08 +0000","(2023, 1, 4, 11, 53, 8, 2, 4, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14826,False,"Perhaps unsurprisingly, December was a slow mo...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
4,What Does Copyright Say about Generative Models?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/what-does-copyri...,https://www.oreilly.com/radar/what-does-copyri...,"Tue, 13 Dec 2022 12:22:38 +0000","(2022, 12, 13, 12, 22, 38, 1, 347, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Artificial Intelligence', 'scheme':...",https://www.oreilly.com/radar/?p=14806,False,The current generation of flashy AI applicatio...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/what-does-copyri...,0


### 10. Count the number of entries per author and sort them in descending order.

In [22]:
# value_counts already orders by count, largest first; spelling the options out
# documents the intent without a second, redundant sort pass over the result.
feed2['author'].value_counts(sort=True, ascending=False)

Mike Loukides    12
Q McCallum        1
Mike Barlow       1
Patrick Hall      1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [23]:
# Number of characters in each entry title, computed with the vectorised string
# accessor. This avoids label-based lookups with positional integers (which only
# work on a default RangeIndex) and sizing the loop from an unrelated column.
feed2['length'] = feed2['title'].str.len()

In [24]:
feed2.head(5)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,length
0,Radar Trends to Watch: February 2023,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 07 Feb 2023 11:18:47 +0000","(2023, 2, 7, 11, 18, 47, 1, 38, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14852,False,This month’s news seems to have been derailed ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,36
1,Automating the Automators: Shift Change in the...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/automating-the-a...,https://www.oreilly.com/radar/automating-the-a...,"Tue, 17 Jan 2023 11:33:31 +0000","(2023, 1, 17, 11, 33, 31, 1, 17, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14841,False,What would you say is the job of a software de...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/automating-the-a...,0,60
2,Digesting 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/digesting-2022/,https://www.oreilly.com/radar/digesting-2022/#...,"Tue, 10 Jan 2023 13:37:13 +0000","(2023, 1, 10, 13, 37, 13, 1, 10, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14837,False,Although I don’t subscribe to the idea that hi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/digesting-2022/f...,0,14
3,Radar Trends to Watch: January 2023,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Wed, 04 Jan 2023 11:53:08 +0000","(2023, 1, 4, 11, 53, 8, 2, 4, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14826,False,"Perhaps unsurprisingly, December was a slow mo...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,35
4,What Does Copyright Say about Generative Models?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/what-does-copyri...,https://www.oreilly.com/radar/what-does-copyri...,"Tue, 13 Dec 2022 12:22:38 +0000","(2022, 12, 13, 12, 22, 38, 1, 347, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Artificial Intelligence', 'scheme':...",https://www.oreilly.com/radar/?p=14806,False,The current generation of flashy AI applicatio...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/what-does-copyri...,0,48


In [25]:
# Keep only the columns needed for the title-length report.
report_columns = ['author', 'title', 'length']
feed3 = feed2[report_columns]

In [26]:
# Longest titles first.
feed3.sort_values(by='length', ascending=False)

Unnamed: 0,author,title,length
6,Mike Barlow,AI’s ‘SolarWinds Moment’ Will Occur; It’s Just...,63
1,Q McCallum,Automating the Automators: Shift Change in the...,60
11,Patrick Hall,What We Learned Auditing Sophisticated AI for ...,50
4,Mike Loukides,What Does Copyright Say about Generative Models?,48
0,Mike Loukides,Radar Trends to Watch: February 2023,36
5,Mike Loukides,Radar Trends to Watch: December 2022,36
10,Mike Loukides,Radar Trends to Watch: November 2022,36
3,Mike Loukides,Radar Trends to Watch: January 2023,35
14,Mike Loukides,Radar Trends to Watch: October 2022,35
7,Mike Loukides,Technical Health Isn’t Optional,31


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [34]:
# Boolean mask: True where the summary contains the literal phrase.
# regex=False treats the phrase as plain text; na=False maps a missing summary
# to False so the mask is always strictly boolean.
result = feed2['summary'].str.contains('machine learning', regex=False, na=False)

In [38]:
result

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14     True
Name: summary, dtype: bool

In [29]:
# Total number of times the phrase occurs across all summaries.
# This is a single scalar count, not the row label of a matching entry.
phrase_hits_per_entry = feed2['summary'].str.count('machine learning')
result = phrase_hits_per_entry.sum()

In [30]:
result

1

In [31]:
# Select titles with a boolean mask over the summaries and return them as a list.
# The previous version used the *total occurrence count* as a row label, which
# returned the title at index 1 regardless of which entry actually matched.
has_phrase = feed2['summary'].str.contains('machine learning', regex=False, na=False)
feed2.loc[has_phrase, 'title'].tolist()

['Radar Trends to Watch: October 2022']

In [32]:
feed2['summary'].str.count('machine learning')

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    1
Name: summary, dtype: int64

In [33]:
# str.find gives -1 where the phrase is absent, so `!= -1` keeps only the rows
# whose summary contains it. The result is a Series of titles, not a plain list.
feed2.title[feed2['summary'].str.find('machine learning') != -1]

14    Radar Trends to Watch: October 2022
Name: title, dtype: object