# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser
import pandas as pd

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Address of the feed that feedparser.parse() is pointed at in the next cell.
url = "http://feeds.feedburner.com/oreilly/radar/atom"

In [9]:
# Fetch and parse the feed located at `url`; the parsed result is reused by the later cells.
radar_atom = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [11]:
# Names of the top-level components exposed by the parsed feed object.
keys = [*radar_atom.keys()]
keys

['feed',
 'entries',
 'bozo',
 'headers',
 'etag',
 'updated',
 'updated_parsed',
 'href',
 'status',
 'encoding',
 'version',
 'namespaces']

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [14]:
# Names of the fields stored under the 'feed' component.
keys_feed = [*radar_atom['feed'].keys()]
keys_feed

['title',
 'title_detail',
 'links',
 'link',
 'subtitle',
 'subtitle_detail',
 'updated',
 'updated_parsed',
 'language',
 'sy_updateperiod',
 'sy_updatefrequency',
 'generator_detail',
 'generator',
 'feedburner_info',
 'geo_lat',
 'geo_long',
 'feedburner_emailserviceid',
 'feedburner_feedburnerhostname']

### 4. Extract and print the feed title, subtitle, author, and link.

In [24]:
# Feed-level fields to report; .get() yields None for any field the feed does not provide.
cols = ['title', 'subtitle', 'author', 'link']
values_feed = {col: radar_atom['feed'].get(col) for col in cols}

# Print the fields in the order they were requested.
for col in cols:
    print(col, ':', values_feed[col])

title : Radar
subtitle : Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
author : None
link : https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
# Number of entries contained in the parsed feed.
len(radar_atom.entries)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [25]:
# Inspect the first entry to see which fields an entry carries.
[*radar_atom.entries[0].keys()]

['title',
 'title_detail',
 'links',
 'link',
 'comments',
 'published',
 'published_parsed',
 'authors',
 'author',
 'author_detail',
 'tags',
 'id',
 'guidislink',
 'summary',
 'summary_detail',
 'content',
 'wfw_commentrss',
 'slash_comments']

### 7. Extract a list of entry titles.

In [28]:
# Collect the title of every entry by iterating over the entries directly
# (no index bookkeeping), then preview the first ten.
titles = [entry.title for entry in radar_atom.entries]
titles[:10]

['The Cloud in 2021: Adoption Continues',
 'Radar trends to watch: December 2021',
 'Low-Code and the Democratization of Programming',
 'Remote Teams in ML/AI',
 'Radar trends to watch: November 2021',
 'The Sobering Truth About the Impact of Your Business Ideas',
 'MLOps and DevOps: Why Data Makes It Different',
 'The Quality of Auto-Generated Code',
 'Radar trends to watch: October 2021',
 'Ethical Social Media: Oxymoron or Attainable Goal?']

### 8. Calculate the percentage of "Four short links" entry titles.

In [41]:
# Share of entry titles that belong to the "Four short links" series.
# Numerator and denominator are both derived from `titles`, so the ratio is
# internally consistent, and an empty list reports 0.0 instead of raising
# ZeroDivisionError.
shorttitles = [title for title in titles if 'Four short links' in title]
percent = round(len(shorttitles) / len(titles) * 100, 1) if titles else 0.0
print(f'The percentage of "Four short links" is:{percent}%')

The percentage of "Four short links" is:18.3%


### 9. Create a Pandas data frame from the feed's entries.

In [44]:
# Build one row per feed entry; each entry field becomes a column.
# (pandas is imported in the notebook's first cell together with feedparser.)
df = pd.DataFrame(radar_atom['entries'])
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments
0,The Cloud in 2021: Adoption Continues,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/the-cloud-in-202...,https://www.oreilly.com/radar/the-cloud-in-202...,"Tue, 07 Dec 2021 12:26:55 +0000","(2021, 12, 7, 12, 26, 55, 1, 341, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Cloud', 'scheme': None, 'label': No...",https://www.oreilly.com/radar/?p=14129,False,"Last year, our report on cloud adoption showed...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-cloud-in-202...,0
1,Radar trends to watch: December 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Wed, 01 Dec 2021 13:48:10 +0000","(2021, 12, 1, 13, 48, 10, 2, 335, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14103,False,The last month had a few surprises. Three item...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
2,Low-Code and the Democratization of Programming,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/low-code-and-the...,https://www.oreilly.com/radar/low-code-and-the...,"Tue, 16 Nov 2021 12:36:18 +0000","(2021, 11, 16, 12, 36, 18, 1, 320, 0)",[{}],,,"[{'term': 'Programming', 'scheme': None, 'labe...",https://www.oreilly.com/radar/?p=14083,False,"In the past decade, the growth in low-code and...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/low-code-and-the...,0
3,Remote Teams in ML/AI,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/remote-teams-in-...,https://www.oreilly.com/radar/remote-teams-in-...,"Tue, 09 Nov 2021 14:05:48 +0000","(2021, 11, 9, 14, 5, 48, 1, 313, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'Building a data culture', 'scheme':...",https://www.oreilly.com/radar/?p=14075,False,I&#8217;m well-versed in the ups and downs of ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/remote-teams-in-...,0
4,Radar trends to watch: November 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 02 Nov 2021 11:40:17 +0000","(2021, 11, 2, 11, 40, 17, 1, 306, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14066,False,While October’s news was dominated by Facebook...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0


### 10. Count the number of entries per author and sort them in descending order.

In [51]:
# Entries per author, most prolific first. The per-author title count is
# renamed to 'entries' as the last step of a single method chain.
authors = (
    df.groupby('author', as_index=False)
    .agg({'title': 'count'})
    .sort_values('title', ascending=False)
    .rename(columns={'title': 'entries'})
)
authors

Unnamed: 0,author,entries
6,Mike Loukides,28
8,Nat Torkington,11
0,,4
1,Chris Butler,3
13,Tim O’Reilly,3
11,Q McCallum,2
2,"Eric Colson, Daragh Sibley and Dave Spiegel",1
3,Hugo Bowne-Anderson,1
4,Kevlin Henney,1
5,Mike Barlow,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [52]:
# Title length in characters. The vectorised .str accessor replaces a
# Python-level apply(len) and yields NaN for a missing title instead of raising.
df['title_length'] = df['title'].str.len()
# Longest titles first.
df[['author', 'title', 'title_length']].sort_values('title_length', ascending=False)

Unnamed: 0,author,title,title_length
40,Mike Loukides,"Where Programming, Ops, AI, and the Cloud are ...",60
24,Nitesh Dhanjani,AI Powered Misinformation and Manipulation at ...,58
5,"Eric Colson, Daragh Sibley and Dave Spiegel",The Sobering Truth About the Impact of Your Bu...,58
37,,5 infrastructure and operations trends to watc...,55
46,,O’Reilly’s top 20 live online training courses...,54
14,Mike Loukides,Defending against ransomware is all about the ...,52
9,Mike Barlow,Ethical Social Media: Oxymoron or Attainable G...,50
2,,Low-Code and the Democratization of Programming,47
6,Ville Tuulos and Hugo Bowne-Anderson,MLOps and DevOps: Why Data Makes It Different,45
36,,5 things on our data and AI radar for 2021,42


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [53]:
# Titles of entries whose summary mentions "machine learning" (case-insensitive).
# Entries are iterated directly, and .get() substitutes an empty summary for
# any entry without one, so a single incomplete entry cannot abort the search.
ML_titles = [
    entry.title
    for entry in radar_atom.entries
    if "machine learning" in entry.get('summary', '').lower()
]
ML_titles

['MLOps and DevOps: Why Data Makes It Different',
 'Rebranding Data',
 'Hand Labeling Considered Harmful',
 'AI Powered Misinformation and Manipulation at Scale #GPT-3',
 'Radar trends to watch: April 2021',
 '5 things on our data and AI radar for 2021',
 'Seven Legal Questions for Data Scientists']