# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
res=feedparser.parse(url)
print (type(res))

<class 'feedparser.FeedParserDict'>


### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
list_parse=list(res.keys())
print (list_parse)

['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces']


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
list_feed=[res['feed'].keys()]
print (list_feed)

[dict_keys(['title', 'title_detail', 'id', 'guidislink', 'link', 'updated', 'updated_parsed', 'subtitle', 'subtitle_detail', 'links', 'authors', 'author_detail', 'author', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])]


### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
print (res['feed']['title'])
print (res['feed']['subtitle'])
print (res['feed']['author'])
print (res['feed']['link'])

All - O'Reilly Media
All of our Ideas and Learning material from all of our topics.
O'Reilly Media
https://www.oreilly.com


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
print (len(res['entries']))

60


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [8]:
list_components=[res['entries'][0].keys()]
print (list_components)

[dict_keys(['title', 'title_detail', 'updated', 'updated_parsed', 'id', 'guidislink', 'link', 'content', 'summary', 'links', 'authors', 'author_detail', 'author', 'feedburner_origlink'])]


### 7. Extract a list of entry titles.

In [9]:
list_titles=[res['entries'][i]['title'] for i in range(len(res['entries']))]
print (list_titles)

['Four short links: 8 July 2019', 'Four short links: 5 July 2019', 'Four short links: 4 July 2019', 'Tools for machine learning development', 'New live online training courses', 'Four short links: 3 July 2019', 'Four short links: 2 July 2019', 'Four short links: 1 July 2019', 'RISELab’s AutoPandas hints at automation tech that will change the nature of software development', 'Four short links: 28 June 2019', 'One simple chart: Who is interested in Spark NLP?', 'Four short links: 27 June 2019', 'Four short links: 26 June 2019', 'AI and machine learning will require retraining your entire organization', 'Four short links: 25 June 2019', 'Four short links: 24 June 2019', 'Four short links: 21 June 2019', 'Four short links: 20 June 2019', 'Enabling end-to-end machine learning pipelines in real-world applications', 'Four short links: 19 June 2019', 'What are model governance and model operations?', 'Four short links: 18 June 2019', 'The quest for high-quality data', 'Four short links: 17 Ju

### 8. Calculate the percentage of "Four short links" entry titles.

In [10]:
count=len([i for i in range(len(list_titles)) if 'Four short links' in list_titles[i]])
print (count*100/(len(res['entries'])), '%')

45.0 %


### 9. Create a Pandas data frame from the feed's entries.

In [11]:
import pandas as pd

In [12]:
entry=pd.DataFrame.from_dict(res['entries'])
display (entry.head())

Unnamed: 0,author,author_detail,authors,content,feedburner_origlink,guidislink,id,link,links,summary,title,title_detail,updated,updated_parsed
0,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-08:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Algorithmic Governance, DevOps Assessme...",Four short links: 8 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-08T10:50:00Z,"(2019, 7, 8, 10, 50, 0, 0, 189, 0)"
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-05:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Online Not All Bad, Emotional Space, Te...",Four short links: 5 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-05T13:10:00Z,"(2019, 7, 5, 13, 10, 0, 4, 186, 0)"
2,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-04:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Debugging AI, Serverless Foundations, Y...",Four short links: 4 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-04T13:50:00Z,"(2019, 7, 4, 13, 50, 0, 3, 185, 0)"
3,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/tools-for-machin...,True,"tag:www.oreilly.com,2019-07-03:/ideas/tools-fo...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Tools for machine learning development,"{'type': 'text/plain', 'language': None, 'base...",2019-07-03T13:35:00Z,"(2019, 7, 3, 13, 35, 0, 2, 184, 0)"
4,,,,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/new-live-online-...,True,"tag:www.oreilly.com,2019-07-03:/ideas/new-live...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,New live online training courses,"{'type': 'text/plain', 'language': None, 'base...",2019-07-03T11:20:00Z,"(2019, 7, 3, 11, 20, 0, 2, 184, 0)"


### 10. Count the number of entries per author and sort them in descending order.

In [13]:
author=entry['author'].value_counts()
display (author.head())

Nat Torkington                           27
Ben Lorica                                6
Jenn Webb                                 2
Ben Lorica, Harish Doddi, David Talby     1
Chris Guzikowski                          1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [14]:
len_title=[len(entry['title'][i]) for i in range(len(entry))]
entry['len_title']=len_title
display (entry.head())

Unnamed: 0,author,author_detail,authors,content,feedburner_origlink,guidislink,id,link,links,summary,title,title_detail,updated,updated_parsed,len_title
0,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-08:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Algorithmic Governance, DevOps Assessme...",Four short links: 8 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-08T10:50:00Z,"(2019, 7, 8, 10, 50, 0, 0, 189, 0)",29
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-05:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Online Not All Bad, Emotional Space, Te...",Four short links: 5 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-05T13:10:00Z,"(2019, 7, 5, 13, 10, 0, 4, 186, 0)",29
2,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-04:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Debugging AI, Serverless Foundations, Y...",Four short links: 4 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-04T13:50:00Z,"(2019, 7, 4, 13, 50, 0, 3, 185, 0)",29
3,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/tools-for-machin...,True,"tag:www.oreilly.com,2019-07-03:/ideas/tools-fo...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Tools for machine learning development,"{'type': 'text/plain', 'language': None, 'base...",2019-07-03T13:35:00Z,"(2019, 7, 3, 13, 35, 0, 2, 184, 0)",38
4,,,,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/new-live-online-...,True,"tag:www.oreilly.com,2019-07-03:/ideas/new-live...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,New live online training courses,"{'type': 'text/plain', 'language': None, 'base...",2019-07-03T11:20:00Z,"(2019, 7, 3, 11, 20, 0, 2, 184, 0)",32


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [15]:
ml=[entry['title'][i] for i in range(len(entry)) if 'machine learning' in entry['summary'][i]]
print (ml)

['Tools for machine learning development', 'New live online training courses', 'RISELab’s AutoPandas hints at automation tech that will change the nature of software development', 'AI and machine learning will require retraining your entire organization', 'Enabling end-to-end machine learning pipelines in real-world applications', 'What are model governance and model operations?', 'The quest for high-quality data', 'AI adoption is being fueled by an improved tool ecosystem', 'Maximizing paper clips', 'Four short links: 6 June 2019', 'What’s driving open source software in 2019']
