# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
page = feedparser.parse(url)
type(page)
page['feed']

{'title': 'Radar',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Radar'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://www.oreilly.com/radar'},
  {'rel': 'self',
   'type': 'application/rss+xml',
   'href': 'http://feeds.feedburner.com/oreilly/radar/atom'},
  {'rel': 'hub',
   'href': 'http://pubsubhubbub.appspot.com/',
   'type': 'text/html'}],
 'link': 'https://www.oreilly.com/radar',
 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
 'subtitle_detail': {'type': 'text/html',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
 'updated': 'Tue, 07 Sep 2021 13:57:20 +0000',
 'updated_parsed': time.struct_time(tm_year=2021, tm_mon=9, tm_mday=7, tm_hour=13, tm_min=57, t

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
page.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
page.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [12]:
x,y,z,l=page.feed.title,page.feed.subtitle,page.feed.title_detail,page.feed.link

print(x,y,z,l,sep="\n")

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
{'type': 'text/plain', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Radar'}
https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [13]:

len(page)

12

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [14]:
page.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [15]:
titulo = [page.entries[i].title for i in range(len(page.entries))]
titulo

['Radar trends to watch: September 2021',
 'Rebranding Data',
 'A Way Forward with Communal Computing',
 'Defending against ransomware is all about the basics',
 'Radar trends to watch: August 2021',
 'Communal Computing’s Many Problems',
 'Thinking About Glue',
 'Radar trends to watch: July 2021',
 'Hand Labeling Considered Harmful',
 'Two economies. Two sets of rules.',
 'Communal Computing',
 'Code as Infrastructure',
 'Radar trends to watch: June 2021',
 'AI Powered Misinformation and Manipulation at Scale #GPT-3',
 'DeepCheapFakes',
 'Radar trends to watch: May 2021',
 'Checking Jeff Bezos’s Math',
 'AI Adoption in the Enterprise 2021',
 'NFTs: Owning Digital Art',
 'Radar trends to watch: April 2021',
 'InfoTribes, Reality Brokers',
 'The End of Silicon Valley as We Know It?',
 'The Next Generation of AI',
 'Radar trends to watch: March 2021',
 'Product Management for AI',
 '5 things on our data and AI radar for 2021',
 '5 infrastructure and operations trends to watch in 2021',
 

### 8. Calculate the percentage of "Four short links" entry titles.

In [16]:
total = len(titulo)
four_short = [i for i in titulo if 'Four short links' in i]
fh = len(four_short)
porcentaje = fh/total
porcentaje

0.2833333333333333

### 9. Create a Pandas data frame from the feed's entries.

In [18]:
import pandas as pd

In [19]:
df = pd.DataFrame(page.entries)
df.head(3)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Radar trends to watch: September 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Wed, 01 Sep 2021 12:18:33 +0000","(2021, 9, 1, 12, 18, 33, 2, 244, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13943,False,Let’s start with a moment of silence for O’Rei...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...
1,Rebranding Data,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/rebranding-data/...,"Tue, 24 Aug 2021 14:16:28 +0000","(2021, 8, 24, 14, 16, 28, 1, 236, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'Data', 'scheme': None, 'label': Non...",https://www.oreilly.com/radar/?p=13932,False,There&#8217;s a flavor of puzzle in which you ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/rebranding-data/...,0,https://www.oreilly.com/radar/rebranding-data/
2,A Way Forward with Communal Computing,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/a-way-forward-wi...,"Tue, 17 Aug 2021 12:45:50 +0000","(2021, 8, 17, 12, 45, 50, 1, 229, 0)",[{'name': 'Chris Butler'}],Chris Butler,{'name': 'Chris Butler'},"[{'term': 'Artificial Intelligence', 'scheme':...",https://www.oreilly.com/radar/?p=13910,False,Communal devices in our homes and offices aren...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/a-way-forward-wi...,0,https://www.oreilly.com/radar/a-way-forward-wi...


### 10. Count the number of entries per author and sort them in descending order.

In [20]:
autores = df.groupby('author', as_index=False).agg({'title':'count'})
autores.sort_values('title', ascending=False)

Unnamed: 0,author,title
5,Mike Loukides,25
6,Nat Torkington,17
0,,4
1,Chris Butler,3
12,Tim O’Reilly,3
2,Hugo Bowne-Anderson,1
3,Justin Norman and Mike Loukides,1
4,Kevlin Henney,1
7,Nitesh Dhanjani,1
8,Patrick Hall and Ayoub Ouederni,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [21]:
df['title_leng'] = df['title'].apply(len)
df[['title', 'author', 'title_leng']].sort_values('title_leng', ascending=False).head()

Unnamed: 0,title,author,title_leng
29,"Where Programming, Ops, AI, and the Cloud are ...",Mike Loukides,60
13,AI Powered Misinformation and Manipulation at ...,Nitesh Dhanjani,58
26,5 infrastructure and operations trends to watc...,,55
35,O’Reilly’s top 20 live online training courses...,,54
3,Defending against ransomware is all about the ...,Mike Loukides,52


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [22]:
def valor(row):
    return 'machine learning' in row.summary

titulo = list(df[df.apply(valor, axis=1)]['title'])
titulo

['Hand Labeling Considered Harmful',
 'Radar trends to watch: April 2021',
 'Seven Legal Questions for Data Scientists']