# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [2]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [4]:
data = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [5]:
data.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [7]:
data.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [14]:
print (data.feed.title)
print ('')
print (data.feed.subtitle)
print ('')
print (data.feed.language)
print ('')
print (data.feed.link)

Radar

Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology

en-US

https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [44]:
total = len(data.entries)
print(total)

60


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [21]:
data.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [23]:
titles = [data.entries[i].title for i in range(len(data.entries))]
print(titles)

['Where Programming, Ops, AI, and the Cloud are Headed in 2021', 'Seven Legal Questions for Data Scientists', 'Patterns', 'Radar trends to watch: January 2021', 'Four short links: 14 Dec 2020', 'Four short links: 8 Dec 2020', 'O’Reilly’s top 20 live online training courses of 2020', 'What is functional programming?', 'Four short links: 4 Dec 2020', 'Four short links: 1 Dec 2020', 'Radar trends to watch: December 2020', 'Four short links: 27 Nov 2020', 'Four short links: 24 Nov 2020', 'Four short links: 20 Nov 2020', 'On Exactitude in Technical Debt', 'Four short links: 17 Nov 2020', 'Four short links: 13 Nov 2020', 'Multi-Paradigm Languages', 'Four short links: 10 November 2020', 'Four short links: 6 Nov 2020', 'Four short links: 4 Nov 2020', 'Radar trends to watch: November 2020', 'Four short links: 30 Oct 2020', 'Four short links: 28 Oct 2020', 'Our Favorite Questions', 'Four short links: 21 Oct 2020', 'Four Short Links: 16 October 2020', 'Four short links: 14 Oct 2020', 'AI Product 

In [26]:
type(titles)

### 8. Calculate the percentage of "Four short links" entry titles.

In [46]:
import re

contador = 0
for element in titles:
    if 'Four short links' in element:
        contador+=1
propor = contador/total
porcentaje = round(propor*100 , 1)
print(contador,'/',total,'=',propor,'=',porcentaje,'%')

37 / 60 = 0.6166666666666667 = 61.7 %


### 9. Create a Pandas data frame from the feed's entries.

In [48]:
import pandas as pd

In [54]:
df = pd.DataFrame(data.entries)
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,"Where Programming, Ops, AI, and the Cloud are ...","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/where-programmin...,"Mon, 25 Jan 2021 12:03:14 +0000","(2021, 1, 25, 12, 3, 14, 0, 25, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13616,False,"In this report, we look at the data generated ...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/where-programmin...,0,https://www.oreilly.com/radar/where-programmin...
1,Seven Legal Questions for Data Scientists,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/seven-legal-ques...,"Tue, 19 Jan 2021 12:21:18 +0000","(2021, 1, 19, 12, 21, 18, 1, 19, 0)",[{'name': 'Patrick Hall and Ayoub Ouederni'}],Patrick Hall and Ayoub Ouederni,{'name': 'Patrick Hall and Ayoub Ouederni'},"[{'term': 'Artificial Intelligence', 'scheme':...",https://www.oreilly.com/radar/?p=13610,False,“[T]he threats to consumers arising from data ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/seven-legal-ques...,0,https://www.oreilly.com/radar/seven-legal-ques...
2,Patterns,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/patterns/#respond,"Tue, 12 Jan 2021 12:56:01 +0000","(2021, 1, 12, 12, 56, 1, 1, 12, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Column', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13601,False,"A few months ago, I said that &#8220;making ev...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/patterns/feed/,0,https://www.oreilly.com/radar/patterns/
3,Radar trends to watch: January 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 05 Jan 2021 11:40:19 +0000","(2021, 1, 5, 11, 40, 19, 1, 5, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13596,False,The last month of the old year showed a lot of...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...
4,Four short links: 14 Dec 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 15 Dec 2020 14:52:18 +0000","(2020, 12, 15, 14, 52, 18, 1, 350, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13589,False,End-to-end Entity Resolution for Big Data — In...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [56]:
authors = df.groupby('author', as_index=False).agg({'title':'count'})
authors.columns = ['author', 'entries']
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
6,Nat Torkington,38
5,Mike Loukides,13
0,,2
1,Alex Castrounis,1
2,Justin Norman and Mike Loukides,1
3,Kevlin Henney,1
4,Matthew Rocklin and Hugo Bowne-Anderson,1
7,Patrick Hall and Ayoub Ouederni,1
8,Q Ethan McCallum and Mike Loukides,1
9,"Q Ethan McCallum, Chris Butler and Shane Glynn",1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [61]:
df['number_characters'] = df['title'].apply(len)
df[['title','author','number_characters']].sort_values('number_characters', ascending=False).head()

Unnamed: 0,title,author,number_characters
50,Why Best-of-Breed is a Better Choice than All-...,Matthew Rocklin and Hugo Bowne-Anderson,79
0,"Where Programming, Ops, AI, and the Cloud are ...",Mike Loukides,60
6,O’Reilly’s top 20 live online training courses...,,54
1,Seven Legal Questions for Data Scientists,Patrick Hall and Ayoub Ouederni,41
28,AI Product Management After Deployment,Justin Norman and Mike Loukides,38


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [113]:
titles_list = df['title'].tolist() if "machine learning" in df['summary']
print(titles_list)

SyntaxError: invalid syntax (<ipython-input-113-c77d89f356d1>, line 1)

In [119]:
df.loc["machine learning" in 'summary'].title.tolist()



KeyError: False

In [None]:
#iterrow()

In [111]:
summary_list = []

for element in df(['title']['author']):
    print(element)
    summary_list.append('1')
    

  for element in df(['title']['author']):


TypeError: list indices must be integers or slices, not str

In [104]:
print(summary_list)

['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1']


In [105]:
len(summary_list)

20