# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser
import pandas as pd

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
d=feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [6]:
list(d)

['feed',
 'entries',
 'bozo',
 'headers',
 'updated',
 'updated_parsed',
 'href',
 'status',
 'encoding',
 'version',
 'namespaces']

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [8]:
list(d.feed)

['title',
 'title_detail',
 'links',
 'link',
 'subtitle',
 'subtitle_detail',
 'updated',
 'updated_parsed',
 'language',
 'sy_updateperiod',
 'sy_updatefrequency',
 'generator_detail',
 'generator',
 'feedburner_info',
 'geo_lat',
 'geo_long',
 'feedburner_emailserviceid',
 'feedburner_feedburnerhostname']

### 4. Extract and print the feed title, subtitle, author, and link.

In [37]:
print(d.feed.title,d.feed.subtitle, d.feed.link) #there is no author

Radar Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [49]:
len(d.entries)

18

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [11]:
#z=[x for x in d.entries[1]]
#print(z)

list(d.entries[0])

['title',
 'title_detail',
 'links',
 'link',
 'comments',
 'published',
 'published_parsed',
 'authors',
 'author',
 'author_detail',
 'tags',
 'id',
 'guidislink',
 'summary',
 'summary_detail',
 'content',
 'wfw_commentrss',
 'slash_comments',
 'feedburner_origlink']

### 7. Extract a list of entry titles.

In [14]:
titles_list=[]
for i in range(len(d.entries)):
    titles_list.append(d.entries[i].title)

print(titles_list)

['Four short links: 6 December 2019', 'Radar trends to watch: December 2019', 'Four short links: 5 December 2019', 'Four short links: 4 December 2019', 'Use your people as competitive advantage', 'Four short links: 3 December 2019', 'A 5G future', 'Four short links: 2 December 2019', 'Four short links: 29 November 2019', 'Four short links: 28 November 2019', 'Four short links: 27 November 2019', 'Moving AI and ML from research into production', 'Four short links: 26 November 2019', 'Four short links: 25 November 2019', 'Four short links: 22 November 2019', 'Why you should care about robotic process automation', 'Unraveling the mystery of code', 'Four short links: 21 November 2019']


### 8. Calculate the percentage of "Four short links" entry titles.

In [15]:
fsl=[]
for i in range(len(d.entries)):
       if d.entries[i].title.startswith('Four short links'):
             fsl.append(d.entries[i].title.startswith('Four short links'))

print(f'Percentage of "Four short links: {round((len(fsl)/len(titles_list))*100, 2)}%')
    

Percentage of "Four short links: 66.67%


### 9. Create a Pandas data frame from the feed's entries.

In [12]:
import pandas as pd

In [16]:
df = pd.DataFrame(d.entries)

In [18]:
df.head(2)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 6 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 06 Dec 2019 05:01:00 +0000","(2019, 12, 6, 5, 1, 0, 4, 340, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11147,False,Declarative Assembly of Web Applications From ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Radar trends to watch: December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Thu, 05 Dec 2019 12:00:00 +0000","(2019, 12, 5, 12, 0, 0, 3, 339, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=11118,False,Privacy and security trends DNS over HTTPS is ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...


### 10. Count the number of entries per author and sort them in descending order.

In [45]:
df.groupby(['author'])['title'].count().sort_values(ascending=False)
#df['author'].value_counts()

author
Nat Torkington                                  12
Mike Loukides                                    2
Jenn Webb                                        2
Sunil Ranka, Roger Magoulas and Steve Swoyer     1
Pamela Rucker                                    1
Name: title, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [56]:
df['length_title'] = df['title'].str.len()
df.head(2)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,length_title
0,Four short links: 6 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 06 Dec 2019 05:01:00 +0000","(2019, 12, 6, 5, 1, 0, 4, 340, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11147,False,Declarative Assembly of Web Applications From ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,33
1,Radar trends to watch: December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Thu, 05 Dec 2019 12:00:00 +0000","(2019, 12, 5, 12, 0, 0, 3, 339, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=11118,False,Privacy and security trends DNS over HTTPS is ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...,36


In [63]:
df.sort_values('length_title', ascending = False).head(2)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,length_title
15,Why you should care about robotic process auto...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/why-you-should-c...,"Thu, 21 Nov 2019 12:00:00 +0000","(2019, 11, 21, 12, 0, 0, 3, 325, 0)","[{'name': 'Sunil Ranka, Roger Magoulas and Ste...","Sunil Ranka, Roger Magoulas and Steve Swoyer","{'name': 'Sunil Ranka, Roger Magoulas and Stev...","[{'term': 'Innovation & Disruption', 'scheme':...",https://www.oreilly.com/radar/?p=11021,False,"In a classic 1983 paper, cognitive psychologis...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/why-you-should-c...,0,https://www.oreilly.com/radar/why-you-should-c...,52
11,Moving AI and ML from research into production,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/moving-ai-and-ml...,"Tue, 26 Nov 2019 05:10:13 +0000","(2019, 11, 26, 5, 10, 13, 1, 330, 0)",[{'name': 'Jenn Webb'}],Jenn Webb,{'name': 'Jenn Webb'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=10241,False,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/moving-ai-and-ml...,0,https://www.oreilly.com/radar/moving-ai-and-ml...,46


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [65]:
df['summary']

'Declarative Assembly of Web Applications From Predefined Concepts &#8212; To build an app, the developer imports concepts from the catalog, tunes them to fit the application’s particular needs via configuration variables, and links concept components together to create pages. Components of different concepts may be executed independently, or bound together declaratively with dataflows and synchronization. [&#8230;]'