# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser
import pandas as pd

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
d=feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
list(d)

['feed',
 'entries',
 'bozo',
 'headers',
 'updated',
 'updated_parsed',
 'href',
 'status',
 'encoding',
 'version',
 'namespaces']

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
list(d.feed)

['title',
 'title_detail',
 'links',
 'link',
 'subtitle',
 'subtitle_detail',
 'updated',
 'updated_parsed',
 'language',
 'sy_updateperiod',
 'sy_updatefrequency',
 'generator_detail',
 'generator',
 'feedburner_info',
 'geo_lat',
 'geo_long',
 'feedburner_emailserviceid',
 'feedburner_feedburnerhostname']

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
print(d.feed.title,d.feed.subtitle, d.feed.link) #there is no author

Radar Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
len(d.entries)

18

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [8]:
#z=[x for x in d.entries[1]]
#print(z)

list(d.entries[0])

['title',
 'title_detail',
 'links',
 'link',
 'comments',
 'published',
 'published_parsed',
 'authors',
 'author',
 'author_detail',
 'tags',
 'id',
 'guidislink',
 'summary',
 'summary_detail',
 'content',
 'wfw_commentrss',
 'slash_comments',
 'feedburner_origlink']

### 7. Extract a list of entry titles.

In [9]:
titles_list=[]
for i in range(len(d.entries)):
    titles_list.append(d.entries[i].title)

print(titles_list)

['The road to Software 2.0', 'Four short links: 10 December 2019', 'Four short links: 9 December 2019', 'Four short links: 6 December 2019', 'Radar trends to watch: December 2019', 'Four short links: 5 December 2019', 'Four short links: 4 December 2019', 'Use your people as competitive advantage', 'Four short links: 3 December 2019', 'A 5G future', 'Four short links: 2 December 2019', 'Four short links: 29 November 2019', 'Four short links: 28 November 2019', 'Four short links: 27 November 2019', 'Moving AI and ML from research into production', 'Four short links: 26 November 2019', 'Four short links: 25 November 2019', 'Four short links: 22 November 2019']


### 8. Calculate the percentage of "Four short links" entry titles.

In [10]:
fsl=[]
for i in range(len(d.entries)):
       if d.entries[i].title.startswith('Four short links'):
             fsl.append(d.entries[i].title.startswith('Four short links'))

print(f'Percentage of "Four short links: {round((len(fsl)/len(titles_list))*100, 2)}%')
    

Percentage of "Four short links: 72.22%


### 9. Create a Pandas data frame from the feed's entries.

In [11]:
import pandas as pd

In [12]:
df = pd.DataFrame(d.entries)

In [13]:
df.head(2)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,The road to Software 2.0,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-road-to-soft...,"Tue, 10 Dec 2019 11:00:00 +0000","(2019, 12, 10, 11, 0, 0, 1, 344, 0)",[{'name': 'Mike Loukides and Ben Lorica'}],Mike Loukides and Ben Lorica,{'name': 'Mike Loukides and Ben Lorica'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=11155,False,"Roughly a year ago, we wrote “What machine lea...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-road-to-soft...,0,https://www.oreilly.com/radar/the-road-to-soft...
1,Four short links: 10 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 10 Dec 2019 05:01:00 +0000","(2019, 12, 10, 5, 1, 0, 1, 344, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11192,False,The Hidden Worries of Facial Recognition Techn...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [14]:
df.groupby(['author'])['title'].count().sort_values(ascending=False)
#df['author'].value_counts()

author
Nat Torkington                  13
Mike Loukides                    2
Pamela Rucker                    1
Mike Loukides and Ben Lorica     1
Jenn Webb                        1
Name: title, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [15]:
df['length_title'] = df['title'].str.len()
df.head(2)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,length_title
0,The road to Software 2.0,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-road-to-soft...,"Tue, 10 Dec 2019 11:00:00 +0000","(2019, 12, 10, 11, 0, 0, 1, 344, 0)",[{'name': 'Mike Loukides and Ben Lorica'}],Mike Loukides and Ben Lorica,{'name': 'Mike Loukides and Ben Lorica'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=11155,False,"Roughly a year ago, we wrote “What machine lea...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-road-to-soft...,0,https://www.oreilly.com/radar/the-road-to-soft...,24
1,Four short links: 10 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 10 Dec 2019 05:01:00 +0000","(2019, 12, 10, 5, 1, 0, 1, 344, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11192,False,The Hidden Worries of Facial Recognition Techn...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,34


In [16]:
df.sort_values('length_title', ascending = False).head(2)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,length_title
14,Moving AI and ML from research into production,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/moving-ai-and-ml...,"Tue, 26 Nov 2019 05:10:13 +0000","(2019, 11, 26, 5, 10, 13, 1, 330, 0)",[{'name': 'Jenn Webb'}],Jenn Webb,{'name': 'Jenn Webb'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=10241,False,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/moving-ai-and-ml...,0,https://www.oreilly.com/radar/moving-ai-and-ml...,46
7,Use your people as competitive advantage,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/use-your-people-...,"Tue, 03 Dec 2019 09:00:00 +0000","(2019, 12, 3, 9, 0, 0, 1, 337, 0)",[{'name': 'Pamela Rucker'}],Pamela Rucker,{'name': 'Pamela Rucker'},"[{'term': 'Future of the Firm', 'scheme': None...",https://www.oreilly.com/radar/?p=11068,False,"In a fast-paced digital world, it is tempting ...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/use-your-people-...,0,https://www.oreilly.com/radar/use-your-people-...,40


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [23]:
df[df['summary'].str.contains('machine learning')]

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,length_title
0,The road to Software 2.0,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-road-to-soft...,"Tue, 10 Dec 2019 11:00:00 +0000","(2019, 12, 10, 11, 0, 0, 1, 344, 0)",[{'name': 'Mike Loukides and Ben Lorica'}],Mike Loukides and Ben Lorica,{'name': 'Mike Loukides and Ben Lorica'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=11155,False,"Roughly a year ago, we wrote “What machine lea...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-road-to-soft...,0,https://www.oreilly.com/radar/the-road-to-soft...,24
14,Moving AI and ML from research into production,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/moving-ai-and-ml...,"Tue, 26 Nov 2019 05:10:13 +0000","(2019, 11, 26, 5, 10, 13, 1, 330, 0)",[{'name': 'Jenn Webb'}],Jenn Webb,{'name': 'Jenn Webb'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=10241,False,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/moving-ai-and-ml...,0,https://www.oreilly.com/radar/moving-ai-and-ml...,46
