# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [6]:
urlp = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [7]:
urlp.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [8]:
urlp.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [9]:
print(urlp.feed.title)
print(urlp.feed.subtitle)
print(urlp.feed.link)
print(urlp.entries[0].author)

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
https://www.oreilly.com/radar
Mike Loukides


### 5. Count the number of entries that are contained in this RSS feed.

In [11]:
len(urlp.entries)

15

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [12]:
urlp.entries[10].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

### 7. Extract a list of entry titles.

In [14]:
titles=[x.title for x in urlp.entries]

In [15]:
titles

['The Problem with Intelligence',
 'Radar Trends to Watch: September 2022',
 'Ad Networks and Content Marketing',
 'On Technique',
 'Scaling False Peaks',
 'The Metaverse Is Not a Place',
 'Radar Trends to Watch: August 2022',
 'SQL: The Universal Solvent for REST APIs',
 'Artificial Creativity?',
 'Radar Trends to Watch: July 2022',
 '2022 Cloud Salary Survey',
 '“Sentience” is the Wrong Question',
 'Closer to AGI?',
 'Radar Trends to Watch: June 2022',
 'Building a Better Middleman']

### 8. Calculate the percentage of "Four short links" entry titles.

In [16]:
four_shots = []
for x in titles:
    if ("Four short links:" in x):
        four_shots.append(x)

num_titles = (len(titles))
num_four = (len(four_shots))

print (num_titles,num_four)

print ((num_four*100)/num_titles)

15 0
0.0


### 9. Create a Pandas data frame from the feed's entries.

In [17]:
import pandas as pd

In [18]:
df = pd.DataFrame(urlp.entries)

In [20]:
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments
0,The Problem with Intelligence,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/the-problem-with...,https://www.oreilly.com/radar/the-problem-with...,"Tue, 13 Sep 2022 11:21:40 +0000","(2022, 9, 13, 11, 21, 40, 1, 256, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Artificial Intelligence', 'scheme':...",https://www.oreilly.com/radar/?p=14718,False,Projects like OpenAI’s DALL-E and DeepMind’s G...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-problem-with...,0
1,Radar Trends to Watch: September 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 06 Sep 2022 11:21:09 +0000","(2022, 9, 6, 11, 21, 9, 1, 249, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14700,False,It’s hardly news to talk about the AI developm...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
2,Ad Networks and Content Marketing,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/ad-networks-and-...,https://www.oreilly.com/radar/ad-networks-and-...,"Tue, 16 Aug 2022 11:21:21 +0000","(2022, 8, 16, 11, 21, 21, 1, 228, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'Operations', 'scheme': None, 'label...",https://www.oreilly.com/radar/?p=14688,False,"In a recent Radar piece, I explored N-sided ma...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/ad-networks-and-...,0
3,On Technique,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/on-technique/,https://www.oreilly.com/radar/on-technique/#re...,"Tue, 09 Aug 2022 11:12:22 +0000","(2022, 8, 9, 11, 12, 22, 1, 221, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14669,False,"In a previous article, I wrote about how model...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/on-technique/feed/,0
4,Scaling False Peaks,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/scaling-false-pe...,https://www.oreilly.com/radar/scaling-false-pe...,"Thu, 04 Aug 2022 11:12:44 +0000","(2022, 8, 4, 11, 12, 44, 3, 216, 0)",[{'name': 'Kevlin Henney'}],Kevlin Henney,{'name': 'Kevlin Henney'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14661,False,Humans are notoriously poor at judging distanc...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/scaling-false-pe...,0


### 10. Count the number of entries per author and sort them in descending order.

In [22]:
entries = df.groupby('author').agg({'title':'count'}).sort_values('title')

In [23]:
entries

Unnamed: 0_level_0,title
author,Unnamed: 1_level_1
Jon Udell,1
Kevlin Henney,1
Tim O’Reilly,1
Q McCallum,2
Mike Loukides,10


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [24]:
df['title_length'] = df['title'].apply(len)
df[['title', 'author', 'title_length']].sort_values('title_length', ascending=False)

Unnamed: 0,title,author,title_length
7,SQL: The Universal Solvent for REST APIs,Jon Udell,40
1,Radar Trends to Watch: September 2022,Mike Loukides,37
6,Radar Trends to Watch: August 2022,Mike Loukides,34
2,Ad Networks and Content Marketing,Q McCallum,33
11,“Sentience” is the Wrong Question,Mike Loukides,33
9,Radar Trends to Watch: July 2022,Mike Loukides,32
13,Radar Trends to Watch: June 2022,Mike Loukides,32
0,The Problem with Intelligence,Mike Loukides,29
5,The Metaverse Is Not a Place,Tim O’Reilly,28
14,Building a Better Middleman,Q McCallum,27


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [None]:
###  ---  ###