# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [5]:
%pip install feedparser
import feedparser as fp

Note: you may need to restart the kernel to use updated packages.


### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [7]:
feed = fp.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [None]:
feed.keys()

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [37]:
feed['feed'].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [102]:
print('title: ', feed['feed']['title'])

print()

print('subtitle: ', feed['feed']['subtitle'])

print()

print('author: ', feed['entries'][0]['author'])

print()
print('link: ', feed['entries'][0]['link'])

title:  Radar

subtitle:  Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology

author:  Q McCallum

link:  https://www.oreilly.com/radar/automating-the-automators-shift-change-in-the-robot-factory/


### 5. Count the number of entries that are contained in this RSS feed.

In [106]:
len(feed['entries'])

15

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [111]:
feed['entries'][0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

### 7. Extract a list of entry titles.

In [117]:
lst = [e['title'] for e in feed['entries']]
lst

['Automating the Automators: Shift Change in the Robot Factory',
 'Digesting 2022',
 'Radar Trends to Watch: January 2023',
 'What Does Copyright Say about Generative Models?',
 'Radar Trends to Watch: December 2022',
 'AI’s ‘SolarWinds Moment’ Will Occur; It’s Just a Matter of When',
 'Technical Health Isn’t Optional',
 'Healthy Data',
 'Formal Informal Languages',
 'Radar Trends to Watch: November 2022',
 'What We Learned Auditing Sophisticated AI for Bias',
 'The Collaborative Metaverse',
 'What Is Hyperautomation?',
 'Radar Trends to Watch: October 2022',
 'The Problem with Intelligence']

### 8. Calculate the percentage of "Four short links" entry titles.

### 9. Create a Pandas data frame from the feed's entries.

In [126]:
import pandas as pd

In [138]:
df = pd.DataFrame(feed['entries'])
df

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments
0,Automating the Automators: Shift Change in the...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/automating-the-a...,https://www.oreilly.com/radar/automating-the-a...,"Tue, 17 Jan 2023 11:33:31 +0000","(2023, 1, 17, 11, 33, 31, 1, 17, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14841,False,What would you say is the job of a software de...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/automating-the-a...,0
1,Digesting 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/digesting-2022/,https://www.oreilly.com/radar/digesting-2022/#...,"Tue, 10 Jan 2023 13:37:13 +0000","(2023, 1, 10, 13, 37, 13, 1, 10, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14837,False,Although I don’t subscribe to the idea that hi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/digesting-2022/f...,0
2,Radar Trends to Watch: January 2023,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Wed, 04 Jan 2023 11:53:08 +0000","(2023, 1, 4, 11, 53, 8, 2, 4, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14826,False,"Perhaps unsurprisingly, December was a slow mo...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
3,What Does Copyright Say about Generative Models?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/what-does-copyri...,https://www.oreilly.com/radar/what-does-copyri...,"Tue, 13 Dec 2022 12:22:38 +0000","(2022, 12, 13, 12, 22, 38, 1, 347, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Artificial Intelligence', 'scheme':...",https://www.oreilly.com/radar/?p=14806,False,The current generation of flashy AI applicatio...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/what-does-copyri...,0
4,Radar Trends to Watch: December 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 06 Dec 2022 12:21:48 +0000","(2022, 12, 6, 12, 21, 48, 1, 340, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14799,False,This month’s news has been overshadowed by the...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
5,AI’s ‘SolarWinds Moment’ Will Occur; It’s Just...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/ais-solarwinds-m...,https://www.oreilly.com/radar/ais-solarwinds-m...,"Tue, 29 Nov 2022 12:36:46 +0000","(2022, 11, 29, 12, 36, 46, 1, 333, 0)",[{'name': 'Mike Barlow'}],Mike Barlow,{'name': 'Mike Barlow'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14792,False,Major catastrophes can transform industries an...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/ais-solarwinds-m...,0
6,Technical Health Isn’t Optional,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/technical-health...,https://www.oreilly.com/radar/technical-health...,"Tue, 22 Nov 2022 12:25:18 +0000","(2022, 11, 22, 12, 25, 18, 1, 326, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Open Source', 'scheme': None, 'labe...",https://www.oreilly.com/radar/?p=14780,False,"If every company is a technology company, then...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/technical-health...,0
7,Healthy Data,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/healthy-data/,https://www.oreilly.com/radar/healthy-data/#re...,"Tue, 15 Nov 2022 15:18:53 +0000","(2022, 11, 15, 15, 18, 53, 1, 319, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Data', 'scheme': None, 'label': Non...",https://www.oreilly.com/radar/?p=14784,False,"This summer, we started asking about “technica...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/healthy-data/feed/,0
8,Formal Informal Languages,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/formal-informal-...,https://www.oreilly.com/radar/formal-informal-...,"Tue, 08 Nov 2022 11:58:09 +0000","(2022, 11, 8, 11, 58, 9, 1, 312, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Programming', 'scheme': None, 'labe...",https://www.oreilly.com/radar/?p=14772,False,We’ve all been impressed by the generative art...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/formal-informal-...,0
9,Radar Trends to Watch: November 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 01 Nov 2022 11:15:57 +0000","(2022, 11, 1, 11, 15, 57, 1, 305, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14760,False,Maintaining a separate category for AI is gett...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0


### 10. Count the number of entries per author and sort them in descending order.

In [165]:
df.pivot_table(index = 'author', values = 'title', aggfunc = 'count').sort_values('title',ascending=False)

Unnamed: 0_level_0,title
author,Unnamed: 1_level_1
Mike Loukides,12
Mike Barlow,1
Patrick Hall,1
Q McCallum,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [168]:
lst = [len(e) for e in df['title']]
df['len']= lst

### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [194]:
titles = []
string = 'machine learning'
for e in feed['entries']:
    for k,v in e.items():
        try : 
            if string in v:
                titles.append(e['title'])
        except: pass
titles

['Radar Trends to Watch: October 2022']