# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
%pip install feedparser

import feedparser

Note: you may need to restart the kernel to use updated packages.


### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
NBC = feedparser.parse('https://podcastfeeds.nbcnews.com/HL4TzgYC')

In [3]:
print(NBC['feed'])

{'links': [{'href': 'https://feeds.simplecast.com/HL4TzgYC', 'rel': 'self', 'title': 'MP3 Audio', 'type': 'application/atom+xml'}, {'href': 'https://simplecast.superfeedr.com/', 'rel': 'hub', 'type': 'text/html'}, {'rel': 'alternate', 'type': 'text/html', 'href': 'https://www.nbcnews.com/dateline'}], 'generator_detail': {'name': 'https://simplecast.com'}, 'generator': 'https://simplecast.com', 'title': 'Dateline NBC', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'https://podcastfeeds.nbcnews.com/HL4TzgYC', 'value': 'Dateline NBC'}, 'subtitle': 'Current and classic episodes, featuring compelling true-crime mysteries, powerful documentaries and in-depth investigations.', 'subtitle_detail': {'type': 'text/html', 'language': None, 'base': 'https://podcastfeeds.nbcnews.com/HL4TzgYC', 'value': 'Current and classic episodes, featuring compelling true-crime mysteries, powerful documentaries and in-depth investigations.'}, 'rights': '2019 NBC News', 'rights_detail': {'type':

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
NBC.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
NBC.feed.keys()

dict_keys(['links', 'generator_detail', 'generator', 'title', 'title_detail', 'subtitle', 'subtitle_detail', 'rights', 'rights_detail', 'language', 'published', 'published_parsed', 'updated', 'updated_parsed', 'image', 'link', 'itunes_type', 'summary', 'summary_detail', 'authors', 'author', 'author_detail', 'itunes_explicit', 'itunes_new-feed-url', 'tags', 'publisher_detail'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
print (NBC.feed.title)
print ('')
print (NBC.feed.subtitle)
print ('')
print (NBC.feed.author)
print ('')
print (NBC.feed.link)

Dateline NBC

Current and classic episodes, featuring compelling true-crime mysteries, powerful documentaries and in-depth investigations.

NBC News

https://www.nbcnews.com/dateline


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
len(NBC.entries)

454

In [8]:
NBC.entries[0]

{'id': '34c72474-9e16-4dec-9d90-e3b7ab47ac42',
 'guidislink': False,
 'title': 'Circle of Friends',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://podcastfeeds.nbcnews.com/HL4TzgYC',
  'value': 'Circle of Friends'},
 'summary': 'In this Dateline classic, a circle of friends from a Miami high school are about to embark on a new beginning at college. But one friend surprises them all. Dennis Murphy reports. Originally aired on NBC on August 22, 2014.',
 'summary_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://podcastfeeds.nbcnews.com/HL4TzgYC',
  'value': 'In this Dateline classic, a circle of friends from a Miami high school are about to embark on a new beginning at college. But one friend surprises them all. Dennis Murphy reports. Originally aired on NBC on August 22, 2014.'},
 'published': 'Tue, 1 Aug 2023 10:00:00 +0000',
 'published_parsed': time.struct_time(tm_year=2023, tm_mon=8, tm_mday=1, tm_hour=10, tm_min=0, tm_sec=0, tm_w

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [9]:
NBC.entries[0].keys()

dict_keys(['id', 'guidislink', 'title', 'title_detail', 'summary', 'summary_detail', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'links', 'link', 'content', 'itunes_title', 'itunes_duration', 'subtitle', 'subtitle_detail', 'tags', 'itunes_explicit', 'itunes_episodetype'])

### 7. Extract a list of entry titles.

In [10]:
titles_list = []
for entry in NBC.entries:
    titles_list.append(entry['title'])
    
print(titles_list)

['Circle of Friends', 'Deadly Hollywood Dreams', 'On the Trail of the Bike Path Rapist', 'Diabolical', 'Death in the Hollywood Hills', 'The Trap', 'A Long, Dark Stretch of Road', 'Into Thin Air', 'Something Wicked', 'What Happened to Tara Grant?', 'Behind the Closet Door', 'What happened to Dia Abrams? | Dateline: Missing in America', 'The Mystery of Katrina Montgomery', 'What happened to Logan Schiendelman? | Dateline: Missing in America', 'Deadly Secret', "Father's Day", 'Bad Intentions', 'What happened to Elizabeth Ann Gill? | Dateline: Missing in America', 'The Alibi', 'What happened to Jasmine Robinson? | Dateline: Missing in America', 'Secrets in the Ozarks', 'Bodies of Evidence', 'What happened to Nevaeh Kingbird? | Dateline: Missing in America', "The House on Sidney's Cove", 'What happened to Kent Jacobs? | Dateline: Missing in America', 'The Model & the Millionaire', 'An all-new season of ‘Dateline Missing in America’', 'The Killings on King Road', 'Return to the Early Shift',

### 8. Calculate the percentage of "Four short links" entry titles.

In [11]:
titles_list.count("Four short links")

0

### 9. Create a Pandas data frame from the feed's entries.

In [12]:
import pandas as pd

In [13]:
df = pd.DataFrame(NBC.entries)
df.head()

Unnamed: 0,id,guidislink,title,title_detail,summary,summary_detail,published,published_parsed,authors,author,...,content,itunes_title,itunes_duration,subtitle,subtitle_detail,tags,itunes_explicit,itunes_episodetype,image,itunes_episode
0,34c72474-9e16-4dec-9d90-e3b7ab47ac42,False,Circle of Friends,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, a circle of friends ...","{'type': 'text/plain', 'language': None, 'base...","Tue, 1 Aug 2023 10:00:00 +0000","(2023, 8, 1, 10, 0, 0, 1, 213, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Circle of Friends,00:41:49,"In this Dateline classic, a circle of friends ...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'gainesville', 'scheme': 'http://www...",,full,,
1,2ec51ada-c679-4703-b2a8-97c220649343,False,Deadly Hollywood Dreams,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, Keith Morrison tells...","{'type': 'text/plain', 'language': None, 'base...","Wed, 26 Jul 2023 10:00:00 +0000","(2023, 7, 26, 10, 0, 0, 2, 207, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Deadly Hollywood Dreams,00:19:32,"In this Dateline classic, Keith Morrison tells...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'bill bradford', 'scheme': 'http://w...",,full,,
2,5bd7df29-92f0-45ee-99a7-81dd043d05ea,False,On the Trail of the Bike Path Rapist,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, Keith Morrison repor...","{'type': 'text/plain', 'language': None, 'base...","Tue, 25 Jul 2023 10:00:00 +0000","(2023, 7, 25, 10, 0, 0, 1, 206, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",On the Trail of the Bike Path Rapist,00:40:12,"In this Dateline classic, Keith Morrison repor...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'clarence', 'scheme': 'http://www.it...",,full,,
3,c7b882d1-b153-4eef-a984-8110fe0bfd56,False,Diabolical,"{'type': 'text/plain', 'language': None, 'base...",A bizarre cyber-revenge case lands a young wom...,"{'type': 'text/plain', 'language': None, 'base...","Wed, 19 Jul 2023 10:00:00 +0000","(2023, 7, 19, 10, 0, 0, 2, 200, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Diabolical,00:41:41,A bizarre cyber-revenge case lands a young wom...,"{'type': 'text/plain', 'language': None, 'base...","[{'term': 'michelle hadley', 'scheme': 'http:/...",,full,,
4,dff93a73-f3b8-416c-aa7b-b4c77286218d,False,Death in the Hollywood Hills,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, a predator strikes i...","{'type': 'text/plain', 'language': None, 'base...","Tue, 18 Jul 2023 10:00:00 +0000","(2023, 7, 18, 10, 0, 0, 1, 199, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Death in the Hollywood Hills,00:39:00,"In this Dateline classic, a predator strikes i...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'michigan', 'scheme': 'http://www.it...",,full,,


### 10. Count the number of entries per author and sort them in descending order.

In [14]:
authors = df.groupby('author', as_index=False).agg({'title':'count'})
authors.columns = ['author', 'entries']
authors.sort_values('entries', ascending=False)
authors

Unnamed: 0,author,entries
0,Keith Morrison,1
1,NBC News,453


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [15]:
df["title_length"] = df["title"].apply(len)
df.head()

Unnamed: 0,id,guidislink,title,title_detail,summary,summary_detail,published,published_parsed,authors,author,...,itunes_title,itunes_duration,subtitle,subtitle_detail,tags,itunes_explicit,itunes_episodetype,image,itunes_episode,title_length
0,34c72474-9e16-4dec-9d90-e3b7ab47ac42,False,Circle of Friends,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, a circle of friends ...","{'type': 'text/plain', 'language': None, 'base...","Tue, 1 Aug 2023 10:00:00 +0000","(2023, 8, 1, 10, 0, 0, 1, 213, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,Circle of Friends,00:41:49,"In this Dateline classic, a circle of friends ...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'gainesville', 'scheme': 'http://www...",,full,,,17
1,2ec51ada-c679-4703-b2a8-97c220649343,False,Deadly Hollywood Dreams,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, Keith Morrison tells...","{'type': 'text/plain', 'language': None, 'base...","Wed, 26 Jul 2023 10:00:00 +0000","(2023, 7, 26, 10, 0, 0, 2, 207, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,Deadly Hollywood Dreams,00:19:32,"In this Dateline classic, Keith Morrison tells...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'bill bradford', 'scheme': 'http://w...",,full,,,23
2,5bd7df29-92f0-45ee-99a7-81dd043d05ea,False,On the Trail of the Bike Path Rapist,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, Keith Morrison repor...","{'type': 'text/plain', 'language': None, 'base...","Tue, 25 Jul 2023 10:00:00 +0000","(2023, 7, 25, 10, 0, 0, 1, 206, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,On the Trail of the Bike Path Rapist,00:40:12,"In this Dateline classic, Keith Morrison repor...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'clarence', 'scheme': 'http://www.it...",,full,,,36
3,c7b882d1-b153-4eef-a984-8110fe0bfd56,False,Diabolical,"{'type': 'text/plain', 'language': None, 'base...",A bizarre cyber-revenge case lands a young wom...,"{'type': 'text/plain', 'language': None, 'base...","Wed, 19 Jul 2023 10:00:00 +0000","(2023, 7, 19, 10, 0, 0, 2, 200, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,Diabolical,00:41:41,A bizarre cyber-revenge case lands a young wom...,"{'type': 'text/plain', 'language': None, 'base...","[{'term': 'michelle hadley', 'scheme': 'http:/...",,full,,,10
4,dff93a73-f3b8-416c-aa7b-b4c77286218d,False,Death in the Hollywood Hills,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, a predator strikes i...","{'type': 'text/plain', 'language': None, 'base...","Tue, 18 Jul 2023 10:00:00 +0000","(2023, 7, 18, 10, 0, 0, 1, 199, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,Death in the Hollywood Hills,00:39:00,"In this Dateline classic, a predator strikes i...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'michigan', 'scheme': 'http://www.it...",,full,,,28


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [16]:
list1 = list(df[df["summary"].str.contains("machine learning")]["title"])
list1

[]