# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Address of the podcast RSS feed used throughout this lab.
url = "https://podcastfeeds.nbcnews.com/HL4TzgYC"

In [3]:
# Fetch and parse the feed. Reuse `url` from the previous cell rather than
# repeating the literal, so the address is defined in exactly one place.
x = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# Top-level keys of the object returned by feedparser.parse().
x.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# Keys available under the `feed` component of the parsed result.
x.feed.keys()

dict_keys(['links', 'generator_detail', 'generator', 'title', 'title_detail', 'subtitle', 'subtitle_detail', 'rights', 'rights_detail', 'language', 'published', 'published_parsed', 'updated', 'updated_parsed', 'image', 'link', 'itunes_type', 'summary', 'summary_detail', 'authors', 'author', 'author_detail', 'itunes_explicit', 'itunes_new-feed-url', 'tags', 'publisher_detail'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
# Print the headline feed metadata, one field per line, with an empty line
# between consecutive fields.
feed_fields = ('title', 'subtitle', 'author', 'link')
for position, field_name in enumerate(feed_fields):
    if position:
        print('')  # separator before every field except the first
    print(getattr(x.feed, field_name))

Dateline NBC

Current and classic episodes, featuring compelling true-crime mysteries, powerful documentaries and in-depth investigations.

NBC News

https://www.nbcnews.com/dateline


### 5. Count the number of entries that are contained in this RSS feed.

In [18]:
# Number of entries contained in the parsed feed.
len(x.entries)

454

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index into the list of entries before requesting the keys.*

In [10]:
# Keys live on individual entries, so pick the first entry and list its keys.
first_entry = x.entries[0]
list(first_entry.keys())

['id',
 'guidislink',
 'title',
 'title_detail',
 'summary',
 'summary_detail',
 'published',
 'published_parsed',
 'authors',
 'author',
 'author_detail',
 'links',
 'link',
 'content',
 'itunes_title',
 'itunes_duration',
 'subtitle',
 'subtitle_detail',
 'tags',
 'itunes_explicit',
 'itunes_episodetype']

### 7. Extract a list of entry titles.

In [11]:
# Collect every entry title by iterating over the entries directly instead of
# indexing them by position.
[entry.title for entry in x.entries]

['Circle of Friends',
 'Deadly Hollywood Dreams',
 'On the Trail of the Bike Path Rapist',
 'Diabolical',
 'Death in the Hollywood Hills',
 'The Trap',
 'A Long, Dark Stretch of Road',
 'Into Thin Air',
 'Something Wicked',
 'What Happened to Tara Grant?',
 'Behind the Closet Door',
 'What happened to Dia Abrams? | Dateline: Missing in America',
 'The Mystery of Katrina Montgomery',
 'What happened to Logan Schiendelman? | Dateline: Missing in America',
 'Deadly Secret',
 "Father's Day",
 'Bad Intentions',
 'What happened to Elizabeth Ann Gill? | Dateline: Missing in America',
 'The Alibi',
 'What happened to Jasmine Robinson? | Dateline: Missing in America',
 'Secrets in the Ozarks',
 'Bodies of Evidence',
 'What happened to Nevaeh Kingbird? | Dateline: Missing in America',
 "The House on Sidney's Cove",
 'What happened to Kent Jacobs? | Dateline: Missing in America',
 'The Model & the Millionaire',
 'An all-new season of ‘Dateline Missing in America’',
 'The Killings on King Road',
 

### 8. Calculate the percentage of entry titles that contain the phrase "Four short links".

### 9. Create a Pandas data frame from the feed's entries.

In [12]:
import pandas as pd

In [13]:
# Build a data frame from the feed's list of entries, then preview the first
# five rows.
df = pd.DataFrame(data=x.entries)
df.head(n=5)

Unnamed: 0,id,guidislink,title,title_detail,summary,summary_detail,published,published_parsed,authors,author,...,content,itunes_title,itunes_duration,subtitle,subtitle_detail,tags,itunes_explicit,itunes_episodetype,image,itunes_episode
0,34c72474-9e16-4dec-9d90-e3b7ab47ac42,False,Circle of Friends,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, a circle of friends ...","{'type': 'text/plain', 'language': None, 'base...","Tue, 1 Aug 2023 10:00:00 +0000","(2023, 8, 1, 10, 0, 0, 1, 213, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Circle of Friends,00:41:49,"In this Dateline classic, a circle of friends ...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'gainesville', 'scheme': 'http://www...",,full,,
1,2ec51ada-c679-4703-b2a8-97c220649343,False,Deadly Hollywood Dreams,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, Keith Morrison tells...","{'type': 'text/plain', 'language': None, 'base...","Wed, 26 Jul 2023 10:00:00 +0000","(2023, 7, 26, 10, 0, 0, 2, 207, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Deadly Hollywood Dreams,00:19:32,"In this Dateline classic, Keith Morrison tells...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'bill bradford', 'scheme': 'http://w...",,full,,
2,5bd7df29-92f0-45ee-99a7-81dd043d05ea,False,On the Trail of the Bike Path Rapist,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, Keith Morrison repor...","{'type': 'text/plain', 'language': None, 'base...","Tue, 25 Jul 2023 10:00:00 +0000","(2023, 7, 25, 10, 0, 0, 1, 206, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",On the Trail of the Bike Path Rapist,00:40:12,"In this Dateline classic, Keith Morrison repor...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'clarence', 'scheme': 'http://www.it...",,full,,
3,c7b882d1-b153-4eef-a984-8110fe0bfd56,False,Diabolical,"{'type': 'text/plain', 'language': None, 'base...",A bizarre cyber-revenge case lands a young wom...,"{'type': 'text/plain', 'language': None, 'base...","Wed, 19 Jul 2023 10:00:00 +0000","(2023, 7, 19, 10, 0, 0, 2, 200, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Diabolical,00:41:41,A bizarre cyber-revenge case lands a young wom...,"{'type': 'text/plain', 'language': None, 'base...","[{'term': 'michelle hadley', 'scheme': 'http:/...",,full,,
4,dff93a73-f3b8-416c-aa7b-b4c77286218d,False,Death in the Hollywood Hills,"{'type': 'text/plain', 'language': None, 'base...","In this Dateline classic, a predator strikes i...","{'type': 'text/plain', 'language': None, 'base...","Tue, 18 Jul 2023 10:00:00 +0000","(2023, 7, 18, 10, 0, 0, 1, 199, 0)","[{'name': 'NBC News', 'email': 'nbcnews.podcas...",NBC News,...,"[{'type': 'text/html', 'language': None, 'base...",Death in the Hollywood Hills,00:39:00,"In this Dateline classic, a predator strikes i...","{'type': 'text/plain', 'language': None, 'base...","[{'term': 'michigan', 'scheme': 'http://www.it...",,full,,


### 10. Count the number of entries per author and sort them in descending order.

In [14]:
# Count titles per author and label the count column `entries`.
authors = (
    df.groupby('author', as_index=False)
    .agg({'title': 'count'})
    .rename(columns={'title': 'entries'})
)
# Display authors with the most entries first.
authors.sort_values(by='entries', ascending=False)

Unnamed: 0,author,entries
1,NBC News,453
0,Keith Morrison,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [15]:
# Character count of each title. The vectorised `.str.len()` yields NaN for a
# missing title, whereas `.apply(len)` would raise TypeError on it.
df['title_length'] = df['title'].str.len()

# Longest titles first; `.head()` keeps the displayed output short.
(
    df[['title', 'author', 'title_length']]
    .sort_values('title_length', ascending=False)
    .head()
)

Unnamed: 0,title,author,title_length
17,What happened to Elizabeth Ann Gill? | Datelin...,NBC News,67
13,What happened to Logan Schiendelman? | Datelin...,NBC News,67
69,"Dr. Seuss' ""How The Grinch Stole Christmas!"" r...",NBC News,67
19,What happened to Jasmine Robinson? | Dateline:...,NBC News,65
22,What happened to Nevaeh Kingbird? | Dateline: ...,NBC News,64


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [19]:


# Titles of the entries whose summary mentions the phrase (case-insensitive).
# `.get('summary', '')` treats an entry without a summary as a non-match
# instead of raising AttributeError.
machine_learning_entries = [
    entry.title
    for entry in x.entries
    if 'machine learning' in entry.get('summary', '').lower()
]

print(machine_learning_entries)

[]
