# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Address of the RSS feed to parse in this lab.
url = 'https://feeds.simplecast.com/54nAGcIl'

In [3]:
# Fetch and parse the feed. Reuse `url` from the previous cell instead of
# repeating the literal, so the address only has to be changed in one place.
rss = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# Top-level keys of the parsed result.
rss.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# Keys of the feed-level metadata (the `feed` component of the parsed result).
rss.feed.keys()

dict_keys(['links', 'generator_detail', 'generator', 'title', 'title_detail', 'subtitle', 'subtitle_detail', 'rights', 'rights_detail', 'language', 'published', 'published_parsed', 'updated', 'updated_parsed', 'image', 'link', 'itunes_type', 'summary', 'summary_detail', 'authors', 'author', 'author_detail', 'itunes_explicit', 'itunes_new-feed-url', 'publisher_detail', 'tags'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
# Show the feed's title, subtitle, author and link, one blank line apart.
feed = rss.feed
print(feed.title, feed.subtitle, feed.author, feed.link, sep='\n\n')

The Daily

This is what the news should sound like. The biggest stories of our time, told by the best journalists in the world. Hosted by Michael Barbaro and Sabrina Tavernise. Twenty minutes a day, five days a week, ready by 6 a.m.

Listen to this podcast in New York Times Audio, our new iOS app for news subscribers. Download now at nytimes.com/audioapp

The New York Times

https://www.nytimes.com/the-daily


### 5. Count the number of entries that are contained in this RSS feed.

In [8]:
# Number of entries in the parsed feed.
len(rss.entries)

1902

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Select a single entry by index (for example, the first one) before requesting its keys.*

In [16]:
# Keys of the first entry only.
# NOTE(review): other entries may carry additional keys — the data frame built
# later in this notebook shows extra columns such as `itunes_episode` and
# `image` that are not in this list. Confirm before relying on it.
list(rss.entries[0].keys())

['id',
 'guidislink',
 'title',
 'title_detail',
 'summary',
 'summary_detail',
 'published',
 'published_parsed',
 'authors',
 'author',
 'author_detail',
 'links',
 'link',
 'content',
 'itunes_title',
 'itunes_duration',
 'subtitle',
 'subtitle_detail',
 'itunes_explicit',
 'itunes_episodetype']

### 7. Extract a list of entry titles.

In [19]:
# Collect every entry title, iterating over the entries directly rather than
# indexing them by position.
titles = [entry.title for entry in rss.entries]

titles

['The Secret History of Gun Rights',
 'Italy’s Giorgia Meloni Charts a Path for the Far Right',
 'The Sunday Read: ‘The America That Americans Forget’',
 'Menopause Is Having a Moment',
 'Affirmative Action for the 1 Percent',
 'Hunter Biden’s Day in Court',
 'Russia’s Newest Target: The Global Food Supply',
 'A One-Man Blockade Against the U.S. Military',
 'The Sunday Read: ‘The Trillion- Gallon Question’',
 'Can Barbie Be Rebranded as a Feminist Icon?',
 'The Man Trying to Save Phoenix From Historic Heat',
 'How the Birth Control Pill Got Over the Counter',
 'The Writers’ Revolt Against A.I. Companies',
 'China’s Economic Rebound Hits a Wall',
 'The Sunday Read: ‘The Moral Crisis of America’s Doctors’',
 'How Clarence Thomas Came to Reject Affirmative Action',
 'How Affirmative Action Changed Their Lives',
 'The Great Resignation is Over',
 'Many Countries Banned Cluster Munitions. The U.S. Is Sending Them to Ukraine Anyway.',
 'Will Threads Kill Twitter?',
 'The Sunday Read: ‘The Sp

### 8. Calculate the percentage of "Four short links" entry titles.

### 9. Create a Pandas data frame from the feed's entries.

In [20]:
import pandas as pd

In [21]:
# Build a data frame with one row per feed entry; the columns come from the
# keys of the entry dictionaries.
rss_df = pd.DataFrame(rss.entries)
# Preview the first rows instead of displaying the whole frame.
rss_df.head()

Unnamed: 0,id,guidislink,title,title_detail,summary,summary_detail,published,published_parsed,authors,author,...,link,content,itunes_title,itunes_duration,subtitle,subtitle_detail,itunes_explicit,itunes_episodetype,itunes_episode,image
0,3f6e5473-d26a-4c9c-bb97-6e17e5b00260,False,The Secret History of Gun Rights,"{'type': 'text/plain', 'language': None, 'base...","How did the National Rifle Association, Americ...","{'type': 'text/plain', 'language': None, 'base...","Tue, 1 Aug 2023 09:45:00 +0000","(2023, 8, 1, 9, 45, 0, 1, 213, 0)","[{'name': 'The New York Times', 'email': 'thed...",The New York Times,...,https://www.nytimes.com/the-daily,"[{'type': 'text/html', 'language': None, 'base...",The Secret History of Gun Rights,00:26:57,"How did the National Rifle Association, Americ...","{'type': 'text/plain', 'language': None, 'base...",,full,,
1,6a349a51-f9de-4fd0-820d-e8a9f137b199,False,Italy’s Giorgia Meloni Charts a Path for the F...,"{'type': 'text/plain', 'language': None, 'base...","Last year, Giorgia Meloni, an Italian far-righ...","{'type': 'text/plain', 'language': None, 'base...","Mon, 31 Jul 2023 09:45:00 +0000","(2023, 7, 31, 9, 45, 0, 0, 212, 0)","[{'name': 'The New York Times', 'email': 'thed...",The New York Times,...,https://www.nytimes.com/the-daily,"[{'type': 'text/html', 'language': None, 'base...",Italy’s Giorgia Meloni Charts a Path for the F...,00:31:31,"Last year, Giorgia Meloni, an Italian far-righ...","{'type': 'text/plain', 'language': None, 'base...",,full,,
2,b4282e9e-bbb4-43cb-80ca-49a76f0d9c2f,False,The Sunday Read: ‘The America That Americans F...,"{'type': 'text/plain', 'language': None, 'base...","On the weekends, when Roy Gamboa was a little ...","{'type': 'text/plain', 'language': None, 'base...","Sun, 30 Jul 2023 10:00:00 +0000","(2023, 7, 30, 10, 0, 0, 6, 211, 0)","[{'name': 'The New York Times', 'email': 'thed...",The New York Times,...,https://www.nytimes.com/the-daily,"[{'type': 'text/html', 'language': None, 'base...",The Sunday Read: ‘The America That Americans F...,01:43:56,"On the weekends, when Roy Gamboa was a little ...","{'type': 'text/plain', 'language': None, 'base...",,full,,
3,bacbf190-63d7-446c-a92e-4b15823e482f,False,Menopause Is Having a Moment,"{'type': 'text/plain', 'language': None, 'base...",Some of the worst symptoms of menopause — incl...,"{'type': 'text/plain', 'language': None, 'base...","Fri, 28 Jul 2023 09:45:00 +0000","(2023, 7, 28, 9, 45, 0, 4, 209, 0)","[{'name': 'The New York Times', 'email': 'thed...",The New York Times,...,https://www.nytimes.com/the-daily,"[{'type': 'text/html', 'language': None, 'base...",Menopause Is Having a Moment,00:32:08,Some of the worst symptoms of menopause — incl...,"{'type': 'text/plain', 'language': None, 'base...",,full,,
4,3e818886-9ecf-4ca9-992c-ac45d931f420,False,Affirmative Action for the 1 Percent,"{'type': 'text/plain', 'language': None, 'base...",A major new study has revealed just how much e...,"{'type': 'text/plain', 'language': None, 'base...","Thu, 27 Jul 2023 09:45:00 +0000","(2023, 7, 27, 9, 45, 0, 3, 208, 0)","[{'name': 'The New York Times', 'email': 'thed...",The New York Times,...,https://www.nytimes.com/the-daily,"[{'type': 'text/html', 'language': None, 'base...",Affirmative Action for the 1 Percent,00:38:24,A major new study has revealed just how much e...,"{'type': 'text/plain', 'language': None, 'base...",,full,,


### 10. Count the number of entries per author and sort them in descending order.

In [22]:
# Tally the entries per author, label the count column `entries`, and display
# the authors with the most entries first.
authors = (
    rss_df
    .groupby('author', as_index=False)
    .agg({'title': 'count'})
    .rename(columns={'title': 'entries'})
)
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
0,The New York Times,1902


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [23]:
# Character count of each title. `.str.len()` is the vectorized string
# accessor; unlike `.apply(len)` it yields NaN for a missing title instead of
# raising a TypeError.
rss_df['title_length'] = rss_df['title'].str.len()
# Longest titles first; preview the top rows only.
(
    rss_df[['title', 'author', 'title_length']]
    .sort_values('title_length', ascending=False)
    .head()
)

Unnamed: 0,title,author,title_length
49,"Special Episode: A Crash Course in Dembow, a M...",The New York Times,114
731,Bonus: The N-Word is Both Unspeakable and Ubiq...,The New York Times,109
778,The Sunday Read: ‘The Amateur Cloud Society Th...,The New York Times,92
455,The Sunday Read: ‘Animals That Infect Humans A...,The New York Times,92
343,"The Sunday Read: ‘How Houston Moved 25,000 Peo...",The New York Times,91


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [24]:
# Holds the titles of entries whose summary matches the search phrase.
machine_learning = []

In [27]:
# Match the phrase "machine learning" (with a space, not an underscore),
# case-insensitively. Building the list in a single expression rebinds the
# name, so re-running this cell cannot append duplicate titles. `.get`
# tolerates entries that have no summary.
machine_learning = [
    entry.title
    for entry in rss.entries
    if 'machine learning' in entry.get('summary', '').lower()
]

machine_learning

[]