# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [6]:
%pip install feedparser
import feedparser

Note: you may need to restart the kernel to use updated packages.


### 1. Use feedparser to parse the following RSS feed URL.

In [41]:
nasa= feedparser.parse('https://www.nasa.gov/rss/dyn/solar_system.rss')

In [42]:
print(nasa['feed'])

{'language': 'en-us', 'title': 'Solar System and Beyond', 'title_detail': {'type': 'text/plain', 'language': 'en', 'base': 'http://www.nasa.gov/', 'value': 'Solar System and Beyond'}, 'subtitle': 'A RSS news feed containing the latest NASA press releases on missions exploring our solar system and beyond.', 'subtitle_detail': {'type': 'text/html', 'language': 'en', 'base': 'http://www.nasa.gov/', 'value': 'A RSS news feed containing the latest NASA press releases on missions exploring our solar system and beyond.'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://www.nasa.gov/'}, {'rel': 'self', 'href': 'http://www.nasa.gov/rss/dyn/solar_system.rss', 'type': 'application/atom+xml'}], 'link': 'http://www.nasa.gov/', 'authors': [{'email': 'jim.wilson@nasa.gov'}], 'author': 'jim.wilson@nasa.gov', 'author_detail': {'email': 'jim.wilson@nasa.gov'}, 'publisher': 'brian.dunbar@nasa.gov', 'publisher_detail': {'email': 'brian.dunbar@nasa.gov'}, 'docs': 'http://blogs.harvard.e

### 2. Obtain a list of components (keys) that are available for this feed.

In [43]:
nasa.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [46]:
print (nasa.feed)


{'language': 'en-us', 'title': 'Solar System and Beyond', 'title_detail': {'type': 'text/plain', 'language': 'en', 'base': 'http://www.nasa.gov/', 'value': 'Solar System and Beyond'}, 'subtitle': 'A RSS news feed containing the latest NASA press releases on missions exploring our solar system and beyond.', 'subtitle_detail': {'type': 'text/html', 'language': 'en', 'base': 'http://www.nasa.gov/', 'value': 'A RSS news feed containing the latest NASA press releases on missions exploring our solar system and beyond.'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://www.nasa.gov/'}, {'rel': 'self', 'href': 'http://www.nasa.gov/rss/dyn/solar_system.rss', 'type': 'application/atom+xml'}], 'link': 'http://www.nasa.gov/', 'authors': [{'email': 'jim.wilson@nasa.gov'}], 'author': 'jim.wilson@nasa.gov', 'author_detail': {'email': 'jim.wilson@nasa.gov'}, 'publisher': 'brian.dunbar@nasa.gov', 'publisher_detail': {'email': 'brian.dunbar@nasa.gov'}, 'docs': 'http://blogs.harvard.e

### 4. Extract and print the feed title, subtitle, author, and link.

In [49]:
print('Title:', nasa.feed.title)
print('Subtitle:', nasa.feed.subtitle)
print('Author:', nasa.feed.author)
print('Link:', nasa.feed.link)

Title: Solar System and Beyond
Subtitle: A RSS news feed containing the latest NASA press releases on missions exploring our solar system and beyond.
Author: jim.wilson@nasa.gov
Link: http://www.nasa.gov/


### 5. Count the number of entries that are contained in this RSS feed.

In [50]:
num_entries = len(nasa.entries)
print({num_entries})




{10}


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [51]:
keys = nasa.entries[0].keys()

print(keys)

dict_keys(['title', 'title_detail', 'links', 'link', 'summary', 'summary_detail', 'id', 'guidislink', 'published', 'published_parsed', 'source'])


### 7. Extract a list of entry titles.

In [52]:
titles = [entry.title for entry in nasa.entries]

print(titles)

['NASA Awards Contract to Maintain Webb Telescope Operations', 'NASA’s Big 2022: Historic Moon Mission, Webb Telescope Images, More', 'Experts Available to Discuss NASA Webb Telescope Science Results', 'El impacto de DART cambió el movimiento de un asteroide en el espacio', 'NASA Confirms DART Mission Impact Changed Asteroid’s Motion in Space', 'NASA to Provide Update on DART, World’s First Planetary Defense Test', 'NASA’s DART Mission Hits Asteroid in First-Ever Planetary Defense Test', "Celebrate 'International Observe the Moon Night' with NASA", 'NASA to Host Briefing on Perseverance Mars Rover Mission Operations', 'La NASA invita a la prensa a la primera prueba de defensa planetaria']


### 8. Calculate the percentage of "Four short links" entry titles.

In [53]:
count = sum('Four short links' in entry.title for entry in nasa.entries)

percentage = (count / len(nasa.entries)) * 100

print(percentage)

0.0


### 9. Create a Pandas data frame from the feed's entries.

In [56]:
import pandas as pd



In [75]:
entries_list = []
for entry in nasa.entries:
    entry_dict ={'title': entry.title,
                'link': entry.link,
                'published': entry.published,
                'summary': entry.summary}
        
    
    entries_list.append(entry_dict)


df = pd.DataFrame(entries_list)

df.head()

Unnamed: 0,title,link,published,summary
0,NASA Awards Contract to Maintain Webb Telescop...,http://www.nasa.gov/press-release/nasa-awards-...,"Thu, 15 Dec 2022 16:02 EST",NASA has selected Northrop Grumman Systems Cor...
1,"NASA’s Big 2022: Historic Moon Mission, Webb T...",http://www.nasa.gov/press-release/nasa-s-big-2...,"Tue, 13 Dec 2022 10:22 EST",2022 is one for the history books as NASA caps...
2,Experts Available to Discuss NASA Webb Telesco...,http://www.nasa.gov/press-release/experts-avai...,"Tue, 15 Nov 2022 16:41 EST",Experts from NASA and other institutions will ...
3,El impacto de DART cambió el movimiento de un ...,http://www.nasa.gov/press-release/el-impacto-d...,"Tue, 11 Oct 2022 13:28 EDT",El análisis de los datos obtenidos en las últi...
4,NASA Confirms DART Mission Impact Changed Aste...,http://www.nasa.gov/press-release/nasa-confirm...,"Tue, 11 Oct 2022 13:12 EDT",Analysis of data obtained over the past two we...


### 10. Count the number of entries per author and sort them in descending order.

In [78]:
counts = df['title'].value_counts(ascending=False)

print(counts)

NASA Awards Contract to Maintain Webb Telescope Operations                1
NASA’s Big 2022: Historic Moon Mission, Webb Telescope Images, More       1
Experts Available to Discuss NASA Webb Telescope Science Results          1
El impacto de DART cambió el movimiento de un asteroide en el espacio     1
NASA Confirms DART Mission Impact Changed Asteroid’s Motion in Space      1
NASA to Provide Update on DART, World’s First Planetary Defense Test      1
NASA’s DART Mission Hits Asteroid in First-Ever Planetary Defense Test    1
Celebrate 'International Observe the Moon Night' with NASA                1
NASA to Host Briefing on Perseverance Mars Rover Mission Operations       1
La NASA invita a la prensa a la primera prueba de defensa planetaria      1
Name: title, dtype: int64


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [83]:
entries_list = []
for entry in nasa.entries:
    entry_dict = {'title': entry.title,
                'link': entry.link,
                'published': entry.published,
                'summary': entry.summary,
                'title': entry.title}
    
    entries_list.append(entry_dict)

In [84]:
df['title_length'] = df['title'].apply(len)


df_sorted = df[['title', 'title_length']].sort_values(by='title_length', ascending=False)

print(df_sorted)

                                               title  title_length
6  NASA’s DART Mission Hits Asteroid in First-Eve...            70
3  El impacto de DART cambió el movimiento de un ...            69
4  NASA Confirms DART Mission Impact Changed Aste...            68
5  NASA to Provide Update on DART, World’s First ...            68
9  La NASA invita a la prensa a la primera prueba...            68
1  NASA’s Big 2022: Historic Moon Mission, Webb T...            67
8  NASA to Host Briefing on Perseverance Mars Rov...            67
2  Experts Available to Discuss NASA Webb Telesco...            64
0  NASA Awards Contract to Maintain Webb Telescop...            58
7  Celebrate 'International Observe the Moon Nigh...            58


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [86]:
entries_list = []


for entry in nasa.entries:
    if 'machine learning' in entry.summary.lower():
        entries_list.append(entry.title)


print(entries_list)

[]
