# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
%pip install feedparser
import feedparser

Note: you may need to restart the kernel to use updated packages.


### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'https://www.nasa.gov/rss/dyn/solar_system.rss'

In [3]:
nasa = feedparser.parse(url)
nasa

{'bozo': False,
 'entries': [{'title': 'NASA Awards Contract to Maintain Webb Telescope Operations',
   'title_detail': {'type': 'text/plain',
    'language': 'en',
    'base': 'http://www.nasa.gov/',
    'value': 'NASA Awards Contract to Maintain Webb Telescope Operations'},
   'links': [{'rel': 'alternate',
     'type': 'text/html',
     'href': 'http://www.nasa.gov/press-release/nasa-awards-contract-to-maintain-webb-telescope-operations'},
    {'length': '189751',
     'type': 'image/jpeg',
     'href': 'http://www.nasa.gov/sites/default/files/styles/1x1_cardfeed/public/thumbnails/image/nasa-logo-web-rgb_0.jpg?itok=mrBnB_c9',
     'rel': 'enclosure'}],
   'link': 'http://www.nasa.gov/press-release/nasa-awards-contract-to-maintain-webb-telescope-operations',
   'summary': 'NASA has selected Northrop Grumman Systems Corporation of Redondo Beach, California, to support the James Webb Space Telescope Phase E – Operations and Sustainment contract.',
   'summary_detail': {'type': 'text/ht

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
nasa.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [11]:
list(nasa['feed'])

['language',
 'title',
 'title_detail',
 'subtitle',
 'subtitle_detail',
 'links',
 'link',
 'authors',
 'author',
 'author_detail',
 'publisher',
 'publisher_detail',
 'docs']

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
print(nasa['feed']['title'])
print(nasa['feed']['subtitle'])
print(nasa['feed']['author'])
print(nasa['feed']['link'])

Solar System and Beyond
A RSS news feed containing the latest NASA press releases on missions exploring our solar system and beyond.
jim.wilson@nasa.gov
http://www.nasa.gov/


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
len(nasa['entries'])

10

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [8]:
nasa['entries'][0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'summary', 'summary_detail', 'id', 'guidislink', 'published', 'published_parsed', 'source'])

### 7. Extract a list of entry titles.

In [14]:
entry_titles = [entry.title for entry in nasa['entries']]
entry_titles

['NASA Awards Contract to Maintain Webb Telescope Operations',
 'NASA’s Big 2022: Historic Moon Mission, Webb Telescope Images, More',
 'Experts Available to Discuss NASA Webb Telescope Science Results',
 'El impacto de DART cambió el movimiento de un asteroide en el espacio',
 'NASA Confirms DART Mission Impact Changed Asteroid’s Motion in Space',
 'NASA to Provide Update on DART, World’s First Planetary Defense Test',
 'NASA’s DART Mission Hits Asteroid in First-Ever Planetary Defense Test',
 "Celebrate 'International Observe the Moon Night' with NASA",
 'NASA to Host Briefing on Perseverance Mars Rover Mission Operations',
 'La NASA invita a la prensa a la primera prueba de defensa planetaria']

### 8. Calculate the percentage of "Four short links" entry titles.

### 9. Create a Pandas data frame from the feed's entries.

In [15]:
import pandas as pd

In [17]:
entries_df = pd.DataFrame(nasa['entries'])
entries_df

Unnamed: 0,title,title_detail,links,link,summary,summary_detail,id,guidislink,published,published_parsed,source
0,NASA Awards Contract to Maintain Webb Telescop...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/nasa-awards-...,NASA has selected Northrop Grumman Systems Cor...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/nasa-awards-...,False,"Thu, 15 Dec 2022 16:02 EST","(2022, 12, 15, 21, 2, 0, 3, 349, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
1,"NASA’s Big 2022: Historic Moon Mission, Webb T...","{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/nasa-s-big-2...,2022 is one for the history books as NASA caps...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/nasa-s-big-2...,False,"Tue, 13 Dec 2022 10:22 EST","(2022, 12, 13, 15, 22, 0, 1, 347, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
2,Experts Available to Discuss NASA Webb Telesco...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/experts-avai...,Experts from NASA and other institutions will ...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/experts-avai...,False,"Tue, 15 Nov 2022 16:41 EST","(2022, 11, 15, 21, 41, 0, 1, 319, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
3,El impacto de DART cambió el movimiento de un ...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/el-impacto-d...,El análisis de los datos obtenidos en las últi...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/el-impacto-d...,False,"Tue, 11 Oct 2022 13:28 EDT","(2022, 10, 11, 17, 28, 0, 1, 284, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
4,NASA Confirms DART Mission Impact Changed Aste...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/nasa-confirm...,Analysis of data obtained over the past two we...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/nasa-confirm...,False,"Tue, 11 Oct 2022 13:12 EDT","(2022, 10, 11, 17, 12, 0, 1, 284, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
5,"NASA to Provide Update on DART, World’s First ...","{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/nasa-to-prov...,"NASA will host a media briefing at 2 p.m. EDT,...","{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/nasa-to-prov...,False,"Fri, 07 Oct 2022 15:34 EDT","(2022, 10, 7, 19, 34, 0, 4, 280, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
6,NASA’s DART Mission Hits Asteroid in First-Eve...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/nasa-s-dart-...,"After 10 months flying in space, NASA’s Double...","{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/nasa-s-dart-...,False,"Mon, 26 Sep 2022 20:09 EDT","(2022, 9, 27, 0, 9, 0, 1, 270, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
7,Celebrate 'International Observe the Moon Nigh...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/goddard/2022...,The public is invited to participate in NASA’s...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/goddard/2022...,False,"Fri, 23 Sep 2022 10:00 EDT","(2022, 9, 23, 14, 0, 0, 4, 266, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
8,NASA to Host Briefing on Perseverance Mars Rov...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/nasa-to-host...,NASA will host a briefing at 11:30 a.m. EDT (8...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/nasa-to-host...,False,"Mon, 12 Sep 2022 09:49 EDT","(2022, 9, 12, 13, 49, 0, 0, 255, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...
9,La NASA invita a la prensa a la primera prueba...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/press-release/la-nasa-invi...,La misión Prueba de redireccionamiento del ast...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/press-release/la-nasa-invi...,False,"Tue, 23 Aug 2022 11:47 EDT","(2022, 8, 23, 15, 47, 0, 1, 235, 0)",{'href': 'http://www.nasa.gov/rss/dyn/solar_sy...


### 10. Count the number of entries per author and sort them in descending order.

In [22]:
entries_df['Author'].value_counts().sort_values(ascending=False)

# no existe la columna "Author", asi que dejo el codigo de como seria

KeyError: 'Author'

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [21]:
entries_df['Title Length'] = entries_df['Title'].apply(len)

new_df = entries_df[['Title', 'Author', 'Title Length']].sort_values(by='Title Length', ascending=False)

# no existen esas columnas, pero pongo el codigo de como seria

KeyError: 'Title'

### 12. Create a list of entry titles whose summary includes the phrase "machine learning."