# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [3]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [29]:
nasa = feedparser.parse('https://www.nasa.gov/rss/dyn/onthestation_rss.rss')

In [30]:
print(nasa['feed'])

{'language': 'en-us', 'title': 'On the Station - Latest News', 'title_detail': {'type': 'text/plain', 'language': 'en', 'base': 'http://www.nasa.gov/', 'value': 'On the Station - Latest News'}, 'subtitle': 'On the Station - Latest News', 'subtitle_detail': {'type': 'text/html', 'language': 'en', 'base': 'http://www.nasa.gov/', 'value': 'On the Station - Latest News'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://www.nasa.gov/'}, {'rel': 'self', 'href': 'http://www.nasa.gov/rss/dyn/onthestation_rss.rss', 'type': 'application/atom+xml'}], 'link': 'http://www.nasa.gov/'}


### 2. Obtain a list of components (keys) that are available for this feed.

In [31]:
nasa.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [32]:
nasa.feed.keys()

dict_keys(['language', 'title', 'title_detail', 'subtitle', 'subtitle_detail', 'links', 'link'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [41]:
# print (nasa.feed.author)
print ('')
print (nasa.feed.link)
print ('')
print (nasa.feed.title)
print ('')
print (nasa.feed.subtitle)


http://www.nasa.gov/

On the Station - Latest News

On the Station - Latest News


### 5. Count the number of entries that are contained in this RSS feed.

In [42]:
len(nasa.entries)

10

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [44]:
nasa.entries[0]

{'title': 'Space Station Science Highlights: Week of April 24, 2023',
 'title_detail': {'type': 'text/plain',
  'language': 'en',
  'base': 'http://www.nasa.gov/',
  'value': 'Space Station Science Highlights: Week of April 24, 2023'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'http://www.nasa.gov/mission_pages/station/research/news/space-station-science-highlights-24apr23'},
  {'length': '2224270',
   'type': 'image/jpeg',
   'href': 'http://www.nasa.gov/sites/default/files/styles/1x1_cardfeed/public/thumbnails/image/canadarm.jpg?itok=vVHvKY0-',
   'rel': 'enclosure'}],
 'link': 'http://www.nasa.gov/mission_pages/station/research/news/space-station-science-highlights-24apr23',
 'summary': 'Crew members aboard the International Space Station conducted scientific investigations during the week of April 24.',
 'summary_detail': {'type': 'text/html',
  'language': 'en',
  'base': 'http://www.nasa.gov/',
  'value': 'Crew members aboard the International Space Stati

### 7. Extract a list of entry titles.

In [45]:
titles = [nasa.entries[i].title for i in range(len(nasa.entries))]
print(titles)

['Space Station Science Highlights: Week of April 24, 2023', 'Space Station Science Highlights: Week of April 17, 2023', 'Space Station Studies Help Monitor Climate Change', 'NASA Teams Persevere Through Plant Challenges in Space', 'Space Station Science Highlights: Week of April 10, 2023', "NASA, SpaceX's 27th Resupply Mission Returns Science Samples for Study", 'Space Station Science Highlights: Week of April 3, 2023', 'Space Station Science Highlights: Week of March 27, 2023', 'Celebrating Women’s History Month: Female Space Station Crew Members', 'Space Station Science Highlights: Week of March 20, 2023']


### 8. Calculate the percentage of "Four short links" entry titles.

In [58]:
# Cuenta el número de entradas en el feed con títulos que contienen "Four short links"
num_entries_with_fsl = 0
for entry in nasa.entries:
    if 'Four short links' in entry.title:
        num_entries_with_fsl += 1

percentage_with_fsl = (num_entries_with_fsl / len(nasa.entries)) * 100 #porcentaje en relacion a las entries totales

print('Porcentaje de entradas que contienen "Four short links": {}%'.format(percentage_with_fsl))

Porcentaje de entradas que contienen "Four short links": 0.0%


### 9. Create a Pandas data frame from the feed's entries.

In [46]:
import pandas as pd

df = pd.DataFrame(nasa.entries)
df.head()

Unnamed: 0,title,title_detail,links,link,summary,summary_detail,id,guidislink,published,published_parsed,source
0,Space Station Science Highlights: Week of Apri...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/mission_pages/station/rese...,Crew members aboard the International Space St...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/mission_pages/station/rese...,False,"Fri, 28 Apr 2023 14:00 EDT","(2023, 4, 28, 18, 0, 0, 4, 118, 0)",{'href': 'http://www.nasa.gov/rss/dyn/onthesta...
1,Space Station Science Highlights: Week of Apri...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/mission_pages/station/rese...,Crew members aboard the International Space St...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/mission_pages/station/rese...,False,"Fri, 21 Apr 2023 13:00 EDT","(2023, 4, 21, 17, 0, 0, 4, 111, 0)",{'href': 'http://www.nasa.gov/rss/dyn/onthesta...
2,Space Station Studies Help Monitor Climate Change,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/mission_pages/station/rese...,"On Earth Day, people focus on the well-being o...","{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/mission_pages/station/rese...,False,"Fri, 21 Apr 2023 09:00 EDT","(2023, 4, 21, 13, 0, 0, 4, 111, 0)",{'href': 'http://www.nasa.gov/rss/dyn/onthesta...
3,NASA Teams Persevere Through Plant Challenges ...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/feature/nasa-teams-perseve...,"Dr. Gioia Massa, life sciences project scienti...","{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/feature/nasa-teams-perseve...,False,"Thu, 20 Apr 2023 12:53 EDT","(2023, 4, 20, 16, 53, 0, 3, 110, 0)",{'href': 'http://www.nasa.gov/rss/dyn/onthesta...
4,Space Station Science Highlights: Week of Apri...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://www.nasa.gov/mission_pages/station/rese...,Crew members aboard the International Space St...,"{'type': 'text/html', 'language': 'en', 'base'...",http://www.nasa.gov/mission_pages/station/rese...,False,"Fri, 14 Apr 2023 14:30 EDT","(2023, 4, 14, 18, 30, 0, 4, 104, 0)",{'href': 'http://www.nasa.gov/rss/dyn/onthesta...


### 10. Count the number of entries per author and sort them in descending order.

In [53]:
entries_author = {}

for entry in nasa.entries:
    author = entry.get('author', 'Desconocido')  #como no he econtrado autores, le digo que meta las entries en autor desconocido
    if author not in entries_author:
        entries_author[author] = 1
    else:
        entries_author[author] += 1

sorted_entries_author = dict(sorted(entries_author.items(), key=lambda x: x[1], reverse=True))

for author, num_entries in sorted_entries_author.items():
    print('{}: {}'.format(author, num_entries))

Desconocido: 10


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [55]:
df['title_length'] = df['title'].apply(len)
df = df[['title', 'title_length']].sort_values('title_length', ascending=False)

print(df)

                                               title  title_length
5  NASA, SpaceX's 27th Resupply Mission Returns S...            70
8  Celebrating Women’s History Month: Female Spac...            68
0  Space Station Science Highlights: Week of Apri...            56
1  Space Station Science Highlights: Week of Apri...            56
4  Space Station Science Highlights: Week of Apri...            56
7  Space Station Science Highlights: Week of Marc...            56
9  Space Station Science Highlights: Week of Marc...            56
6  Space Station Science Highlights: Week of Apri...            55
3  NASA Teams Persevere Through Plant Challenges ...            54
2  Space Station Studies Help Monitor Climate Change            49


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [59]:
machine_learning_entries = [entry.title for entry in nasa.entries if 'machine learning' in entry.summary.lower()]
print(machine_learning_entries)

[]
