# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
import feedparser

url = "https://www.lanasa.net/rss"  

rss = feedparser.parse(url)

# Aquí puedes comenzar a trabajar con el contenido del feed, por ejemplo:
# for entry in feed.entries:
#    print(entry.title)
#    print(entry.link)
#    print(entry.published)

### 2. Obtain a list of components (keys) that are available for this feed.

In [3]:
rss.keys()
#shif + enter

dict_keys(['bozo', 'entries', 'feed', 'headers', 'href', 'status', 'encoding', 'bozo_exception', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [4]:
rss['feed'].keys()

dict_keys(['html', 'meta', 'links', 'script', 'style', 'source', 'summary'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [5]:
import feedparser

url = 'https://www.lanasa.net/rss'

rss = feedparser.parse(url)

meta = rss.feed.meta #title me da error, lo sustituyo por meta
subtitle = rss.feed.html #subtitle me da error, lo sustituyo por html
links = rss.feed.links

print("Título del feed:", meta)
print("Subtítulo del feed:", subtitle)
print("Enlace del feed:", links)



Título del feed: {'name': 'viewport', 'content': 'width=device-width, initial-scale=1'}
Subtítulo del feed: {'class': 'no-js', 'lang': 'en'}
Enlace del feed: [{'rel': 'shortcut icon', 'href': 'https://www.lanasa.net/application/files/6616/4720/4828/favicon.ico', 'type': 'image/x-icon'}, {'rel': 'icon', 'href': 'https://www.lanasa.net/application/files/6616/4720/4828/favicon.ico', 'type': 'image/x-icon'}, {'rel': 'canonical', 'href': 'https://www.lanasa.net/page_not_found', 'type': 'text/html'}, {'href': 'https://www.lanasa.net/updates/concrete5-8.5.1/concrete/css/font-awesome.css?ccm_nocache=ca17add95b11d1baf3d74fcd3fa4637fb02c14ba', 'rel': 'stylesheet', 'type': 'text/css', 'media': 'all'}, {'rel': 'stylesheet', 'href': 'https://www.lanasa.net/packages/theme_nasanet/themes/nasanet/css/bootstrap-theme.min.css', 'type': 'text/html'}, {'rel': 'stylesheet', 'href': 'https://www.lanasa.net/packages/theme_nasanet/themes/nasanet/css/star.css', 'type': 'text/html'}, {'rel': 'stylesheet', 'href

### 5. Count the number of entries that are contained in this RSS feed.

In [8]:
len(rss['entries'])


0

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [12]:
import feedparser

url = "https://www.lanasa.net/rss"
rss = feedparser.parse(url)

# Verificar si existen entradas en el feed
if rss.entries:
    # Accediendo a las claves de la primera entrada
    claves_entrada = rss.entries[0].keys()

    # Imprimiendo la lista de claves
    print("Lista de claves para la primera entrada:")
    for clave in claves_entrada:
        print(clave)
else:
    print("No se encontraron entradas en el feed RSS.")

No se encontraron entradas en el feed RSS.


### 7. Extract a list of entry titles.

In [14]:
if rss.entries:
    # Extraer una lista de títulos de las entradas
    titulos_entradas = [entry.title for entry in rss.entries]

    # Imprimir la lista de títulos
    print("Lista de títulos de las entradas:")
    for titulo in titulos_entradas:
        print(titulo)
else:
    print("No se encontraron entradas en el feed RSS.")

No se encontraron entradas en el feed RSS.


### 8. Calculate the percentage of "Four short links" entry titles.

In [16]:
import feedparser

url = "https://www.lanasa.net/rss"
rss = feedparser.parse(url)

entry_count = len(rss.entries) if rss.entries else 0
four_short_links_count = 0

for entry in rss.entries:
    if "Four short links" in entry.title:
        four_short_links_count += 1

percentage = (four_short_links_count / entry_count) * 100 if entry_count != 0 else 0

print("Percentage of 'Four short links' entry titles:", percentage)

Percentage of 'Four short links' entry titles: 0


### 9. Create a Pandas data frame from the feed's entries.

In [18]:
import feedparser
import pandas as pd

url = "https://www.lanasa.net/rss"
rss = feedparser.parse(url)

data = []

for entry in rss.entries:
    data.append({'Title': entry.title, 'Published Date': entry.published, 'Summary': entry.summary, 'Link': entry.link})

df = pd.DataFrame(data)

print(df.head())
#en la web elegida no hay entries por eso no dá resultados vacíos pero dejo el código.

Empty DataFrame
Columns: []
Index: []


### 10. Count the number of entries per author and sort them in descending order.

In [21]:
import feedparser
from collections import Counter

url = "https://www.lanasa.net/rss"
rss = feedparser.parse(url)

author_counts = Counter(entry.author for entry in rss.entries) if rss.entries else Counter()

author_counts_sorted = dict(sorted(author_counts.items(), key=lambda x: x[1], reverse=True))

for author, count in author_counts_sorted.items():
    print(f"Author: {author}, Entry Count: {count}")
#en la web elegida no hay entries por eso no dá resultados pero dejo el código.

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [None]:
url = "https://www.lanasa.net/rss"
rss = feedparser.parse(url)

data = []

for entry in rss.entries:
    data.append({'Title': entry.title, 'Author': entry.author, 'Title Length': len(entry.title)})#añado columna 'Title Length'

df = pd.DataFrame(data)
df_sorted = df.sort_values(by='Title Length', ascending=False) #dame el df en orden descendente, el + longest el primero

print(df_sorted[['Title', 'Author', 'Title Length']]) #imprímela
#en la web elegida no hay entries por eso no dá resultados pero dejo el código.

### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [24]:
import feedparser

url = "https://www.lanasa.net/rss"
rss = feedparser.parse(url)

entry_titles_machine_learning = []

for entry in rss.entries:
    if "machine learning" in entry.summary.lower():
        entry_titles_machine_learning.append(entry.title)

print(entry_titles_machine_learning)

[]
