# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
%pip install feedparser

Note: you may need to restart the kernel to use updated packages.


In [2]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
# Download and parse the RSS feed; the parsed result is kept in `data`.
FEED_URL = 'https://feeds.acast.com/public/shows/5ea17537-f11f-4532-8202-294d976b9d5c'
data = feedparser.parse(FEED_URL)

### 2. Obtain a list of components (keys) that are available for this feed.

In [18]:
# Show the top-level components (keys) of the parsed result.
top_level_keys = list(data.keys())
print(top_level_keys)

['bozo', 'entries', 'feed', 'headers', 'etag', 'href', 'status', 'encoding', 'version', 'namespaces']


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [38]:
# Components (keys) available under the feed-level section of the result.
data.feed.keys()

dict_keys(['ttl', 'generator_detail', 'generator', 'title', 'title_detail', 'links', 'link', 'language', 'rights', 'rights_detail', 'authors', 'author', 'author_detail', 'subtitle', 'subtitle_detail', 'summary', 'summary_detail', 'content', 'content_detail', 'itunes_explicit', 'publisher_detail', 'acast_showid', 'acast_showurl', 'acast_signature', 'acast_settings', 'acast_network', 'acast_importedfeed', 'itunes_type', 'image', 'itunes_new-feed-url', 'tags'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
# Feed title.
print(data.feed.title)


Unraveled


In [7]:
# Feed subtitle (the captured output below is blank for this feed).
print(data.feed.subtitle)




In [8]:
# Feed author.
print(data.feed.author)

Warner Bros. Discovery


In [9]:
# Feed link.
print(data.feed.link)

https://www.discoveryplus.com


### 5. Count the number of entries that are contained in this RSS feed.

In [14]:
# Number of entries contained in the feed.
len(data.entries)

42

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [17]:
# Inspect the first entry to see which components (keys) an entry exposes.
first_entry = data.entries[0]
print(list(first_entry.keys()))

['title', 'title_detail', 'itunes_title', 'published', 'published_parsed', 'itunes_duration', 'links', 'id', 'guidislink', 'itunes_explicit', 'link', 'acast_episodeid', 'acast_episodeurl', 'acast_settings', 'itunes_episodetype', 'itunes_season', 'itunes_episode', 'image', 'summary', 'summary_detail']


### 7. Extract a list of entry titles.

In [20]:
# Collect the title of every entry, in the order the entries appear in the feed.
titles = [entry.title for entry in data.entries]
print(titles)

['LISK Ep. 9: Breaking - LISK Suspect in Custody', "S5 Ep.5: The Profiler's Dilemma", 'S5 Ep.4: Family Secrets', 'S5 Ep.3: My Friend, The Murderer', 'S5 Ep.2: DJ Freez', 'S5 Ep.1: The Uncatchable Killer', 'Introducing Unraveled: Once a Killer', 'S4 Ep.9: Bob', 'S4 Ep.8: A Second Chance', 'S4 Ep 7: A Surprise Letter', 'S4 Ep.6: Diane', 'S4 Ep.5: Blood and Motive', 'S4 Ep.4: Physical Evidence', 'S4 Ep.3: Off With His Head', 'S4 Ep.2: The Call', 'S4 Ep.1: What Happened to Diane?', 'Introducing Unraveled: Mystery at the Mansion', 'S3 Ep.7: The Gears of the System', 'S3 Ep.6: Unreliable', 'S3 Ep.5: Fighting Fire', 'S3 Ep.4: Take My Word For It', 'S3 Ep.3: It’s Worse Than You Think', 'S3 Ep.2: Expert v Expert', 'S3 Ep.1: Eight Tiny Specks of Blood', 'Introducing Unraveled: Experts on Trial', 'S2 Ep.7: The Aftermath', 'S2 Ep.6: Hiding Out', "S2 Ep.5: The Devil's Web", 'S2 Ep.4: Mother', 'S2 Ep.3: The Extinct Marsupial', 'S2 Ep.2: Rachel', 'S2 Ep.1:Antisense', 'Introducing Unraveled: The Stalk

### 8. Calculate the percentage of "Four short links" entry titles.

### 9. Create a Pandas data frame from the feed's entries.

In [21]:
import pandas as pd

In [32]:
# Build a data frame from the feed's entries and preview the first five rows.
df = pd.DataFrame(data['entries'])
df.head(n=5)

Unnamed: 0,title,title_detail,itunes_title,published,published_parsed,itunes_duration,links,id,guidislink,itunes_explicit,...,acast_settings,itunes_episodetype,itunes_season,itunes_episode,image,summary,summary_detail,content,subtitle,subtitle_detail
0,LISK Ep. 9: Breaking - LISK Suspect in Custody,"{'type': 'text/plain', 'language': None, 'base...",LISK Ep. 9: Breaking - LISK Suspect in Custody,"Mon, 17 Jul 2023 09:00:52 GMT","(2023, 7, 17, 9, 0, 52, 0, 198, 0)",47:38,"[{'length': '45745024', 'type': 'audio/mpeg', ...",64b48ed671b6290011e26cd7,False,,...,8+JWyvRbH7rg8keQDyu9F1HQVthSOQl0D9lW6WMdkKtYAm...,full,1,9,{'href': 'https://assets.pippa.io/shows/61b764...,<p>Law enforcement has made an arrest in the c...,"{'type': 'text/html', 'language': None, 'base'...",,,
1,S5 Ep.5: The Profiler's Dilemma,"{'type': 'text/plain', 'language': None, 'base...",S5 Ep.5: The Profiler's Dilemma,"Wed, 20 Apr 2022 07:00:31 GMT","(2022, 4, 20, 7, 0, 31, 2, 110, 0)",43:04,"[{'length': '68680535', 'type': 'audio/mpeg', ...",6219601e5cdbb4001256445d,False,,...,8+JWyvRbH7rg8keQDyu9F1HQVthSOQl0D9lW6WMdkKtYAm...,full,5,5,{'href': 'https://assets.pippa.io/shows/61b764...,<p>One-and-done killers have exposed massive s...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",,
2,S5 Ep.4: Family Secrets,"{'type': 'text/plain', 'language': None, 'base...",S5 Ep.4: Family Secrets,"Wed, 13 Apr 2022 07:00:26 GMT","(2022, 4, 13, 7, 0, 26, 2, 103, 0)",41:39,"[{'length': '68566270', 'type': 'audio/mpeg', ...",62195f32503d1700125f4eb8,False,,...,8+JWyvRbH7rg8keQDyu9F1HQVthSOQl0D9lW6WMdkKtYAm...,full,5,4,{'href': 'https://assets.pippa.io/shows/61b764...,<p>The disturbing cases of Raymond Rowe and Wi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",,
3,"S5 Ep.3: My Friend, The Murderer","{'type': 'text/plain', 'language': None, 'base...","S5 Ep.3: My Friend, The Murderer","Wed, 06 Apr 2022 07:00:48 GMT","(2022, 4, 6, 7, 0, 48, 2, 96, 0)",39:09,"[{'length': '64611011', 'type': 'audio/mpeg', ...",62195e501ea237001277ddd9,False,,...,8+JWyvRbH7rg8keQDyu9F1HQVthSOQl0D9lW6WMdkKtYAm...,full,5,3,{'href': 'https://assets.pippa.io/shows/61b764...,<p>A deeper dive into the one-and-done-killer ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",,
4,S5 Ep.2: DJ Freez,"{'type': 'text/plain', 'language': None, 'base...",S5 Ep.2: DJ Freez,"Wed, 30 Mar 2022 07:00:38 GMT","(2022, 3, 30, 7, 0, 38, 2, 89, 0)",44:02,"[{'length': '71061082', 'type': 'audio/mpeg', ...",62195d7ebd60d400145b3db1,False,,...,8+JWyvRbH7rg8keQDyu9F1HQVthSOQl0D9lW6WMdkKtYAm...,full,5,2,{'href': 'https://assets.pippa.io/shows/61b764...,<p>The identification of Christy Mirack's kill...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",,


### 10. Count the number of entries per author and sort them in descending order.

In [52]:
# The notebook author observed that `df` has no 'author' column for this feed.
# Rather than leaving the solution commented out, guard on the column so the
# cell runs either way: when 'author' exists, count entries per author;
# otherwise fall back to an empty result with the same two columns.
if 'author' in df.columns:
    authors = df.groupby('author', as_index=False).agg({'title': 'count'})
    authors.columns = ['author', 'entries']
else:
    authors = pd.DataFrame(columns=['author', 'entries'])

# Most prolific authors first.
authors.sort_values('entries', ascending=False)

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [51]:
# There is no author column for this feed, so only the titles are used here.

# Vectorized string length; unlike .apply(len), this yields NaN instead of
# raising if a title happens to be missing.
df['title_length'] = df['title'].str.len()

# Longest titles first.
df[['title', 'title_length']].sort_values('title_length', ascending=False).head()

Unnamed: 0,title,title_length
33,S1 Ep. 8: Damning New LISK Information + Seaso...,62
34,LISK Ep. 7: Who Is The Long Island Serial Killer?,49
41,Introducing Unraveled: Long Island Serial Killer,48
0,LISK Ep. 9: Breaking - LISK Suspect in Custody,46
16,Introducing Unraveled: Mystery at the Mansion,45


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [50]:
# Titles of entries whose summary mentions "machine learning" (case-insensitive).
# An entry without a summary is treated as having an empty one, and a missing
# title is recorded as ''.
machinelearning = [
    entry.get('title', '')
    for entry in data.entries
    if 'machine learning' in entry.get('summary', '').lower()
]

print(machinelearning)

[]
