# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import re
import requests
import xml.etree.ElementTree as ET

import pandas as pd 
from bs4 import BeautifulSoup
import feedparser



### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
parser = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
parser.keys()


dict_keys(['bozo', 'entries', 'feed', 'headers', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
parser['feed'].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
response = requests.get(url)
response
componentes = response.content

In [7]:
soup = BeautifulSoup(componentes, 'xml')
titulos  = [titulo.text for titulo in soup.find_all('title')]
links  = [link.text for link in soup.find_all('link')]
creador  = [creator.text for creator in soup.find_all('creator')]

In [8]:
df = pd.DataFrame(zip(titulos, links, creador), columns = ['Titulo', 'Link', 'Creador'])
df

Unnamed: 0,Titulo,Link,Creador
0,Radar,,Mike Loukides
1,Radar Trends to Watch: November 2022,https://www.oreilly.com/radar,Patrick Hall
2,What We Learned Auditing Sophisticated AI for ...,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
3,The Collaborative Metaverse,https://www.oreilly.com/radar/what-we-learned-...,Mike Loukides
4,What Is Hyperautomation?,https://www.oreilly.com/radar/the-collaborativ...,Mike Loukides
5,Radar Trends to Watch: October 2022,https://www.oreilly.com/radar/what-is-hyperaut...,Mike Loukides
6,The Problem with Intelligence,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
7,Radar Trends to Watch: September 2022,https://www.oreilly.com/radar/the-problem-with...,Q McCallum
8,Ad Networks and Content Marketing,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
9,On Technique,https://www.oreilly.com/radar/ad-networks-and-...,Kevlin Henney


### 5. Count the number of entries that are contained in this RSS feed.

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [59]:
parser['entries'][0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

### 7. Extract a list of entry titles.

In [9]:
titulos  = [titulo.text for titulo in soup.find_all('title')]

['Radar',
 'Radar Trends to Watch: November 2022',
 'What We Learned Auditing Sophisticated AI for Bias',
 'The Collaborative Metaverse',
 'What Is Hyperautomation?',
 'Radar Trends to Watch: October 2022',
 'The Problem with Intelligence',
 'Radar Trends to Watch: September 2022',
 'Ad Networks and Content Marketing',
 'On Technique',
 'Scaling False Peaks',
 'The Metaverse Is Not a Place',
 'Radar Trends to Watch: August 2022',
 'SQL: The Universal Solvent for REST APIs',
 'Artificial Creativity?',
 'Radar Trends to Watch: July 2022']

### 8. Calculate the percentage of "Radar Trends" entry titles.

In [42]:
x =[i for i in titulos if 'Radar Trends' in i]
y = (len(x)/len(titulos))*100
y

31.25

### 9. Create a Pandas data frame from the feed's entries.

In [10]:
import pandas as pd

In [22]:
df = pd.DataFrame(zip(titulos, links, creador), columns = ['Titulo', 'Link', 'Creador'])
df

Unnamed: 0,Titulo,Link,Creador
0,Radar,,Mike Loukides
1,Radar Trends to Watch: November 2022,https://www.oreilly.com/radar,Patrick Hall
2,What We Learned Auditing Sophisticated AI for ...,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
3,The Collaborative Metaverse,https://www.oreilly.com/radar/what-we-learned-...,Mike Loukides
4,What Is Hyperautomation?,https://www.oreilly.com/radar/the-collaborativ...,Mike Loukides
5,Radar Trends to Watch: October 2022,https://www.oreilly.com/radar/what-is-hyperaut...,Mike Loukides
6,The Problem with Intelligence,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
7,Radar Trends to Watch: September 2022,https://www.oreilly.com/radar/the-problem-with...,Q McCallum
8,Ad Networks and Content Marketing,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
9,On Technique,https://www.oreilly.com/radar/ad-networks-and-...,Kevlin Henney


### 10. Count the number of entries per author and sort them in descending order.

In [23]:
df = df.sort_values(by='Creador', ascending=False)
df

Unnamed: 0,Titulo,Link,Creador
10,Scaling False Peaks,https://www.oreilly.com/radar/on-technique/,Tim O’Reilly
7,Radar Trends to Watch: September 2022,https://www.oreilly.com/radar/the-problem-with...,Q McCallum
1,Radar Trends to Watch: November 2022,https://www.oreilly.com/radar,Patrick Hall
0,Radar,,Mike Loukides
2,What We Learned Auditing Sophisticated AI for ...,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
3,The Collaborative Metaverse,https://www.oreilly.com/radar/what-we-learned-...,Mike Loukides
4,What Is Hyperautomation?,https://www.oreilly.com/radar/the-collaborativ...,Mike Loukides
5,Radar Trends to Watch: October 2022,https://www.oreilly.com/radar/what-is-hyperaut...,Mike Loukides
6,The Problem with Intelligence,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides
8,Ad Networks and Content Marketing,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [25]:
df['Titulo_len'] = df['Titulo'].str.len()
df = df.sort_values(by='Titulo_len', ascending=False)
df

Unnamed: 0,Titulo,Link,Creador,Titulo_len
2,What We Learned Auditing Sophisticated AI for ...,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides,50
13,SQL: The Universal Solvent for REST APIs,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides,40
7,Radar Trends to Watch: September 2022,https://www.oreilly.com/radar/the-problem-with...,Q McCallum,37
1,Radar Trends to Watch: November 2022,https://www.oreilly.com/radar,Patrick Hall,36
5,Radar Trends to Watch: October 2022,https://www.oreilly.com/radar/what-is-hyperaut...,Mike Loukides,35
12,Radar Trends to Watch: August 2022,https://www.oreilly.com/radar/the-metaverse-is...,Jon Udell,34
8,Ad Networks and Content Marketing,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides,33
6,The Problem with Intelligence,https://www.oreilly.com/radar/radar-trends-to-...,Mike Loukides,29
11,The Metaverse Is Not a Place,https://www.oreilly.com/radar/scaling-false-pe...,Mike Loukides,28
3,The Collaborative Metaverse,https://www.oreilly.com/radar/what-we-learned-...,Mike Loukides,27


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [30]:
descripcion = [desc.text for desc in soup.find_all('description')]

In [67]:
x = [i.lower() for i in descripcion ]
y = [i for i in x if 'machine learning' in i]
y

['september was a busy month. in addition to continued fascination over art generation with dall-e and friends, and the questions they pose for intellectual property, we see interesting things happening with machine learning for low-powered processors: using attention, mechanisms, along with a new microcontroller that can run for a week on a single aa battery. [&#8230;]']