# Web scrapping BBC RSS feed data

In [1]:
#import library
import requests
from bs4 import BeautifulSoup

#enter URL
url = "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"

resp = requests.get(url)

soup = BeautifulSoup(resp.content, features="xml")

In [2]:
print(soup.prettify())

<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">
 <channel>
  <title>
   BBC News - US &amp; Canada
  </title>
  <description>
   BBC News - US &amp; Canada
  </description>
  <link>
   https://www.bbc.co.uk/news/
  </link>
  <image>
   <url>
    https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif
   </url>
   <title>
    BBC News - US &amp; Canada
   </title>
   <link>
    https://www.bbc.co.uk/news/
   </link>
  </image>
  <generator>
   RSS for Node
  </generator>
  <lastBuildDate>
   Wed, 06 Nov 2019 01:45:10 GMT
  </lastBuildDate>
  <copyright>
   Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/2/hi/help/rss/4498287.stm for terms and conditions of reuse.
  </

In [3]:
items = soup.findAll('item')

In [4]:
print(items)

[<item>\n<title>Trump envoy Gordon Sondland revises testimony on Ukraine aid</title>\n<description>Gordon Sondland says he now remembers saying US aid "likely" hinged on an inquiry sought by Mr Trump.</description>\n<link>https://www.bbc.co.uk/news/world-us-canada-50310695</link>\n<guid isPermaLink="true">https://www.bbc.co.uk/news/world-us-canada-50310695</guid>\n<pubDate>Tue, 05 Nov 2019 22:28:20 GMT</pubDate>\n</item>, <item>\n<title>Mexico Mormons: Nine US citizens, including children, killed in ambush</title>\n<description>Six children are among those killed in an ambush by gunmen thought to be from a drug cartel.</description>\n<link>https://www.bbc.co.uk/news/world-latin-america-50299562</link>\n<guid isPermaLink="true">https://www.bbc.co.uk/news/world-latin-america-50299562</guid>\n<pubDate>Tue, 05 Nov 2019 20:00:35 GMT</pubDate>\n</item>, <item>\n<title>Jeffrey Epstein: ABC stopped report 'amid Palace threats'</title>\n<description>A TV anchor says Buckingham Palace "threatene

In [5]:
len(items)

53

In [6]:
item = items[1]

In [7]:
item

<item>\n<title>Mexico Mormons: Nine US citizens, including children, killed in ambush</title>\n<description>Six children are among those killed in an ambush by gunmen thought to be from a drug cartel.</description>\n<link>https://www.bbc.co.uk/news/world-latin-america-50299562</link>\n<guid isPermaLink="true">https://www.bbc.co.uk/news/world-latin-america-50299562</guid>\n<pubDate>Tue, 05 Nov 2019 20:00:35 GMT</pubDate>\n</item>

In [8]:
item.title

<title>Mexico Mormons: Nine US citizens, including children, killed in ambush</title>

In [9]:
item.title.text

u'Mexico Mormons: Nine US citizens, including children, killed in ambush'

In [10]:
item.pubdate

In [11]:
#declare empty var to append data
news_items = []

In [12]:
#scarring HTML tags such as Title, Description, Links and Publication date
for item in items:
    news_item = {}
    news_item['title'] = item.title.text
    news_item['description'] = item.description.text
    news_item['link'] = item.link.text
    news_item['pubDate'] = item.pubDate.text
    news_items.append(news_item)

In [13]:
print(news_items)

[{'link': u'https://www.bbc.co.uk/news/world-us-canada-50310695', 'description': u'Gordon Sondland says he now remembers saying US aid "likely" hinged on an inquiry sought by Mr Trump.', 'pubDate': u'Tue, 05 Nov 2019 22:28:20 GMT', 'title': u'Trump envoy Gordon Sondland revises testimony on Ukraine aid'}, {'link': u'https://www.bbc.co.uk/news/world-latin-america-50299562', 'description': u'Six children are among those killed in an ambush by gunmen thought to be from a drug cartel.', 'pubDate': u'Tue, 05 Nov 2019 20:00:35 GMT', 'title': u'Mexico Mormons: Nine US citizens, including children, killed in ambush'}, {'link': u'https://www.bbc.co.uk/news/world-us-canada-50296742', 'description': u'A TV anchor says Buckingham Palace "threatened us a million different ways" to bury the story.', 'pubDate': u'Tue, 05 Nov 2019 23:01:46 GMT', 'title': u"Jeffrey Epstein: ABC stopped report 'amid Palace threats'"}, {'link': u'https://www.bbc.co.uk/news/world-us-canada-50311625', 'description': u'The 

In [14]:
news_items[0]

{'description': u'Gordon Sondland says he now remembers saying US aid "likely" hinged on an inquiry sought by Mr Trump.',
 'link': u'https://www.bbc.co.uk/news/world-us-canada-50310695',
 'pubDate': u'Tue, 05 Nov 2019 22:28:20 GMT',
 'title': u'Trump envoy Gordon Sondland revises testimony on Ukraine aid'}

In [15]:
#import pandas to create dataframe and CSV
import pandas as pd
df = pd.DataFrame(news_items,columns=['title','description','link','pubDate'])

In [18]:
df.head()

Unnamed: 0,title,description,link,pubDate
0,Trump envoy Gordon Sondland revises testimony ...,Gordon Sondland says he now remembers saying U...,https://www.bbc.co.uk/news/world-us-canada-503...,"Tue, 05 Nov 2019 22:28:20 GMT"
1,"Mexico Mormons: Nine US citizens, including ch...",Six children are among those killed in an ambu...,https://www.bbc.co.uk/news/world-latin-america...,"Tue, 05 Nov 2019 20:00:35 GMT"
2,Jeffrey Epstein: ABC stopped report 'amid Pala...,"A TV anchor says Buckingham Palace ""threatened...",https://www.bbc.co.uk/news/world-us-canada-502...,"Tue, 05 Nov 2019 23:01:46 GMT"
3,California murder suspects escaped jail throug...,"The two inmates, who were awaiting trial, absc...",https://www.bbc.co.uk/news/world-us-canada-503...,"Wed, 06 Nov 2019 01:21:12 GMT"
4,Whistleblower raises doubts over 787 oxygen sy...,Former quality manager says Boeing was driven ...,https://www.bbc.co.uk/news/business-50293927,"Wed, 06 Nov 2019 00:00:38 GMT"


In [19]:
df.to_csv('BBCdata1.csv',index=False, encoding = 'utf-8')