# **Scraping Weather from BBC Website**

# **0.0 Imports**

In [1]:
import os
import requests
import json
from urllib.parse import urlencode
import numpy as np
import pandas as pd
import re  
from bs4 import BeautifulSoup
from datetime import datetime

# **1.0 Helper Function**

In [2]:
def location_id(c):
    """Return the BBC location ID of the top search match for a place name.

    Sends ``c`` as the search term to the BBC locator service and returns the
    ``id`` field of the first entry in the result list (the request asks for
    results ordered by importance).

    Args:
        c: Free-text place name to search for, e.g. ``'new york'``.

    Returns:
        The ``id`` value of the first search result.

    Raises:
        requests.HTTPError: If the service responds with an error status.
        IndexError: If the search returns no results.
    """
    location_url = 'https://locator-service.api.bbci.co.uk/locations?' + urlencode({
        # The key may be supplied via the environment; the literal fallback
        # keeps the notebook working exactly as before when it is not set.
        'api_key': os.environ.get('BBC_LOCATOR_API_KEY', 'AGbFAKx58hyjQScCXIYrxuEwJh2W2cmv'),
        's': c,
        'stack': 'aws',
        'locale': 'en',
        'filter': 'international',
        'place-types': 'settlement,airport,district',
        'order': 'importance',
        'a': 'true',
        'format': 'json'
    })
    # Without a timeout a stalled connection would hang the cell indefinitely.
    reply = requests.get(location_url, timeout=10)
    # Surface HTTP errors here rather than as a confusing JSON/KeyError below.
    reply.raise_for_status()
    matches = reply.json()['response']['results']['results']
    if not matches:
        # Same exception type as the bare [0] lookup raised, but with a usable message.
        raise IndexError(f'no BBC location found for {c!r}')
    return matches[0]['id']

# **2.0 Extracting URL for the City**

**2.1 Finding Location ID for the city**

In [3]:
# Strip surrounding whitespace so ' new york ' and 'new york' search identically,
# and stop early on blank input instead of sending an empty search term.
city = input('enter a city: ').strip()
if not city:
    raise ValueError('a city name is required')
result = location_id(city)
print('location id is: ', result)

enter a city: new york
location id is:  5128581


**2.2 Building the Forecast URL and Fetching the Page**

In [4]:
# Forecast page URL for the location; str() keeps the concatenation valid
# even if the ID is not already a string.
url      = 'https://www.bbc.com/weather/' + str(result)
# A timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() fails here rather than letting an error page be parsed
# later as if it were a forecast.
response = requests.get(url, timeout=10)
response.raise_for_status()

# **3.0 Scraping Website**

**3.1 Parsing the Webpage**

In [5]:
# Parse the raw response bytes into a navigable tree with the 'html.parser' backend.
soup = BeautifulSoup(markup=response.content, features='html.parser')

**3.2 Scraping Daily High Temperatures**

In [6]:
# Collect every <span> whose class is 'wr-day-temperature__high-value'.
daily_high = soup.find_all('span', attrs={'class': 'wr-day-temperature__high-value'})
# Keep the first whitespace-separated token of each span, for at most 13 entries.
# Slicing (instead of indexing 0..12) avoids an IndexError when fewer spans are found.
daily_high_values = [span.text.strip().split()[0] for span in daily_high[:13]]
daily_high_values

['0°',
 '-5°',
 '5°',
 '10°',
 '10°',
 '9°',
 '6°',
 '8°',
 '7°',
 '8°',
 '9°',
 '9°',
 '9°']

**3.3 Scraping Daily Low Temperatures**

In [7]:
# Collect every <span> whose class is 'wr-day-temperature__low-value'.
daily_low = soup.find_all('span', attrs={'class': 'wr-day-temperature__low-value'})
# Keep the first whitespace-separated token of each span, for at most 13 entries.
# Slicing (instead of indexing 0..12) avoids an IndexError when fewer spans are found.
daily_low_values = [span.text.strip().split()[0] for span in daily_low[:13]]
daily_low_values

['-1°',
 '-14°',
 '-7°',
 '1°',
 '1°',
 '5°',
 '2°',
 '2°',
 '2°',
 '1°',
 '1°',
 '3°',
 '3°']

**3.4 Scraping Daily Summaries**

In [8]:
# First <div> on the page whose class is 'wr-day-summary'.
daily_summary = soup.find('div', attrs={'class': 'wr-day-summary'})
# Each match starts at a letter and runs until just before the next uppercase
# letter, which cuts the div's text at every capital; only the first 13 pieces are kept.
daily_summary_list = re.compile('[a-zA-Z][^A-Z]*').findall(daily_summary.text)[:13]
daily_summary_list

['A clear sky and a moderate breeze',
 'Sunny and a fresh breeze',
 'Sunny and a moderate breeze',
 'Sunny intervals and a gentle breeze',
 'Sunny intervals and a gentle breeze',
 'Light cloud and a gentle breeze',
 'Light cloud and a moderate breeze',
 'Sunny intervals and a gentle breeze',
 'Light rain and a moderate breeze',
 'Sunny intervals and a moderate breeze',
 'Sunny intervals and a gentle breeze',
 'Sunny intervals and a gentle breeze',
 'Light cloud and a gentle breeze']

**3.5 Generating Forecast Dates**

In [9]:
# One consecutive calendar day per scraped high temperature, starting today.
# Formatting the whole range (instead of indexing a fixed 0..12) keeps the number
# of dates equal to `periods`, so a shorter forecast no longer raises an IndexError.
datelist = (
    pd.date_range(datetime.today(), periods=len(daily_high_values))
    .strftime('%y-%m-%d')
    .tolist()
)
datelist

['23-02-03',
 '23-02-04',
 '23-02-05',
 '23-02-06',
 '23-02-07',
 '23-02-08',
 '23-02-09',
 '23-02-10',
 '23-02-11',
 '23-02-12',
 '23-02-13',
 '23-02-14',
 '23-02-15']

# **4.0 Arranging the Scraped values into a Dataframe**

In [10]:
# Column name -> list of per-day values assembled in the cells above.
weather = {
    'Date': datelist,
    'High': daily_high_values,
    'Low': daily_low_values,
    'Summary': daily_summary_list,
}

In [11]:
# Build the table from the column dict and use the 'Date' column as its index.
df = pd.DataFrame(weather).set_index('Date')
df

Unnamed: 0_level_0,High,Low,Summary
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
23-02-03,0°,-1°,A clear sky and a moderate breeze
23-02-04,-5°,-14°,Sunny and a fresh breeze
23-02-05,5°,-7°,Sunny and a moderate breeze
23-02-06,10°,1°,Sunny intervals and a gentle breeze
23-02-07,10°,1°,Sunny intervals and a gentle breeze
23-02-08,9°,5°,Light cloud and a gentle breeze
23-02-09,6°,2°,Light cloud and a moderate breeze
23-02-10,8°,2°,Sunny intervals and a gentle breeze
23-02-11,7°,2°,Light rain and a moderate breeze
23-02-12,8°,1°,Sunny intervals and a moderate breeze
