# Web Scrapping on Weather data for California US

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
url = "https://forecast.weather.gov/MapClick.php?lat=36.37410569300005&lon=-119.27022999999997#.X_JaO9gzaMo"

response = requests.get(url)

In [4]:
print(response)

<Response [200]>


In [5]:
# let us get the first 1000 characters

print(response.content[:1000])

b'<!DOCTYPE html>\n<html class="no-js">\n    <head>\n        <!-- Meta -->\n        <meta name="viewport" content="width=device-width">\n        <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" /><title>National Weather Service</title><meta name="DC.title" content="National Weather Service" /><meta name="DC.description" content="NOAA National Weather Service National Weather Service" /><meta name="DC.creator" content="US Department of Commerce, NOAA, National Weather Service" /><meta name="DC.date.created" scheme="ISO8601" content="" /><meta name="DC.language" scheme="DCTERMS.RFC1766" content="EN-US" /><meta name="DC.keywords" content="weather, National Weather Service" /><meta name="DC.publisher" content="NOAA\'s National Weather Service" /><meta name="DC.contributor" content="National Weather Service" /><meta name="DC.rights" content="http://www.weather.gov/disclaimer.php" /><meta name="rating" content="General" /><meta name="robots" content="index,follow" />\n\n        

In [6]:
# get the page content in the form of html using beautiful soup's html parser

soup = BeautifulSoup(response.content, 'html.parser')

    We can now extract data values from the 'table' tag for the month using find_all() method of Beautiful soup 

In [7]:
print(soup)

<!DOCTYPE html>

<html class="no-js">
<head>
<!-- Meta -->
<meta content="width=device-width" name="viewport"/>
<link href="http://purl.org/dc/elements/1.1/" rel="schema.DC"/><title>National Weather Service</title><meta content="National Weather Service" name="DC.title"><meta content="NOAA National Weather Service National Weather Service" name="DC.description"/><meta content="US Department of Commerce, NOAA, National Weather Service" name="DC.creator"/><meta content="" name="DC.date.created" scheme="ISO8601"/><meta content="EN-US" name="DC.language" scheme="DCTERMS.RFC1766"/><meta content="weather, National Weather Service" name="DC.keywords"/><meta content="NOAA's National Weather Service" name="DC.publisher"/><meta content="National Weather Service" name="DC.contributor"/><meta content="http://www.weather.gov/disclaimer.php" name="DC.rights"/><meta content="General" name="rating"/><meta content="index,follow" name="robots"/>
<!-- Icons -->
<link href="./images/favicon.ico" rel="shor

In [8]:
# Here the 'tags' are 'a'. Let's extract the 'a' tags

soup.find_all('a')

[<a class="pull-left" href="http://www.noaa.gov" id="header-noaa"><img alt="National Oceanic and Atmospheric Administration" src="/css/images/header_noaa.png"/></a>,
 <a class="pull-left" href="http://www.weather.gov" id="header-nws"><img alt="National Weather Service" src="/css/images/header_nws.png"/></a>,
 <a class="pull-right" href="http://www.commerce.gov" id="header-doc"><img alt="United States Department of Commerce" src="/css/images/header_doc.png"/></a>,
 <a href="http://www.weather.gov">HOME</a>,
 <a class="dropdown-toggle" data-toggle="dropdown" href="http://www.weather.gov/forecastmaps">FORECAST <span class="caret"></span></a>,
 <a href="http://www.weather.gov">Local</a>,
 <a href="http://digital.weather.gov">Graphical</a>,
 <a href="http://www.aviationweather.gov/">Aviation</a>,
 <a href="http://www.nws.noaa.gov/om/marine/home.htm">Marine</a>,
 <a href="http://water.weather.gov/ahps/">Rivers and Lakes</a>,
 <a href="http://www.nhc.noaa.gov/">Hurricanes</a>,
 <a href="http:

In [9]:
# Lets extract the week data

week = soup.find(id="seven-day-forecast-container")

week

<div id="seven-day-forecast-container"><ul class="list-unstyled" id="seven-day-forecast-list"><li class="forecast-tombstone">
<div class="tombstone-container">
<p class="period-name">Tonight<br/><br/></p>
<p><img alt="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. " class="forecast-icon" src="newimages/medium/nbkn.png" title="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. "/></p><p class="short-desc">Mostly Cloudy</p><p class="temp temp-low">Low: 40 °F</p></div></li><li class="forecast-tombstone">
<div class="tombstone-container">
<p class="period-name">Monday<br/><br/></p>
<p><img alt="Monday: A 30 percent chance of showers, mainly after 4pm.  Increasing clouds, with a high near 61. South southeast wind 5 to 10 mph. " class="forecast-icon" src="DualImage.php?i=sct&amp;j=shra&amp;jp=30" title="Monday: A 30 percent chance of showers, mainly after 4pm.  Increasing c

In [10]:
week.find_all('li')

[<li class="forecast-tombstone">
 <div class="tombstone-container">
 <p class="period-name">Tonight<br/><br/></p>
 <p><img alt="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. " class="forecast-icon" src="newimages/medium/nbkn.png" title="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. "/></p><p class="short-desc">Mostly Cloudy</p><p class="temp temp-low">Low: 40 °F</p></div></li>,
 <li class="forecast-tombstone">
 <div class="tombstone-container">
 <p class="period-name">Monday<br/><br/></p>
 <p><img alt="Monday: A 30 percent chance of showers, mainly after 4pm.  Increasing clouds, with a high near 61. South southeast wind 5 to 10 mph. " class="forecast-icon" src="DualImage.php?i=sct&amp;j=shra&amp;jp=30" title="Monday: A 30 percent chance of showers, mainly after 4pm.  Increasing clouds, with a high near 61. South southeast wind 5 to 10 mph. "/></p><p class="short

    When we try to extract the 'li' tag we are getting the complete data. Hence, lets just try using the 'class' tag

In [11]:
items = week.find_all(class_="tombstone-container")

items

[<div class="tombstone-container">
 <p class="period-name">Tonight<br/><br/></p>
 <p><img alt="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. " class="forecast-icon" src="newimages/medium/nbkn.png" title="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. "/></p><p class="short-desc">Mostly Cloudy</p><p class="temp temp-low">Low: 40 °F</p></div>,
 <div class="tombstone-container">
 <p class="period-name">Monday<br/><br/></p>
 <p><img alt="Monday: A 30 percent chance of showers, mainly after 4pm.  Increasing clouds, with a high near 61. South southeast wind 5 to 10 mph. " class="forecast-icon" src="DualImage.php?i=sct&amp;j=shra&amp;jp=30" title="Monday: A 30 percent chance of showers, mainly after 4pm.  Increasing clouds, with a high near 61. South southeast wind 5 to 10 mph. "/></p><p class="short-desc">Mostly Sunny<br/>then Chance<br/>Showers</p><p class="temp temp-

In [12]:
# Here item[0] means only checking the data for 1 day

items[0]

<div class="tombstone-container">
<p class="period-name">Tonight<br/><br/></p>
<p><img alt="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. " class="forecast-icon" src="newimages/medium/nbkn.png" title="Tonight: Mostly cloudy, with a low around 40. Calm wind becoming east southeast around 5 mph after midnight. "/></p><p class="short-desc">Mostly Cloudy</p><p class="temp temp-low">Low: 40 °F</p></div>

In [13]:
# Now lets find only the class in which Tonight is there

items[0].find(class_="period-name")

<p class="period-name">Tonight<br/><br/></p>

In [14]:
items[0].find(class_="period-name").get_text()

'Tonight'

In [15]:
items[0].find(class_="short-desc").get_text()

'Mostly Cloudy'

In [16]:
items[0].find(class_="temp").get_text()

'Low: 40 °F'

## Now lets get the data for entire week

In [17]:
# we are extracting the day period name of entire week into a list
period_names = [item.find(class_='period-name').get_text() for item in items]

print(period_names)

['Tonight', 'Monday', 'MondayNight', 'Tuesday', 'TuesdayNight', 'Wednesday', 'WednesdayNight', 'Thursday', 'ThursdayNight']


In [18]:
# Now lets do the same for description and temperature as well

short_desc = [item.find(class_='short-desc').get_text() for item in items]

print(short_desc)

['Mostly Cloudy', 'Mostly Sunnythen ChanceShowers', 'ChanceShowers thenAreas Fog', 'Areas Fogthen PartlySunny', 'Mostly Cloudy', 'Mostly Cloudy', 'Mostly Cloudy', 'Mostly Sunny', 'Mostly Cloudy']


In [19]:
# Now lets get the data for temperature

temp = [item.find(class_='temp').get_text() for item in items]

print(temp)

['Low: 40 °F', 'High: 61 °F', 'Low: 39 °F', 'High: 56 °F', 'Low: 36 °F', 'High: 58 °F', 'Low: 37 °F', 'High: 56 °F', 'Low: 36 °F']


### Now we have to merge the above extracted list into a Dataframe

In [20]:
weather_data = pd.DataFrame({'Period_Names': period_names, 'Short_Desc': short_desc, 'Temperature': temp})

weather_data

Unnamed: 0,Period_Names,Short_Desc,Temperature
0,Tonight,Mostly Cloudy,Low: 40 °F
1,Monday,Mostly Sunnythen ChanceShowers,High: 61 °F
2,MondayNight,ChanceShowers thenAreas Fog,Low: 39 °F
3,Tuesday,Areas Fogthen PartlySunny,High: 56 °F
4,TuesdayNight,Mostly Cloudy,Low: 36 °F
5,Wednesday,Mostly Cloudy,High: 58 °F
6,WednesdayNight,Mostly Cloudy,Low: 37 °F
7,Thursday,Mostly Sunny,High: 56 °F
8,ThursdayNight,Mostly Cloudy,Low: 36 °F


In [22]:
# From here we have save the above extracted data to a csv file

# weather_data.to_csv("E:\Project Based Datasets\Web Scrapping\weather_data.csv", index=False)