In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
# creating the object to hold to page source content
page = requests.get("https://forecast.weather.gov/MapClick.php?lat=37.7772&lon=-122.4168#.XpLOY8gzbcc")
#print(page)

# creating an object to store the page source in proper html formmat
soup = BeautifulSoup(page.content , 'html.parser')
#print(soup)

# storing the page source belonging to a particular id tag on the page
seven_day = soup.find(id = "seven-day-forecast")
#print(seven_day)

# finding all instances of a particular class tag within the id and storing it to object
forecast_items = seven_day.find_all(class_ = "tombstone-container")
#print(forecast_items)

tonight = forecast_items[0]
print(tonight.prettify())

<div class="tombstone-container">
 <p class="period-name">
  Overnight
  <br/>
  <br/>
 </p>
 <p>
  <img alt="Overnight: Mostly clear, with a low around 47. North wind 11 to 16 mph. " class="forecast-icon" src="newimages/medium/nfew.png" title="Overnight: Mostly clear, with a low around 47. North wind 11 to 16 mph. "/>
 </p>
 <p class="short-desc">
  Mostly Clear
 </p>
 <p class="temp temp-low">
  Low: 47 °F
 </p>
</div>


In [3]:
# extracting values from sub tag
period = tonight.find(class_="period-name").get_text()
short_desc = tonight.find(class_="short-desc").get_text()
temp = tonight.find(class_="temp temp-low").get_text()

print(period)
print(short_desc)
print(temp)

Overnight
Mostly Clear
Low: 47 °F


In [4]:
#extract the title part of the img tag

img = tonight.find("img")
desc = img['title']
print(desc)

Overnight: Mostly clear, with a low around 47. North wind 11 to 16 mph. 


In [5]:
# extracting all values for the period tag from the entire object

period_tags = seven_day.select(".tombstone-container .period-name")
periods = [pt.get_text() for pt in period_tags]
print(periods)

['Overnight', 'Sunday', 'SundayNight', 'Monday', 'MondayNight', 'Tuesday', 'TuesdayNight', 'Wednesday', 'WednesdayNight']


In [6]:
# extracting all values for the short-desc and temp tag from the entire object
short_descs = [sd.get_text() for sd in seven_day.select(".tombstone-container .short-desc")]
temps = [t.get_text() for t in seven_day.select(".tombstone-container .temp")]
descs = [d["title"] for d in seven_day.select(".tombstone-container img")]
print(short_descs)
print()
print(temps)
print()
print(descs)



['Mostly Clear', 'Sunny', 'Mostly Clear', 'Sunny', 'Partly Cloudy', 'Mostly Sunny', 'Partly Cloudy', 'Sunny', 'Mostly Clear']

['Low: 47 °F', 'High: 65 °F', 'Low: 45 °F', 'High: 62 °F', 'Low: 44 °F', 'High: 64 °F', 'Low: 45 °F', 'High: 62 °F', 'Low: 46 °F']

['Overnight: Mostly clear, with a low around 47. North wind 11 to 16 mph. ', 'Sunday: Sunny, with a high near 65. North northeast wind 5 to 9 mph becoming calm. ', 'Sunday Night: Mostly clear, with a low around 45. West wind around 6 mph becoming calm  in the evening. ', 'Monday: Sunny, with a high near 62. Light and variable wind becoming southwest 5 to 10 mph in the afternoon. ', 'Monday Night: Partly cloudy, with a low around 44. West southwest wind around 5 mph becoming calm. ', 'Tuesday: Mostly sunny, with a high near 64.', 'Tuesday Night: Partly cloudy, with a low around 45.', 'Wednesday: Sunny, with a high near 62.', 'Wednesday Night: Mostly clear, with a low around 46.']


In [7]:
# combining all tags and creating a dataframe our of it

import pandas as pd
weather = pd.DataFrame({
"period": periods,
"short_desc": short_descs,
"temp": temps,
"desc":descs
})
weather

Unnamed: 0,period,short_desc,temp,desc
0,Overnight,Mostly Clear,Low: 47 °F,"Overnight: Mostly clear, with a low around 47...."
1,Sunday,Sunny,High: 65 °F,"Sunday: Sunny, with a high near 65. North nort..."
2,SundayNight,Mostly Clear,Low: 45 °F,"Sunday Night: Mostly clear, with a low around ..."
3,Monday,Sunny,High: 62 °F,"Monday: Sunny, with a high near 62. Light and ..."
4,MondayNight,Partly Cloudy,Low: 44 °F,"Monday Night: Partly cloudy, with a low around..."
5,Tuesday,Mostly Sunny,High: 64 °F,"Tuesday: Mostly sunny, with a high near 64."
6,TuesdayNight,Partly Cloudy,Low: 45 °F,"Tuesday Night: Partly cloudy, with a low aroun..."
7,Wednesday,Sunny,High: 62 °F,"Wednesday: Sunny, with a high near 62."
8,WednesdayNight,Mostly Clear,Low: 46 °F,"Wednesday Night: Mostly clear, with a low arou..."


In [8]:
# desc has the text that contains the value of period also
# trying to split the desc string such that the period part is removed

new = weather["desc"].str.split(": ",expand=True)
print(new)

                 0                                                  1
0        Overnight  Mostly clear, with a low around 47. North wind...
1           Sunday  Sunny, with a high near 65. North northeast wi...
2     Sunday Night  Mostly clear, with a low around 45. West wind ...
3           Monday  Sunny, with a high near 62. Light and variable...
4     Monday Night  Partly cloudy, with a low around 44. West sout...
5          Tuesday                 Mostly sunny, with a high near 64.
6    Tuesday Night               Partly cloudy, with a low around 45.
7        Wednesday                        Sunny, with a high near 62.
8  Wednesday Night                Mostly clear, with a low around 46.


In [9]:
#fetching the other part and replacing the desc variable

weather["desc"] = new[1]

In [10]:
weather

Unnamed: 0,period,short_desc,temp,desc
0,Overnight,Mostly Clear,Low: 47 °F,"Mostly clear, with a low around 47. North wind..."
1,Sunday,Sunny,High: 65 °F,"Sunny, with a high near 65. North northeast wi..."
2,SundayNight,Mostly Clear,Low: 45 °F,"Mostly clear, with a low around 45. West wind ..."
3,Monday,Sunny,High: 62 °F,"Sunny, with a high near 62. Light and variable..."
4,MondayNight,Partly Cloudy,Low: 44 °F,"Partly cloudy, with a low around 44. West sout..."
5,Tuesday,Mostly Sunny,High: 64 °F,"Mostly sunny, with a high near 64."
6,TuesdayNight,Partly Cloudy,Low: 45 °F,"Partly cloudy, with a low around 45."
7,Wednesday,Sunny,High: 62 °F,"Sunny, with a high near 62."
8,WednesdayNight,Mostly Clear,Low: 46 °F,"Mostly clear, with a low around 46."


In [53]:
# To store the dataframe as an excel

weather.to_excel("weather_details.xlsx" , header = True)

In [58]:
import os
os.getcwd()

'C:\\Users\\hp'