# Working with BeautifulSoup

In [30]:
##https://www.dataquest.io/blog/web-scraping-tutorial-python/

In [3]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

In [4]:
page=urlopen("http://dataquestio.github.io/web-scraping-pages/simple.html")

In [5]:
soup = BeautifulSoup(page, 'html.parser')

In [6]:
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   A simple example page
  </title>
 </head>
 <body>
  <p>
   Here is some simple content for this page.
  </p>
 </body>
</html>


In [7]:
print(soup)

<!DOCTYPE html>

<html>
<head>
<title>A simple example page</title>
</head>
<body>
<p>Here is some simple content for this page.</p>
</body>
</html>


In [8]:
list(soup.children)

['html', '\n', <html>
 <head>
 <title>A simple example page</title>
 </head>
 <body>
 <p>Here is some simple content for this page.</p>
 </body>
 </html>]

In [9]:
[type(item) for item in list(soup.children)] 

[bs4.element.Doctype, bs4.element.NavigableString, bs4.element.Tag]

In [10]:
html = list(soup.children)[2]

In [11]:
list(html.children)

['\n', <head>
 <title>A simple example page</title>
 </head>, '\n', <body>
 <p>Here is some simple content for this page.</p>
 </body>, '\n']

In [12]:
body = list(html.children)[3]

In [13]:
list(body.children)

['\n', <p>Here is some simple content for this page.</p>, '\n']

In [14]:
p = list(body.children)[1]

In [15]:
print(p)

<p>Here is some simple content for this page.</p>


In [16]:
p.get_text()

'Here is some simple content for this page.'

In [17]:
p_new=soup.find_all('p')

In [18]:
print(p.get_text())

Here is some simple content for this page.


In [19]:
page2=urlopen("http://dataquestio.github.io/web-scraping-pages/ids_and_classes.html")

In [20]:
soup2 = BeautifulSoup(page2, 'html.parser')

In [21]:
print(soup2.prettify())

<html>
 <head>
  <title>
   A simple example page
  </title>
 </head>
 <body>
  <div>
   <p class="inner-text first-item" id="first">
    First paragraph.
   </p>
   <p class="inner-text">
    Second paragraph.
   </p>
  </div>
  <p class="outer-text first-item" id="second">
   <b>
    First outer paragraph.
   </b>
  </p>
  <p class="outer-text">
   <b>
    Second outer paragraph.
   </b>
  </p>
 </body>
</html>


In [22]:
print(soup2.find_all('p'))

[<p class="inner-text first-item" id="first">
                First paragraph.
            </p>, <p class="inner-text">
                Second paragraph.
            </p>, <p class="outer-text first-item" id="second">
<b>
                First outer paragraph.
            </b>
</p>, <p class="outer-text">
<b>
                Second outer paragraph.
            </b>
</p>]


In [23]:
soup2.find_all('p')[3].get_text()

'\n\n                Second outer paragraph.\n            \n'

In [24]:
soup2.find_all('p', class_='outer-text')

[<p class="outer-text first-item" id="second">
 <b>
                 First outer paragraph.
             </b>
 </p>, <p class="outer-text">
 <b>
                 Second outer paragraph.
             </b>
 </p>]

In [25]:
page3=urlopen("http://forecast.weather.gov/MapClick.php?lat=37.7772&lon=-122.4168")

In [26]:
soup3 = BeautifulSoup(page3, 'html.parser')

In [27]:
temp1=soup3.find_all('div',class_="tombstone-container")

In [28]:
seven_day = soup.find(id="seven-day-forecast")

In [53]:
#temp1

In [54]:
temp_main=temp1[1]

In [55]:
print(temp_main.prettify())

<div class="tombstone-container">
 <p class="period-name">
  Today
  <br>
   <br/>
  </br>
 </p>
 <p>
  <img alt="Today: Rain.  High near 58. Breezy, with a south wind 22 to 26 mph, with gusts as high as 34 mph.  Chance of precipitation is 100%. New precipitation amounts between a quarter and half of an inch possible. " class="forecast-icon" src="newimages/medium/ra100.png" title="Today: Rain.  High near 58. Breezy, with a south wind 22 to 26 mph, with gusts as high as 34 mph.  Chance of precipitation is 100%. New precipitation amounts between a quarter and half of an inch possible. "/>
 </p>
 <p class="short-desc">
  Rain and
  <br>
   Breezy
  </br>
 </p>
 <p class="temp temp-high">
  High: 58 °F
 </p>
</div>


In [56]:
period = temp_main.find(class_="period-name").get_text()

In [57]:
short_desc = temp_main.find(class_="short-desc").get_text()

In [63]:
temp = temp_main.find(class_="temp").get_text()

In [64]:
img=temp_main.find(class_="forecast-icon")
desc = img['title']

In [65]:
print(period)
print("Clouds are",short_desc)
print(temp)
print(desc)


Today
Clouds are Rain andBreezy
High: 58 °F
Today: Rain.  High near 58. Breezy, with a south wind 22 to 26 mph, with gusts as high as 34 mph.  Chance of precipitation is 100%. New precipitation amounts between a quarter and half of an inch possible. 


# Print this in text file

In [66]:
raw_data= period +' '+ short_desc +' '+ temp +' '+ desc
raw_data

'Today Rain andBreezy High: 58 °F Today: Rain.  High near 58. Breezy, with a south wind 22 to 26 mph, with gusts as high as 34 mph.  Chance of precipitation is 100%. New precipitation amounts between a quarter and half of an inch possible. '

In [105]:
import csv  
from datetime import datetime  

In [113]:
with open('Current temp.txt', 'a') as csv_file:  
    writer = csv.writer(csv_file)
    writer.writerow([raw_data, datetime.now()])

In [67]:
seven_day = soup3.find(id="seven-day-forecast")

In [68]:
period_tags = seven_day.select(".tombstone-container .period-name")

In [69]:
periods = [pt.get_text() for pt in period_tags]
periods

['NOW until1:00pm Sat',
 'Today',
 'Tonight',
 'Sunday',
 'SundayNight',
 'Monday',
 'MondayNight',
 'Tuesday',
 'TuesdayNight']

In [70]:
short_descs = [sd.get_text() for sd in seven_day.select(".tombstone-container .short-desc")]
temps = [t.get_text() for t in seven_day.select(".tombstone-container .temp")]
descs = [d["title"] for d in seven_day.select(".tombstone-container img")]

print(short_descs)
print(temps)
print(descs)

['Wind Advisory', 'Rain andBreezy', 'Heavy Rain', 'Heavy Rainand Windy', 'Rain', 'Chance Rain', 'Chance Rainand Breezy', 'Chance Rain', 'Rain Likely']
['High: 58 °F', 'Low: 53 °F', 'High: 61 °F', 'Low: 50 °F', 'High: 56 °F', 'Low: 50 °F', 'High: 56 °F', 'Low: 51 °F']
['', 'Today: Rain.  High near 58. Breezy, with a south wind 22 to 26 mph, with gusts as high as 34 mph.  Chance of precipitation is 100%. New precipitation amounts between a quarter and half of an inch possible. ', 'Tonight: Rain. The rain could be heavy at times.  Low around 53. South southeast wind 10 to 15 mph becoming light and variable. Winds could gust as high as 18 mph.  Chance of precipitation is 100%. New precipitation amounts between three quarters and one inch possible. ', 'Sunday: Rain. The rain could be heavy at times.  High near 61. Windy, with a south wind 27 to 32 mph decreasing to 20 to 25 mph in the morning. Winds could gust as high as 41 mph.  Chance of precipitation is 90%. New precipitation amounts bet

In [80]:
temps.append('Low: 55°F')

temps

['High: 58 °F',
 'Low: 53 °F',
 'High: 61 °F',
 'Low: 50 °F',
 'High: 56 °F',
 'Low: 50 °F',
 'High: 56 °F',
 'Low: 51 °F',
 'Low: 55°F',
 'Low: 55°F']

In [92]:
temps

['High: 58 °F',
 'Low: 53 °F',
 'Low: 50 °F',
 'High: 56 °F',
 'Low: 50 °F',
 'High: 56 °F',
 'Low: 51 °F',
 'Low: 55°F',
 'Low: 55°F']

In [90]:
temp_nums = temps.pop(2)

In [93]:
import pandas as pd
weather = pd.DataFrame({
        "period": periods, 
        "desc":descs,
        "short_desc": short_descs,
        "temp": temps
        
            })
weather

Unnamed: 0,desc,period,short_desc,temp
0,,NOW until1:00pm Sat,Wind Advisory,High: 58 °F
1,"Today: Rain. High near 58. Breezy, with a sou...",Today,Rain andBreezy,Low: 53 °F
2,Tonight: Rain. The rain could be heavy at time...,Tonight,Heavy Rain,Low: 50 °F
3,Sunday: Rain. The rain could be heavy at times...,Sunday,Heavy Rainand Windy,High: 56 °F
4,"Sunday Night: Rain, mainly before 4am. Low ar...",SundayNight,Rain,Low: 50 °F
5,"Monday: A chance of rain before 10am, then a c...",Monday,Chance Rain,High: 56 °F
6,Monday Night: A 30 percent chance of rain. Mo...,MondayNight,Chance Rainand Breezy,Low: 51 °F
7,"Tuesday: A 50 percent chance of rain. Cloudy,...",Tuesday,Chance Rain,Low: 55°F
8,"Tuesday Night: Rain likely. Cloudy, with a lo...",TuesdayNight,Rain Likely,Low: 55°F


In [128]:
weather.to_csv('weather.csv')

In [94]:
#github=urlopen("https://github.com/j-delaney/easy-application")

In [95]:
#github2 = BeautifulSoup(github, 'html.parser')

In [96]:
#raw_companies=github2.find_all('table')[1]

In [97]:
#Generate lists
#A=[]
#B=[]


In [98]:
#companies=raw_companies.find_all('a')

In [99]:
#locations=raw_companies.find_all("td")


In [121]:
#(locations)

In [122]:
temp_nums = weather["temp"].str.extract("(?P<temp_num>\d+)", expand=False)

In [123]:
weather["temp_num"] = temp_nums.astype('int')

In [125]:
weather

Unnamed: 0,desc,period,short_desc,temp,temp_num
0,,NOW until1:00pm Sat,Wind Advisory,High: 58 °F,58
1,"Today: Rain. High near 58. Breezy, with a sou...",Today,Rain andBreezy,Low: 53 °F,53
2,Tonight: Rain. The rain could be heavy at time...,Tonight,Heavy Rain,Low: 50 °F,50
3,Sunday: Rain. The rain could be heavy at times...,Sunday,Heavy Rainand Windy,High: 56 °F,56
4,"Sunday Night: Rain, mainly before 4am. Low ar...",SundayNight,Rain,Low: 50 °F,50
5,"Monday: A chance of rain before 10am, then a c...",Monday,Chance Rain,High: 56 °F,56
6,Monday Night: A 30 percent chance of rain. Mo...,MondayNight,Chance Rainand Breezy,Low: 51 °F,51
7,"Tuesday: A 50 percent chance of rain. Cloudy,...",Tuesday,Chance Rain,Low: 55°F,55
8,"Tuesday Night: Rain likely. Cloudy, with a lo...",TuesdayNight,Rain Likely,Low: 55°F,55


# D3 PlotlyVisualization

In [100]:
import plotly.plotly as py
import plotly.graph_objs as go

In [101]:
#Setting plotly credentials
py.sign_in('mantejsingh','hQnYIqbxnsuRvQxkVFVW')

In [130]:
#data = [go.Histogram(x=weather['period'],y=weather['temp_num'])]

In [146]:
color=['red','orange','blue','darkgreen','chocolate','gold','cyan','magenta','lime']

In [153]:
google_material_color=['f44336','9C27B0','3F51B5','2196F3','00BCD4','4CAF50','FFC107','795548','607D8B']

In [147]:
#data = [go.Scatter(x = weather['period'],y = weather['temp_num'],mode = 'markers',marker=dict(color = weather['temp_num']))]
#If you want Scatter graph
#data = [go.Scatter(x = weather['period'],y = weather['temp_num'],mode = 'markers',marker=dict(color =color))]

In [154]:
data = [go.Bar(x = weather['period'],y = weather['temp_num'],marker=dict(color =google_material_color))]

In [156]:
layout = go.Layout(
title='This weeks Temp',
xaxis=dict(title='Days'),
yaxis=dict(title='Temp')
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)