# <center> WEB_SCRAPING </center>

### 1 ) REQUEST 

In [1]:
from lxml import html
import requests

In [2]:
page = requests.get('http://econpy.pythonanywhere.com/ex/001.html')
tree = html.fromstring(page.content)

In [3]:
tree = html.fromstring(page.content)
tree

<Element html at 0x7f6a1107dcc8>

In [4]:
#This will create a list of buyers:
buyers = tree.xpath('//div[@title="buyer-name"]/text()')
#This will create a list of prices
prices = tree.xpath('//span[@class="item-price"]/text()')

In [5]:
print('Buyers:-',buyers)
print('Prices:-',prices)

Buyers:- ['Carson Busses', 'Earl E. Byrd', 'Patty Cakes', 'Derri Anne Connecticut', 'Moe Dess', 'Leda Doggslife', 'Dan Druff', 'Al Fresco', 'Ido Hoe', 'Howie Kisses', 'Len Lease', 'Phil Meup', 'Ira Pent', 'Ben D. Rules', 'Ave Sectomy', 'Gary Shattire', 'Bobbi Soks', 'Sheila Takya', 'Rose Tattoo', 'Moe Tell']
Prices:- ['$29.95', '$8.37', '$15.26', '$19.25', '$19.25', '$13.99', '$31.57', '$8.49', '$14.47', '$15.86', '$11.11', '$15.98', '$16.27', '$7.50', '$50.85', '$14.26', '$5.68', '$15.00', '$114.07', '$10.09']


### 2) BeautifulSoup

In [6]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(page.content, 'html.parser')

In [7]:
page = requests.get("http://forecast.weather.gov/MapClick.php?lat=37.7772&lon=-122.4168")
soup = BeautifulSoup(page.content, 'html.parser')
seven_day = soup.find(id="seven-day-forecast")
forecast_items = seven_day.find_all(class_="tombstone-container")
tonight = forecast_items[0]
print(tonight.prettify())

<div class="tombstone-container">
 <p class="period-name">
  Today
  <br/>
  <br/>
 </p>
 <p>
  <img alt="Today: Areas of fog before 10am.  Otherwise, mostly cloudy through mid morning, then gradual clearing, with a high near 70. West southwest wind 5 to 13 mph. " class="forecast-icon" src="DualImage.php?i=fg&amp;j=few" title="Today: Areas of fog before 10am.  Otherwise, mostly cloudy through mid morning, then gradual clearing, with a high near 70. West southwest wind 5 to 13 mph. "/>
 </p>
 <p class="short-desc">
  Areas Fog
  <br/>
  then Sunny
 </p>
 <p class="temp temp-high">
  High: 70 °F
 </p>
</div>


In [8]:
period = tonight.find(class_="period-name").get_text()
short_desc = tonight.find(class_="short-desc").get_text()
temp = tonight.find(class_="temp").get_text()

print(period)
print(short_desc)
print(temp)

Today
Areas Fogthen Sunny
High: 70 °F


In [9]:
img = tonight.find("img")
desc = img['title']

print(desc)

Today: Areas of fog before 10am.  Otherwise, mostly cloudy through mid morning, then gradual clearing, with a high near 70. West southwest wind 5 to 13 mph. 


In [10]:
period_tags = seven_day.select(".tombstone-container .period-name")
periods = [pt.get_text() for pt in period_tags]
periods

['Today',
 'Tonight',
 'Saturday',
 'SaturdayNight',
 'Sunday',
 'SundayNight',
 'Monday',
 'MondayNight',
 'Tuesday']

In [11]:
short_descs = [sd.get_text() for sd in seven_day.select(".tombstone-container .short-desc")]
temps = [t.get_text() for t in seven_day.select(".tombstone-container .temp")]
descs = [d["title"] for d in seven_day.select(".tombstone-container img")]
print(short_descs)
print(temps)
print(descs)

['Areas Fogthen Sunny', 'Partly Cloudy', 'Mostly Sunny', 'Mostly Clear', 'Sunny', 'Mostly Clear', 'Mostly Sunny', 'Partly Cloudy', 'Mostly Sunny']
['High: 70 °F', 'Low: 57 °F', 'High: 72 °F', 'Low: 57 °F', 'High: 72 °F', 'Low: 57 °F', 'High: 69 °F', 'Low: 56 °F', 'High: 68 °F']
['Today: Areas of fog before 10am.  Otherwise, mostly cloudy through mid morning, then gradual clearing, with a high near 70. West southwest wind 5 to 13 mph. ', 'Tonight: Partly cloudy, with a low around 57. West southwest wind 7 to 15 mph, with gusts as high as 20 mph. ', 'Saturday: Mostly sunny, with a high near 72. West southwest wind 6 to 13 mph. ', 'Saturday Night: Mostly clear, with a low around 57. West wind 7 to 15 mph, with gusts as high as 18 mph. ', 'Sunday: Sunny, with a high near 72. West southwest wind 5 to 13 mph. ', 'Sunday Night: Mostly clear, with a low around 57.', 'Monday: Mostly sunny, with a high near 69.', 'Monday Night: Partly cloudy, with a low around 56.', 'Tuesday: Mostly sunny, with 

In [12]:
import pandas as pd
weather = pd.DataFrame({
        "period": periods,
         "short_desc": short_descs,
         "temp": temps,
         "desc":descs
    })
weather

Unnamed: 0,period,short_desc,temp,desc
0,Today,Areas Fogthen Sunny,High: 70 °F,"Today: Areas of fog before 10am. Otherwise, m..."
1,Tonight,Partly Cloudy,Low: 57 °F,"Tonight: Partly cloudy, with a low around 57. ..."
2,Saturday,Mostly Sunny,High: 72 °F,"Saturday: Mostly sunny, with a high near 72. W..."
3,SaturdayNight,Mostly Clear,Low: 57 °F,"Saturday Night: Mostly clear, with a low aroun..."
4,Sunday,Sunny,High: 72 °F,"Sunday: Sunny, with a high near 72. West south..."
5,SundayNight,Mostly Clear,Low: 57 °F,"Sunday Night: Mostly clear, with a low around 57."
6,Monday,Mostly Sunny,High: 69 °F,"Monday: Mostly sunny, with a high near 69."
7,MondayNight,Partly Cloudy,Low: 56 °F,"Monday Night: Partly cloudy, with a low around..."
8,Tuesday,Mostly Sunny,High: 68 °F,"Tuesday: Mostly sunny, with a high near 68."


In [13]:
temp_nums = weather["temp"].str.extract("(?P<temp_num>\d+)", expand=False)
weather["temp_num"] = temp_nums.astype('int')
temp_nums

0    70
1    57
2    72
3    57
4    72
5    57
6    69
7    56
8    68
Name: temp_num, dtype: object

In [14]:
weather["temp_num"].mean()

64.22222222222223

In [15]:
is_night = weather["temp"].str.contains("Low")
weather["is_night"] = is_night
is_night

0    False
1     True
2    False
3     True
4    False
5     True
6    False
7     True
8    False
Name: temp, dtype: bool

In [16]:
weather[is_night]

Unnamed: 0,period,short_desc,temp,desc,temp_num,is_night
1,Tonight,Partly Cloudy,Low: 57 °F,"Tonight: Partly cloudy, with a low around 57. ...",57,True
3,SaturdayNight,Mostly Clear,Low: 57 °F,"Saturday Night: Mostly clear, with a low aroun...",57,True
5,SundayNight,Mostly Clear,Low: 57 °F,"Sunday Night: Mostly clear, with a low around 57.",57,True
7,MondayNight,Partly Cloudy,Low: 56 °F,"Monday Night: Partly cloudy, with a low around...",56,True


### 3) Selenium

Selenium Python bindings provide a convenient API to access Selenium WebDrivers like Firefox, Ie, Chrome, Remote

### 4) Scrapy 