# Web Scraping Project

### Importing Libraries

In [35]:
from bs4 import BeautifulSoup
import pandas
import requests
import smtplib
import time
import datetime

### Scraping Data from website

In [44]:
# URL of the CAA page which displays daily gas prices for the provinces in Canada

URL = "https://www.caa.ca/gas-prices/"

# Browser-like request headers; a template can be obtained from https://httpbin.org/get.
# Two entries echoed back by httpbin are deliberately NOT copied here:
#   * "Host": "httpbin.org" would be sent to www.caa.ca as the wrong virtual host.
#   * "X-Amzn-Trace-Id" is a trace id that httpbin's own infrastructure attached.
# "br" is also left out of Accept-Encoding because requests only decodes Brotli
# responses when an optional Brotli package is installed.

headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,ta;q=0.7",
    "Referer": "https://www.youtube.com/",
    "Sec-Ch-Ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"100\", \"Google Chrome\";v=\"100\"",
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "\"Windows\"",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36"}

# A timeout keeps the cell from hanging forever; raise_for_status() stops us from
# parsing an HTTP error page as if it were the price table.
page = requests.get(URL, headers=headers, timeout=30)
page.raise_for_status()

soup1 = BeautifulSoup(page.content, "html.parser")

# The prettified markup is re-parsed exactly as before so that the text extracted
# from each cell keeps the layout the `price[:-2]` slice below was written for.
soup2 = BeautifulSoup(soup1.prettify(),'html.parser')

province_table = soup2.find('table', class_ ='provinces_table')
if province_table is None:
    raise RuntimeError("Could not find the 'provinces_table' table on " + URL)

prov = []   # province names, in page order
pr   = []   # price strings with the trailing two-character suffix removed
for province in province_table.find_all('tbody'):
    for row in province.find_all('tr'):
        name_cell = row.find('td', class_ = 'caa_gas_city')
        # Match on 'caa_gas_price' alone: the second class on the cell is not always
        # 'price-down', and an exact two-class match would return None for such rows.
        price_cell = row.find('td', class_ = 'caa_gas_price')
        if name_cell is None or price_cell is None:
            # Not a data row (one of the expected cells is missing); skip it.
            continue
        name = name_cell.text.strip()
        price = price_cell.text.strip()
        prov.append(name)
        pr.append(price[:-2])


### Getting today's date

In [27]:
# Capture the current local calendar date (no time-of-day component);
# it is written into the "Date" column of the CSV.
today = datetime.datetime.now().date()

print(today)

2022-04-12


### Writing the obtained data into a csv

In [33]:
import csv

header = ["Province","Price/L","Date"]

# One CSV row per scraped province: [province name, price string, date of the scrape].
rows = [[province_name, province_price, today]
        for province_name, province_price in zip(prov, pr)]

# Run this cell only once: mode 'w' truncates the file, so re-running it
# deletes all data already stored in the csv.
with open('GasPrices.csv', 'w', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(rows)
        


### Function that sends an email when the price goes above a given value

In [None]:
def send_mail():
    """Send the gas-price alert e-mail through Gmail's SMTP server.

    The sender address, receiver address and password are read from the
    environment variables ``GAS_ALERT_SENDER``, ``GAS_ALERT_RECEIVER`` and
    ``GAS_ALERT_PASSWORD``; when a variable is not set, the placeholder value
    written below is used instead, so existing behaviour is unchanged.

    Raises:
        smtplib.SMTPException: if the TLS upgrade, login or delivery fails.
        OSError: if the connection to the SMTP server cannot be established.
    """
    import os  # local import so this cell does not depend on the imports cell

    sender = os.environ.get("GAS_ALERT_SENDER", "xxxyyyzzz@gmail.com")
    receiver = os.environ.get("GAS_ALERT_RECEIVER", "aaabbbccc@gmail.com")
    password = os.environ.get("GAS_ALERT_PASSWORD", 'xxxxxxxxxxx')
    Message = "Heyy!, Gas Prices are over 1.75 CAD in your province"

    # The context manager sends QUIT and closes the socket even when login or
    # delivery raises, so the connection is no longer leaked.
    with smtplib.SMTP('smtp.gmail.com',587) as server:
        server.starttls()
        server.login(sender,password)
        server.sendmail(sender,receiver,Message)

### Function that scrapes the current prices and appends them to the csv

In [30]:
def check_price(alert_province='Ontario', alert_threshold=175):
    """Scrape the current gas prices, send an alert if needed, and append them to the csv.

    The CAA gas-price page is downloaded and every row of the provinces table is
    read. Each row is appended to ``GasPrices.csv`` as
    ``[province, price, today's date]``. If the row for ``alert_province`` has a
    price strictly greater than ``alert_threshold``, ``send_mail()`` is called.

    Args:
        alert_province: Province name, exactly as shown in the table, to watch.
        alert_threshold: Alert limit, compared against the scraped price number
            (the default 175 corresponds to the "1.75 CAD" in the alert text).

    Raises:
        requests.RequestException: if the page cannot be downloaded, the request
            times out, or the server answers with an HTTP error status.
        RuntimeError: if the provinces table is not present in the page.
    """
    import csv

    URL = "https://www.caa.ca/gas-prices/"

    # Browser-like headers. The "Host": "httpbin.org" and "X-Amzn-Trace-Id" entries
    # that httpbin echoes back are left out: they belong to the httpbin request, and
    # a wrong Host header would be sent to www.caa.ca. "br" is omitted from
    # Accept-Encoding because requests only decodes Brotli when an optional
    # Brotli package is installed.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,ta;q=0.7",
        "Referer": "https://www.youtube.com/",
        "Sec-Ch-Ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"100\", \"Google Chrome\";v=\"100\"",
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": "\"Windows\"",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "cross-site",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36"}

    # The timeout prevents an unattended run from blocking forever on a dead connection.
    page = requests.get(URL, headers=headers, timeout=30)
    page.raise_for_status()

    soup1 = BeautifulSoup(page.content, "html.parser")

    # Re-parse the prettified markup, as before, so the cell text keeps the layout
    # that the `price[:-2]` slice below was written for.
    soup2 = BeautifulSoup(soup1.prettify(),'html.parser')

    province_table = soup2.find('table', class_ ='provinces_table')
    if province_table is None:
        raise RuntimeError("Could not find the 'provinces_table' table on " + URL)

    today = datetime.date.today()

    prov = []
    pr   = []
    for province in province_table.find_all('tbody'):
        for row in province.find_all('tr'):
            name_cell = row.find('td', class_ = 'caa_gas_city')
            # Match 'caa_gas_price' alone; requiring the exact string
            # 'caa_gas_price price-down' returns None for rows whose cell
            # carries a different second class.
            price_cell = row.find('td', class_ = 'caa_gas_price')
            if name_cell is None or price_cell is None:
                continue  # not a data row
            name = name_cell.text.strip()
            price = price_cell.text.strip()
            price_value = price[:-2]  # drop the trailing two-character suffix
            prov.append(name)
            pr.append(price_value)

            if name == alert_province:
                # Compare the numeric part only. int() on the raw cell text raised
                # ValueError because of the suffix and the decimal point.
                try:
                    numeric_price = float(price_value)
                except ValueError:
                    numeric_price = None  # unexpected format: record the row, skip the alert
                if numeric_price is not None and numeric_price > alert_threshold:
                    send_mail()

    # Appending data into the csv (the header row is written by the one-off setup cell)
    with open('GasPrices.csv', 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        writer.writerows([name, price_value, today]
                         for name, price_value in zip(prov, pr))

### The code is run once every day to collect data

In [None]:
# Interval between two collections: one day, expressed in seconds.
SECONDS_PER_DAY = 24 * 60 * 60

# Runs until the kernel is interrupted: collect once, then sleep for a day.
while True:
    check_price()
    time.sleep(SECONDS_PER_DAY)