## LTC Web Scraping

[How Ontario is responding to Covid-19](https://www.ontario.ca/page/how-ontario-is-responding-covid-19)

**Authors:** KT and Shreeram

*Note:* A Selenium server must be running before this notebook is executed. Start it from a command prompt:

`java -jar /path/to/selenium-server`

The following must also be installed:

`brew cask install chromedriver`

`brew cask install chromium`

---

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
# Fetch the page with plain requests. A timeout is passed because requests
# waits indefinitely by default when the server never responds.
r = requests.get('https://www.ontario.ca/page/how-ontario-is-responding-covid-19#section-1', timeout=30)
# Parse the returned HTML text with the html5lib parser.
s = BeautifulSoup(r.text,'html5lib')

In [2]:
# Show the headers that were sent with the GET request above.
r.request.headers

{'User-Agent': 'python-requests/2.23.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}

In [3]:
# Read the Last-Modified header from the server's response.
r.headers['Last-Modified']

'Thu, 07 May 2020 22:05:05 GMT'

#### Method 5: Selenium Server

In [4]:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Connect to the Selenium server running on localhost:4444 and request a
# Chrome session. The browser is selected through `options=` because the
# `desired_capabilities=` keyword was deprecated in Selenium 4 and later
# removed; `options=` is accepted by both Selenium 3.8+ and 4.x.
driver = webdriver.Remote(
   command_executor='http://127.0.0.1:4444/wd/hub',
   options=webdriver.ChromeOptions())

In [5]:
# Page to scrape; `my_url` is reused by cells further down the notebook.
my_url = 'https://www.ontario.ca/page/how-ontario-is-responding-covid-19#section-1'
# Load the page in the Selenium-controlled browser.
driver.get(my_url)

In [6]:
from io import StringIO

# Read the browser's page source once and parse it once; the original fetched
# and parsed the same page twice. The HTML is wrapped in StringIO because
# newer pandas deprecates passing a literal HTML string to read_html.
ltc_tables = pd.read_html(StringIO(driver.page_source))
# The tables are picked by position (3 and 4), so these indices must be
# re-checked if the page layout changes.
df = ltc_tables[3]
df2 = ltc_tables[4]

In [7]:
# Save each scraped table to its own CSV in the working directory. This runs
# before the 'Status' column is added in the following cells, so these two
# files do not contain that column.
df.to_csv('ltc-active.csv')
df2.to_csv('ltc-inactive.csv')

In [8]:
# Label every row of the second table (saved above as ltc-inactive.csv).
df2['Status'] = 'Inactive'

In [9]:
# Label every row of the first table (saved above as ltc-active.csv).
df['Status'] = 'Active'

In [10]:
# Stack the two tables into one frame. pd.concat replaces DataFrame.append,
# which was deprecated in pandas 1.4 and removed in pandas 2.0. Row labels are
# kept as-is (no ignore_index), matching what append produced.
all_ltc = pd.concat([df, df2])

In [12]:
# Write the combined table to ../data, relative to the notebook's working directory.
all_ltc.to_csv('../data/merged_ltc.csv')

---
# Other Attempts:

---

#### Method 1: Firefox driver with options and binary

In [109]:
import selenium 
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary

# Failed attempt, kept for reference (see the PermissionError recorded below).
options = Options()
# NOTE(review): '/usr/local/bin' looks like a directory rather than the Firefox
# executable, which would fit the PermissionError — confirm the intended path.
options.binary_location = FirefoxBinary('/usr/local/bin')
# The driver instance is not assigned to a name, so later cells cannot use or quit it.
selenium.webdriver.firefox.webdriver.WebDriver(firefox_options=options)

  import sys


PermissionError: [Errno 13] Permission denied: '/usr/local/bin'

#### Method 2: Firefox driver with binary and desired capabilities

In [110]:
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Failed attempt, kept for reference (see the AttributeError recorded below).
# Work on a copy: DesiredCapabilities.FIREFOX is a shared class-level dict, so
# setting a key on it directly would alter it for every later user in this kernel.
cap = DesiredCapabilities.FIREFOX.copy()
# Turn off the "marionette" capability for this session only.
cap["marionette"] = False
# NOTE(review): the driver path ends in ".exe" although the setup notes at the
# top of the notebook use Homebrew — confirm the geckodriver location.
browser = webdriver.Firefox(capabilities=cap, executable_path="/usr/local/bin/geckodriver.exe")
browser.get(my_url)
browser.quit()

AttributeError: 'str' object has no attribute 'launch_browser'

#### Method 3: Firefox driver

In [106]:
# Failed attempt, kept for reference. The AttributeError recorded below was
# presumably raised while launching Firefox, in which case the loop body never ran.
from selenium.webdriver.common.by import By

with webdriver.Firefox() as driver:
    # Pass the URL variable; the original passed the literal string 'my_url'.
    driver.get(my_url)
    # find_elements returns every matching <table> element on the page.
    tables = driver.find_elements(By.TAG_NAME, 'table')
    for table in tables:
        # Parse each table's own markup; the original handed an integer loop
        # index to an un-imported `bs4` name.
        bs = BeautifulSoup(table.get_attribute('outerHTML'), 'html.parser')
        print(bs)

AttributeError: 'str' object has no attribute 'launch_browser'

#### Method 4: Faking a browser visit

In [111]:
import requests

# Send a browser-style User-Agent instead of the default python-requests one.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

# A timeout is passed so the call cannot wait indefinitely on an unresponsive server.
response = requests.get(my_url, headers=headers, timeout=30)
print(response.content)






In [115]:
# Failed attempt, kept for reference (see the AttributeError recorded below).
# NOTE(review): '/usr/local/bin' is passed as the profile location — confirm
# whether a real Firefox profile path was intended.
profile = webdriver.FirefoxProfile('/usr/local/bin')
# Rebinds the notebook-level name `driver` that the Selenium-server cell also assigns.
driver = webdriver.Firefox(profile)

AttributeError: 'str' object has no attribute 'launch_browser'

In [34]:
# Display the soup parsed from the plain requests response in the first cell.
s

<!DOCTYPE html>
<html class="no-js" lang="en"><head> <meta charset="utf-8"/> <meta content="width=device-width,initial-scale=1" name="viewport"/> <meta content="IE=edge" http-equiv="X-UA-Compatible"/> <meta content="0f38a1022cdb82bc265752c28d3c370df5c9dafd" name="sha"/> <title></title> <base href="/"/> <link href="/css/combined.css" media="screen" rel="stylesheet"/> <link href="/css/print.css" media="print" rel="stylesheet"/> <script src="/vendor/modernizr/modernizr.js"></script> <script>!function(a,e,o,n,c,i,t){a.GoogleAnalyticsObject=c,a.ga=a.ga||function(){(a.ga.q=a.ga.q||[]).push(arguments)},a.ga.l=1*new Date,i=e.createElement(o),t=e.getElementsByTagName(o)[0],i.async=1,i.src="//www.google-analytics.com/analytics.js",t.parentNode.insertBefore(i,t)}(window,document,"script",0,"ga"),ga("create","UA-21003310-6",{cookieDomain:"auto",allowLinker:!0}),ga("create","UA-48649528-1",{cookieDomain:"auto",allowLinker:!0,name:"ServiceOntario"}),ga("require","linker"),ga("linker:autoLink",["serv