# Major Philippine Labor Law Resources

## import libraries

In [63]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
import pandas as pd

## setup driver

In [64]:
PATH = "C:\Program Files (x86)\Chrome Driver\chromedriver.exe"
service = Service(executable_path=PATH)
driver = webdriver.Chrome(service=service)

# used so that everytime browser updates there is no need for downloading a new
# version of the chromedriver.exe application to match version of newly updated browser
# from webdriver_manager.chrome import ChromeDriverManager
# service = Service(executable_path=ChromeDriverManager().install())

## returns links and link names

In [65]:
def collect_link_details(driver: webdriver.Chrome, link_selector: str) -> pd.DataFrame:
    try:
        driver.get("https://www.chanrobles.com/majorphilippinelaborlawresources.html")
        wait_val = WebDriverWait(driver, timeout=10).until(lambda driver: driver.execute_script('return document.readyState === "complete"'))

        # narrow down using selector 
        content = driver.find_element(By.CSS_SELECTOR, link_selector)
        links = content.find_elements(By.TAG_NAME, "a")

        # collect link texts and hrefs
        link_texts = []
        link_hrefs = []
        for link in links:
            # extract href of link through get dom attribute
            # extract text of link through self.text
            link_hrefs.append(link.get_attribute('href'))
            link_texts.append(link.text)
        return pd.DataFrame({'link_text': link_texts, 'href': link_hrefs})
    except TimeoutError as error:
        print("Error {} has occured".format(error))
    
    

html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center:nth-of-type(2) > center:nth-of-type(2) > center > table > tbody

use this selector to extract all links

In [66]:
link_details = collect_link_details(
    driver, 
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center:nth-of-type(2) > center:nth-of-type(2) > center > table > tbody"    
)

In [67]:
link_details

Unnamed: 0,link_text,href
0,Labor Circular On-Line,https://www.chanrobles.com/legal4circular.htm#...
1,Labor Code of the Philippines,https://www.chanrobles.com/legal4labor.htm#LAB...
2,Amendments to the Labor Code (Republic Act No....,https://www.chanrobles.com/republicactno6715.html
3,Amendments to the Labor Code (Presidential Dec...,https://www.chanrobles.com/presidentialdecreen...
4,"An Act to Amend Articles 212, 217, 218, 255, 2...",https://www.chanrobles.com/bataspambansabilang...
...,...,...
65,AN ACT AUTHORIZING ANNUAL CHRISTMAS BONUS TO N...,https://www.chanrobles.com/republicactno6686.htm
66,COMPENSATION AND POSITION CLASSIFICATION ACT O...,https://www.chanrobles.com/republicactno6758.htm
67,AN ACT TO GRANT CIVIL SERVICE ELIGIBILITY UNDE...,https://www.chanrobles.com/republicactno6850.htm
68,Cooperative Code of the Philippines [Republic ...,https://www.chanrobles.com/republicactno6938.htm


# Export DataFrame object to .csv file

In [68]:
link_details.to_csv('Major Philippine Labor Law Resources link details data.csv')

## returns data frame of every law under the Major Philippine Labor Law Resources category

# Main

In [69]:
import test

# https://www.chanrobles.com/majorphilippinelaborlawresources.html
major_philippine_labor_law_resources_links = collect_link_details(
    driver, 
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center:nth-of-type(2) > center:nth-of-type(2) > center > table > tbody"    
)

page_details = test.collect_content(
    driver, 
    major_philippine_labor_law_resources_links['href'], 
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div > div:first-of-type > h2",
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2)",
    threshold=800
)

Wait value: True
Page title: LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOCIATES LAW FIRM

[92m Header: LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOCIATES LAW FIRM
[00m
[96m Text content:  


ON-LINE


This web page features the internet version of the
Labor Circular
regularly issued by the
Labor Division of Chan Robles & Associates Law Firm
to its clients and friends since 1991.
Related labor laws and issuances are contained in other pages linked to this page.
 
   LABOR CIRCULAR
ON-LINE
  No. 61                                       Series of 1998
  [previous issue, click here]
TOPIC At a Glance 
PETITIONS FOR CERTIORARI UNDER RULE 65 
OF THE RULES OF COURT
FROM DECISIONS OF THE NLRC NOW TO BE INITIALLY FILED 
WITH THE COURT OF APPEALS AND NO LONGER 
DIRECTLY WITH THE SUPREME COURT
[en banc]
[New Interpretation of "Appeals" from NLRC Decisions]
Case Title:
ST. MARTIN FUNERAL HOME VS.
NATIONAL LABOR RELATIONS COMMISSION, ET AL.
[G. R. No. 130866, September 16, 1998]
[en banc]
FACT

In [70]:
page_details

Unnamed: 0,page_link,page_header,page_text_content
0,https://www.chanrobles.com/legal4circular.htm#...,LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOC...,\n\n\nON-LINE\n\n\nThis web page features the...
1,https://www.chanrobles.com/presidentialdecreen...,AMENDMENTS TO THE LABOR CODE (PRESIDENTIAL DEC...,PHILIPPINE LABOR CIRCULAR\nON-LINE\n\n\n\nThis...
2,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
3,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
4,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
5,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
6,https://www.chanrobles.com/guidelinesgoverning...,GUIDELINES GOVERNING LABOR RELATIONS - CHAN RO...,This web page features the full text of the\nG...
7,https://www.chanrobles.com/guidelinesonpolicec...,GUIDELINES FOR THE CONDUCT OF INP/AFP PERSONNE...,This web page features the full text of the\nG...
8,https://www.chanrobles.com/amendedrulesonemplo...,AMENDED RULES ON EMPLOYEES' COMPENSATION - CHA...,This web page features the full text of the\nA...
9,https://www.chanrobles.com/doledepartmentorder...,"DOLE DEPARTMENT ORDER NO. 9, SERIES OF 1997 (A...",This web page features the full text of\nDepar...


## To do list
problem 1: since we already have the link for each page that
contains the text content we need now only extract the text
via the html tag that holds it the problem is would the html 
tag be the same for each page such that we can only use one
statement for each page and grab the non unique html tag 
that contains this text content, such that we can replicate 
this use of the html tag across all pages? if it's not we 
need to figure out a way to grab the text content of each page
iteratively taking into account pages with different html tags
that contain the text content

idea: 
1. if there is a new kind of xpath or html path that contains the
text content then add this to the list of lookups to use when opening
each link to see what xpath matches to the html element containing 
the text content of the page 

check if the first xpath of the first page is the same as the page in
the next iteration if not add this xpath to the list
in next iteration check if xpath matches other xpaths in list, and then
extract its content as done in the other xpaths

2. but since xpaths can't be accessed in chromes devtools inspect the
page for its content manually and search the element containing the
text content and find this element by its path, class, id, or css selector
once you loop there is a chance that other pages may not contain this
element and so what have you got to lose??

when other links cause a NoSuchElementException it is fine however in
links that cause no NoSuchElementException error save the link and mark it
so you can check the other links that raise the error and inspect manually
the html element that hold the text content

method:

figures:

cases:

problem 2: NoSuchElementException errors can sometimes occur in the
text_content_selector


problem 3: list out important details like:
* number of links all in all
* number of links that worked without raising NoSuchElementException error
either in text_content_selector or in header_selector
* number of links that did raise the NoSuchElementException error either in
text_content_selector or in header_selector
* the links that did work
* the links that raised a NoSuchElementException that will be up for
manual inspection and collect their xpaths to be converted to css selectors
for next job

problem 4: structuring the data frame
* see pandas documentation to setup columns/fields/attributes of dataset/data frame
* fields will be link, link text, page header, and pages text content, date when law was passed
* aside from header and text content date when law was passed must be also a part of it




## inspecting reject links manually

ACCEPTED
https://www.chanrobles.com/legal4circular.htm#LABOR%20CIRCULAR%20ON-LINE

header:
/html/body/div[1]/div[2]/div/div[5]/div/table/tbody/tr[1]/td/div/div[1]/h2
    html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div > div:first-of-type > h2

text content:
/html/body/div[1]/div[2]/div/div[5]/div/table/tbody/tr[1]/td/div/div[1]/div/table/tbody/tr/td/div[2]
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2)

REJECTED
https://www.chanrobles.com/legal4labor.htm#LABOR%20CODE%20OF%20THE%20PHILIPPINES%20%5BFULL%20TEXT%5D

header:
/html/body/div[1]/div/div/div[2]/div/div/div[1]/div/h1[2]/strong
html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > h1:nth-of-type(2) > strong

text content:
/html/body/div[1]/div/div/div[2]/div/div/div[1]/div/div/div[1]
html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > div > div:first-of-type
/html/body/div[1]/div/div/div[2]/div/div/div[1]/div/div
html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > div

https://www.chanrobles.com/republicactno6715.html
header:
html > body > div:first-of-type > div:first-of-type > div > div > div:first-of-type > h3

text content:
html > body > div:first-of-type > div:first-of-type > div > div

https://www.chanrobles.com/republicactno7730.htm#.Y1pFTHZByUk
header:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center > center:first-of-type

text content:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center > div

## 2nd pass

https://www.chanrobles.com/legal4labor.htm#LABOR%20CODE%20OF%20THE%20PHILIPPINES%20%5BFULL%20TEXT%5D

header:
html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > h1:nth-of-type(2) > strong

text content:
html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > div > div:first-of-type
html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > div

In [71]:
reject_links = ['https://www.chanrobles.com/legal4labor.htm#LABOR%20CODE%20OF%20THE%20PHILIPPINES%20%5BFULL%20TEXT%5D', 'https://www.chanrobles.com/republicactno6715.html', 'https://www.chanrobles.com/implementingrulesofthelaborcode.htm', 'https://www.chanrobles.com/republicactno7730.htm', 'https://www.chanrobles.com/republicactno7699.htm', 'https://www.chanrobles.com/republicactno9241.html', 'https://www.chanrobles.com/republicactno7171.htm', 'https://www.chanrobles.com/republicactno7881.htm', 'https://www.chanrobles.com/republicactno7305.htm', 'https://www.chanrobles.com/republicactno8759.htm', 'https://www.chanrobles.com/republicactno7641.htm', 'https://www.chanrobles.com/republicactno8558.htm', 'https://www.chanrobles.com/republicactno7277.htm', 'https://www.chanrobles.com/republicactno7610.html', 'https://www.chanrobles.com/republicactno7658.htm', 'https://www.chanrobles.com/republicactno9231.html', 'https://www.chanrobles.com/republicactno9231rules.html', 'https://www.chanrobles.com/republicactno7610trafficofchildren.html', 'https://www.chanrobles.com/republicactno7877rules.htm', 'https://www.chanrobles.com/republicactno7192.htm', 'https://www.chanrobles.com/republicactno7882.htm', 'https://www.chanrobles.com/republicactno6949.htm', 'https://www.chanrobles.com/republicactno9177.html', 'https://www.chanrobles.com/republicactno6725.htm', 'https://www.chanrobles.com/pslmcresolution032001.htm', 'https://www.chanrobles.com/republicactno7686.htm', 'https://www.chanrobles.com/republicactno7368.htm', 'https://www.chanrobles.com/republicactno6810.htm', 'https://www.chanrobles.com/republicactno7883.htm', 'https://www.chanrobles.com/republicactno7323.htm', 'https://www.chanrobles.com/republicactno6982.htm', 'https://www.chanrobles.com/republicactno6640.htm', 'https://www.chanrobles.com/republicactno6728.htm', 'https://www.chanrobles.com/republicactno6971.htm', 'https://www.chanrobles.com/republicactno6654.htm', 'https://www.chanrobles.com/republicactno6656.htm', 'https://www.chanrobles.com/republicactno6683.htm', 'https://www.chanrobles.com/republicactno6686.htm', 'https://www.chanrobles.com/republicactno6758.htm', 'https://www.chanrobles.com/republicactno6850.htm', 'https://www.chanrobles.com/republicactno6938.htm', 'https://www.chanrobles.com/republicactno6939.htm']

page_details = pd.concat([page_details.copy(), test.collect_content(
    driver, 
    reject_links,
    "html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > h1:nth-of-type(2) > strong",
    "html > body > div:first-of-type > div > div > div:nth-of-type(2) > div > div > div:first-of-type > div > div > div:first-of-type",
    threshold=800
)], ignore_index=True) 

Wait value: True
Page title: Preliminary Title : Labor Code of the Philippines : Presidential Decree No. 442, as Amended

[92m Header: THE
LABOR CODE
OF THE
PHILIPPINES
PRESIDENTIAL DECREE NO. 442,
AS AMENDED
[00m
[96m Text content: PRELIMINARY TITLE
  Chapter I
GENERAL PROVISIONS
  ARTICLE 1. Name of Decree. - This Decree shall be known as the "Labor Code of the Philippines".
  ART. 2. Date of effectivity. - This Code shall take effect six (6) months after its promulgation.
ART. 3. Declaration of basic policy. - The State shall afford protection to labor, promote full employment, ensure equal work opportunities regardless of sex, race or creed and regulate the relations between workers and employers. The State shall assure the rights of workers to self-organization, collective bargaining, security of tenure, and just and humane conditions of work.
  ART. 4. Construction in favor of labor. - All doubts in the implementation and interpretation of the provisions of this Code, includin

In [72]:
page_details

Unnamed: 0,page_link,page_header,page_text_content
0,https://www.chanrobles.com/legal4circular.htm#...,LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOC...,\n\n\nON-LINE\n\n\nThis web page features the...
1,https://www.chanrobles.com/presidentialdecreen...,AMENDMENTS TO THE LABOR CODE (PRESIDENTIAL DEC...,PHILIPPINE LABOR CIRCULAR\nON-LINE\n\n\n\nThis...
2,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
3,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
4,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
5,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
6,https://www.chanrobles.com/guidelinesgoverning...,GUIDELINES GOVERNING LABOR RELATIONS - CHAN RO...,This web page features the full text of the\nG...
7,https://www.chanrobles.com/guidelinesonpolicec...,GUIDELINES FOR THE CONDUCT OF INP/AFP PERSONNE...,This web page features the full text of the\nG...
8,https://www.chanrobles.com/amendedrulesonemplo...,AMENDED RULES ON EMPLOYEES' COMPENSATION - CHA...,This web page features the full text of the\nA...
9,https://www.chanrobles.com/doledepartmentorder...,"DOLE DEPARTMENT ORDER NO. 9, SERIES OF 1997 (A...",This web page features the full text of\nDepar...


## 3rd pass

https://www.chanrobles.com/republicactno6715.html
header:
html > body > div:first-of-type > div:first-of-type > div > div > div:first-of-type > h3

text content:
html > body > div:first-of-type > div:first-of-type > div > div

In [73]:
reject_links = ['https://www.chanrobles.com/republicactno6715.html', 'https://www.chanrobles.com/implementingrulesofthelaborcode.htm', 'https://www.chanrobles.com/republicactno7730.htm', 'https://www.chanrobles.com/republicactno7699.htm', 'https://www.chanrobles.com/republicactno9241.html', 'https://www.chanrobles.com/republicactno7171.htm', 'https://www.chanrobles.com/republicactno7881.htm', 'https://www.chanrobles.com/republicactno7305.htm', 'https://www.chanrobles.com/republicactno8759.htm', 'https://www.chanrobles.com/republicactno7641.htm', 'https://www.chanrobles.com/republicactno8558.htm', 'https://www.chanrobles.com/republicactno7277.htm', 'https://www.chanrobles.com/republicactno7610.html', 'https://www.chanrobles.com/republicactno7658.htm', 'https://www.chanrobles.com/republicactno9231.html', 'https://www.chanrobles.com/republicactno9231rules.html', 'https://www.chanrobles.com/republicactno7610trafficofchildren.html', 'https://www.chanrobles.com/republicactno7877rules.htm', 'https://www.chanrobles.com/republicactno7192.htm', 'https://www.chanrobles.com/republicactno7882.htm', 'https://www.chanrobles.com/republicactno6949.htm', 'https://www.chanrobles.com/republicactno9177.html', 'https://www.chanrobles.com/republicactno6725.htm', 'https://www.chanrobles.com/pslmcresolution032001.htm', 'https://www.chanrobles.com/republicactno7686.htm', 'https://www.chanrobles.com/republicactno7368.htm', 'https://www.chanrobles.com/republicactno6810.htm', 'https://www.chanrobles.com/republicactno7883.htm', 'https://www.chanrobles.com/republicactno7323.htm', 'https://www.chanrobles.com/republicactno6982.htm', 'https://www.chanrobles.com/republicactno6640.htm', 'https://www.chanrobles.com/republicactno6728.htm', 'https://www.chanrobles.com/republicactno6971.htm', 'https://www.chanrobles.com/republicactno6654.htm', 'https://www.chanrobles.com/republicactno6656.htm', 'https://www.chanrobles.com/republicactno6683.htm', 'https://www.chanrobles.com/republicactno6686.htm', 'https://www.chanrobles.com/republicactno6758.htm', 'https://www.chanrobles.com/republicactno6850.htm', 'https://www.chanrobles.com/republicactno6938.htm', 'https://www.chanrobles.com/republicactno6939.htm']

page_details = pd.concat([page_details.copy(), test.collect_content(
    driver, 
    reject_links,
    "html > body > div:first-of-type > div:first-of-type > div > div > div:first-of-type > h3",
    "html > body > div:first-of-type > div:first-of-type > div > div",
    threshold=800
)], ignore_index=True)

Wait value: True
Page title: REPUBLIC ACT NO. 6715 - AN ACT TO EXTEND PROTECTION TO LABOR, STRENGTHEN THE CONSTITUTIONAL RIGHTS OF WORKERS TO SELF-ORGANIZATION, COLLECTIVE BARGAINING AND PEACEFUL CONCERTED ACTIVITIES, FOSTER INDUSTRIAL PEACE AND HARMONY, PROMOTE THE PREFERENTIAL USE OF VOLUNTARY MODES OF SETTLING LABOR DISPUTES, AND REORGANIZE THE NATIONAL LABOR RELATIONS COMMISSION, AMENDING FOR THESE PURPOSES CERTAIN PROVISIONS OF PRESIDENTIAL DECREE NO. 442, AS AMENDED, OTHERWISE KNOWN AS THE LABOR CODE OF THE PHILIPPINES, APPROPRIATING FUNDS THEREFORE AND FOR OTHER PURPOSES

[92m Header: REPUBLIC ACT NO. 6715 - AN ACT TO EXTEND PROTECTION TO LABOR, STRENGTHEN THE CONSTITUTIONAL RIGHTS OF WORKERS TO SELF-ORGANIZATION, COLLECTIVE BARGAINING AND PEACEFUL CONCERTED ACTIVITIES, FOSTER INDUSTRIAL PEACE AND HARMONY, PROMOTE THE PREFERENTIAL USE OF VOLUNTARY MODES OF SETTLING LABOR DISPUTES, AND REORGANIZE THE NATIONAL LABOR RELATIONS COMMISSION, AMENDING FOR THESE PURPOSES CERTAIN PROVIS

In [74]:
page_details

Unnamed: 0,page_link,page_header,page_text_content
0,https://www.chanrobles.com/legal4circular.htm#...,LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOC...,\n\n\nON-LINE\n\n\nThis web page features the...
1,https://www.chanrobles.com/presidentialdecreen...,AMENDMENTS TO THE LABOR CODE (PRESIDENTIAL DEC...,PHILIPPINE LABOR CIRCULAR\nON-LINE\n\n\n\nThis...
2,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
3,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
4,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
5,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
6,https://www.chanrobles.com/guidelinesgoverning...,GUIDELINES GOVERNING LABOR RELATIONS - CHAN RO...,This web page features the full text of the\nG...
7,https://www.chanrobles.com/guidelinesonpolicec...,GUIDELINES FOR THE CONDUCT OF INP/AFP PERSONNE...,This web page features the full text of the\nG...
8,https://www.chanrobles.com/amendedrulesonemplo...,AMENDED RULES ON EMPLOYEES' COMPENSATION - CHA...,This web page features the full text of the\nA...
9,https://www.chanrobles.com/doledepartmentorder...,"DOLE DEPARTMENT ORDER NO. 9, SERIES OF 1997 (A...",This web page features the full text of\nDepar...


## 4th pass

https://www.chanrobles.com/republicactno7730.htm#.Y1pFTHZByUk

header:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center > center:first-of-type

text content:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center > div

In [75]:
reject_links = ['https://www.chanrobles.com/implementingrulesofthelaborcode.htm', 'https://www.chanrobles.com/republicactno7730.htm', 'https://www.chanrobles.com/republicactno7699.htm', 'https://www.chanrobles.com/republicactno7171.htm', 'https://www.chanrobles.com/republicactno7881.htm', 'https://www.chanrobles.com/republicactno7305.htm', 'https://www.chanrobles.com/republicactno8759.htm', 'https://www.chanrobles.com/republicactno7641.htm', 'https://www.chanrobles.com/republicactno8558.htm', 'https://www.chanrobles.com/republicactno7277.htm', 'https://www.chanrobles.com/republicactno7658.htm', 'https://www.chanrobles.com/republicactno7610trafficofchildren.html', 'https://www.chanrobles.com/republicactno7877rules.htm', 'https://www.chanrobles.com/republicactno7192.htm', 'https://www.chanrobles.com/republicactno7882.htm', 'https://www.chanrobles.com/republicactno6949.htm', 'https://www.chanrobles.com/republicactno6725.htm', 'https://www.chanrobles.com/pslmcresolution032001.htm', 'https://www.chanrobles.com/republicactno7686.htm', 'https://www.chanrobles.com/republicactno7368.htm', 'https://www.chanrobles.com/republicactno6810.htm', 'https://www.chanrobles.com/republicactno7883.htm', 'https://www.chanrobles.com/republicactno7323.htm', 'https://www.chanrobles.com/republicactno6982.htm', 'https://www.chanrobles.com/republicactno6640.htm', 'https://www.chanrobles.com/republicactno6728.htm', 'https://www.chanrobles.com/republicactno6971.htm', 'https://www.chanrobles.com/republicactno6654.htm', 'https://www.chanrobles.com/republicactno6656.htm', 'https://www.chanrobles.com/republicactno6683.htm', 'https://www.chanrobles.com/republicactno6686.htm', 'https://www.chanrobles.com/republicactno6758.htm', 'https://www.chanrobles.com/republicactno6850.htm', 'https://www.chanrobles.com/republicactno6938.htm', 'https://www.chanrobles.com/republicactno6939.htm']

page_details = pd.concat([page_details.copy(), test.collect_content(
    driver, 
    reject_links,
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center > center:first-of-type",
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center > div",
    threshold=800
)], ignore_index=True)

Wait value: True
Page title: OMNIBUS RULES TO IMPLEMENT THE LABOR CODE OF THE PHILIPPINES - CHAN ROBLES VIRTUAL LAW LIBRARY

[92m Header: 
[00m
Error Message: no such element: Unable to locate element: {"method":"css selector","selector":"html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center > div"}
  (Session info: chrome=107.0.5304.88)
Stacktrace:
Backtrace:
	Ordinal0 [0x00661ED3+2236115]
	Ordinal0 [0x005F92F1+1807089]
	Ordinal0 [0x005066FD+812797]
	Ordinal0 [0x005355DF+1005023]
	Ordinal0 [0x005357CB+1005515]
	Ordinal0 [0x00567632+1209906]
	Ordinal0 [0x00551AD4+1120980]
	Ordinal0 [0x005659E2+1202658]
	Ordinal0 [0x005518A6+1120422]
	Ordinal0 [0x0052A73D+960317]
	Ordinal0 [0x0052B71F+964383]
	GetHandleVerifier [0x0090E7E2+2743074]
	GetHandleVerifier [0x009008D4+2685972]
	GetHandleVerifier [0x006F2BAA+532202]
	GetHandleVerifier [0x00

In [76]:
page_details

Unnamed: 0,page_link,page_header,page_text_content
0,https://www.chanrobles.com/legal4circular.htm#...,LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOC...,\n\n\nON-LINE\n\n\nThis web page features the...
1,https://www.chanrobles.com/presidentialdecreen...,AMENDMENTS TO THE LABOR CODE (PRESIDENTIAL DEC...,PHILIPPINE LABOR CIRCULAR\nON-LINE\n\n\n\nThis...
2,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
3,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
4,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
5,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
6,https://www.chanrobles.com/guidelinesgoverning...,GUIDELINES GOVERNING LABOR RELATIONS - CHAN RO...,This web page features the full text of the\nG...
7,https://www.chanrobles.com/guidelinesonpolicec...,GUIDELINES FOR THE CONDUCT OF INP/AFP PERSONNE...,This web page features the full text of the\nG...
8,https://www.chanrobles.com/amendedrulesonemplo...,AMENDED RULES ON EMPLOYEES' COMPENSATION - CHA...,This web page features the full text of the\nA...
9,https://www.chanrobles.com/doledepartmentorder...,"DOLE DEPARTMENT ORDER NO. 9, SERIES OF 1997 (A...",This web page features the full text of\nDepar...


## 5th Pass

https://www.chanrobles.com/republicactno7881.htm

header:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(5), html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(6)

text content:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > div:first-of-type

## To do list

problem 5: sometimes headers will be separated into two separate html
elements. E.g. an element with text content "republic act no. 1239" 
and then followed by another sibling element that contains "an act implementing
..."

to solve this we must select the two elements using perhaps their css selectors
and because we are selecting two elements when this translates to selenium we
will have to return more than 1 element therefore <selenium web element>.find_element()
which returns only 1 element would not be feasible to use instead 
<selenium web element>.find_elements() would be much more suitable since it returns more
than two elements

problem 6: redefine the collect_content() function and find multiple headers instead of
one and then append these multiple headers together
cases: 
case 1: returns [header1, header2]
case 2: returns [header1]
case 3: returns [header1, header2, header3]
case 4: raises StaleElementReferenceException

problem 7: data frame will be link_text, link_href, appended headers or different headers
because in each page there may be an html element or multiple html element that hold the
header/s text e.g. page 1 has a header that is contained in a single html element, page 2
has headers contained in multiple html element

In [77]:
# otherwise if the callback does not return a true value and time
# period is up, then WebDriverWait raises a timeout error
try:
    driver.get('https://www.chanrobles.com/republicactno7881.htm')
    wait_val = WebDriverWait(driver, timeout=10).until(lambda driver: driver.execute_script('return document.readyState === "complete"'))
    print("Wait value: {}\nPage title: {}\n".format(wait_val, driver.title))
    
    # grab every element by this path on every page if it exists
    headers = driver.find_elements(By.CSS_SELECTOR, "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(5), html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(6), input[type='password']")
    text_content = driver.find_element(By.CSS_SELECTOR, "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > div:first-of-type")
    
    print("Headers: {}\nText content{}\n\n".format(headers, text_content.text))
    
except TimeoutError as error:
    print("Error {} has occured".format(error))
    driver.quit() # or .close()

except NoSuchElementException as error:
    print("Error {} has occured".format(error))

finally:
    print("finished")

Wait value: True
Page title: PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROBLES VIRTUAL LAW LIBRARY

Headers: [<selenium.webdriver.remote.webelement.WebElement (session="ecc5c54d9c401df58dfec8de0c8d8ecf", element="29215d4c-8feb-4b12-80a1-c012c094150f")>, <selenium.webdriver.remote.webelement.WebElement (session="ecc5c54d9c401df58dfec8de0c8d8ecf", element="f8fc212e-e589-491d-b74b-eee83f7fba9d")>]
Text content  AN ACT AMENDING CERTAIN PROVISIONS OF REPUBLIC ACT NO. 6657, ENTITLED "AN ACT INSTITUTING A COMPREHENSIVE AGRARIAN REFORM PROGRAM TO PROMOTE SOCIAL JUSTICE AND INDUSTRIALIZATION, PROVIDING THE MECHANISM FOR ITS IMPLEMENTATION, AND FOR OTHER PURPOSES"

SECTION 1. Section 3, Paragraph (b) of Republic Act No. 6657 is hereby amended to read as follows:chanroblesvirtualawlibrary

"Sec. 3. Definitions. — For the purpose of this Act, unless the context indicates otherwise:
chan robles virtual law library
"(b) Agriculture, Agricultural Enterprise or Agricultural Activity means the cultivat

I guess the only thing I don't understand about the <selenium web element>.find_elements() method is that when a css selector like: p div, is used does it retrieve all the div elements, as well as all the p elements in the page? For instance take the selector below:

html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(5), html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(6)

what if there are multiple html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(5) and multiple html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(6) html elements with these selectors? or what if one of these kinds of selectors does not exist in any of the html elements in the page?

In [78]:
# otherwise if the callback does not return a true value and time
# period is up, then WebDriverWait raises a timeout error
try:
    driver.get('https://www.chanrobles.com/republicactno7881.htm')
    wait_val = WebDriverWait(driver, timeout=10).until(lambda driver: driver.execute_script('return document.readyState === "complete"'))
    print("Wait value: {}\nPage title: {}\n".format(wait_val, driver.title))
    
    # grab every element by this path on every page if it exists
    first_headers = driver.find_elements(By.CSS_SELECTOR, "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(5), html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(6), input[type='password']")
    second_headers = driver.find_elements(By.CSS_SELECTOR, "input[type='password']")
    
    print("First headers: {}\nSecond headers: {}\nText content{}\n\n".format(first_headers, second_headers, text_content.text))
    
except TimeoutError as error:
    print("Error {} has occured".format(error))
    driver.quit() # or .close()

except (NoSuchElementException, StaleElementReferenceException) as error:
    print("Error {} has occured".format(error))

finally:
    print("finished")

Wait value: True
Page title: PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROBLES VIRTUAL LAW LIBRARY

Error Message: stale element reference: element is not attached to the page document
  (Session info: chrome=107.0.5304.88)
Stacktrace:
Backtrace:
	Ordinal0 [0x00661ED3+2236115]
	Ordinal0 [0x005F92F1+1807089]
	Ordinal0 [0x005066FD+812797]
	Ordinal0 [0x005092B4+823988]
	Ordinal0 [0x00509165+823653]
	Ordinal0 [0x00509400+824320]
	Ordinal0 [0x00530FD7+987095]
	Ordinal0 [0x00551A8C+1120908]
	Ordinal0 [0x0052BD84+966020]
	Ordinal0 [0x00551CA4+1121444]
	Ordinal0 [0x005659E2+1202658]
	Ordinal0 [0x005518A6+1120422]
	Ordinal0 [0x0052A73D+960317]
	Ordinal0 [0x0052B71F+964383]
	GetHandleVerifier [0x0090E7E2+2743074]
	GetHandleVerifier [0x009008D4+2685972]
	GetHandleVerifier [0x006F2BAA+532202]
	GetHandleVerifier [0x006F1990+527568]
	Ordinal0 [0x0060080C+1837068]
	Ordinal0 [0x00604CD8+1854680]
	Ordinal0 [0x00604DC5+1854917]
	Ordinal0 [0x0060ED64+1895780]
	BaseThreadInitThunk [0x753D6359+25]
	RtlGetA

So in the test above even including a css selector that does not exist in a series of css selectors that do exist the method returns only the html elements with in fact existing css selectors. With css selectors that do not exist that is passed in the <selenium web element>.find_elements() method the difference to that of the <selenium web element>.find_element() method is if no element with the specified selector/s exists at all then it raises the NoSuchElementException error while the former returns a StaleElementReferenceException error

existing -> list[WebElement]
existing, non-existing -> list[WebElement]
non-existing -> StaleElementReferenceException

The difference between stale element exception and no such element exception is that if you use the stale element exception, you know that the element is not found in the DOM, but it might appear again if you navigate to a different page.

# Redefined collect_content()

redefine the collect_content() function such that it can take in a selector and output one or more html elements that do in fact contain the header/s. E.g. at page 1 selector is passed in but outputs a list of only one element since the html element with that selector is only one, and so append this to an empty string once all elements of the list are exhausted, but since there is only one then only one string of the single elmeent is appended to an empty string. If it was the case that

![title](./find_elements%20instead%20of%20find_element.png)

In [79]:
# 5th pass code
reject_links = ['https://www.chanrobles.com/implementingrulesofthelaborcode.htm', 'https://www.chanrobles.com/republicactno7881.htm', 'https://www.chanrobles.com/republicactno7305.htm', 'https://www.chanrobles.com/republicactno8759.htm', 'https://www.chanrobles.com/republicactno7641.htm', 'https://www.chanrobles.com/republicactno8558.htm', 'https://www.chanrobles.com/republicactno7610trafficofchildren.html', 'https://www.chanrobles.com/republicactno7877rules.htm', 'https://www.chanrobles.com/republicactno7192.htm', 'https://www.chanrobles.com/republicactno6725.htm', 'https://www.chanrobles.com/pslmcresolution032001.htm', 'https://www.chanrobles.com/republicactno7686.htm', 'https://www.chanrobles.com/republicactno7883.htm', 'https://www.chanrobles.com/republicactno6640.htm', 'https://www.chanrobles.com/republicactno6654.htm', 'https://www.chanrobles.com/republicactno6758.htm', 'https://www.chanrobles.com/republicactno6938.htm']

page_details = pd.concat([page_details.copy(), test.collect_content(
    driver, 
    reject_links,
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(5), html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > strong:nth-of-type(6)",
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > div:first-of-type",
    threshold=800
)], ignore_index=True)

Wait value: True
Page title: OMNIBUS RULES TO IMPLEMENT THE LABOR CODE OF THE PHILIPPINES - CHAN ROBLES VIRTUAL LAW LIBRARY

[92m Header: 
[00m
Error Message: no such element: Unable to locate element: {"method":"css selector","selector":"html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr > td > div:first-of-type > div > table > tbody > tr:first-of-type > td > div:nth-of-type(2) > center > center > table > tbody > tr > td > div:first-of-type"}
  (Session info: chrome=107.0.5304.88)
Stacktrace:
Backtrace:
	Ordinal0 [0x00661ED3+2236115]
	Ordinal0 [0x005F92F1+1807089]
	Ordinal0 [0x005066FD+812797]
	Ordinal0 [0x005355DF+1005023]
	Ordinal0 [0x005357CB+1005515]
	Ordinal0 [0x00567632+1209906]
	Ordinal0 [0x00551AD4+1120980]
	Ordinal0 [0x005659E2+1202658]
	Ordinal0 [0x005518A6+1120422]
	Ordinal0 [0x0052A73D+960317]
	Ordinal0 [0x0052B71F+964383]
	GetHandleVerifier [0x0090E7E2+2743074]
	GetHandleVerifier [0x009008D4+2685972]
	GetHandleVeri

In [80]:
page_details

Unnamed: 0,page_link,page_header,page_text_content
0,https://www.chanrobles.com/legal4circular.htm#...,LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOC...,\n\n\nON-LINE\n\n\nThis web page features the...
1,https://www.chanrobles.com/presidentialdecreen...,AMENDMENTS TO THE LABOR CODE (PRESIDENTIAL DEC...,PHILIPPINE LABOR CIRCULAR\nON-LINE\n\n\n\nThis...
2,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
3,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
4,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
5,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
6,https://www.chanrobles.com/guidelinesgoverning...,GUIDELINES GOVERNING LABOR RELATIONS - CHAN RO...,This web page features the full text of the\nG...
7,https://www.chanrobles.com/guidelinesonpolicec...,GUIDELINES FOR THE CONDUCT OF INP/AFP PERSONNE...,This web page features the full text of the\nG...
8,https://www.chanrobles.com/amendedrulesonemplo...,AMENDED RULES ON EMPLOYEES' COMPENSATION - CHA...,This web page features the full text of the\nA...
9,https://www.chanrobles.com/doledepartmentorder...,"DOLE DEPARTMENT ORDER NO. 9, SERIES OF 1997 (A...",This web page features the full text of\nDepar...


# 6th pass

https://www.chanrobles.com/republicactno7305.htm#.Y1u0_HZByUk

header:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center:nth-of-type(6) > strong, html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center:nth-of-type(6) > p

text content:
html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2)

In [81]:
# 6th pass code
reject_links = ['https://www.chanrobles.com/implementingrulesofthelaborcode.htm', 'https://www.chanrobles.com/republicactno7305.htm', 'https://www.chanrobles.com/republicactno8759.htm', 'https://www.chanrobles.com/republicactno7641.htm', 'https://www.chanrobles.com/republicactno8558.htm', 'https://www.chanrobles.com/republicactno7610trafficofchildren.html', 'https://www.chanrobles.com/republicactno7877rules.htm', 'https://www.chanrobles.com/republicactno7192.htm', 'https://www.chanrobles.com/pslmcresolution032001.htm', 'https://www.chanrobles.com/republicactno7686.htm', 'https://www.chanrobles.com/republicactno7883.htm', 'https://www.chanrobles.com/republicactno6640.htm', 'https://www.chanrobles.com/republicactno6654.htm', 'https://www.chanrobles.com/republicactno6758.htm', 'https://www.chanrobles.com/republicactno6938.htm']

page_details = pd.concat([page_details.copy(), test.collect_content(
    driver, 
    reject_links,
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center:nth-of-type(6) > strong, html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2) > center:nth-of-type(6) > p",
    "html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2)",
    threshold=800
)], ignore_index=True)

Wait value: True
Page title: OMNIBUS RULES TO IMPLEMENT THE LABOR CODE OF THE PHILIPPINES - CHAN ROBLES VIRTUAL LAW LIBRARY

[92m Header: 
[00m
Error Message: no such element: Unable to locate element: {"method":"css selector","selector":"html > body > div:first-of-type > div:nth-of-type(2) > div > div:nth-of-type(5) > div > table > tbody > tr:first-of-type > td > div:first-of-type > div > table > tbody > tr > td > div:nth-of-type(2)"}
  (Session info: chrome=107.0.5304.88)
Stacktrace:
Backtrace:
	Ordinal0 [0x00661ED3+2236115]
	Ordinal0 [0x005F92F1+1807089]
	Ordinal0 [0x005066FD+812797]
	Ordinal0 [0x005355DF+1005023]
	Ordinal0 [0x005357CB+1005515]
	Ordinal0 [0x00567632+1209906]
	Ordinal0 [0x00551AD4+1120980]
	Ordinal0 [0x005659E2+1202658]
	Ordinal0 [0x005518A6+1120422]
	Ordinal0 [0x0052A73D+960317]
	Ordinal0 [0x0052B71F+964383]
	GetHandleVerifier [0x0090E7E2+2743074]
	GetHandleVerifier [0x009008D4+2685972]
	GetHandleVerifier [0x006F2BAA+532202]
	GetHandleVerifier [0x006F1990+527568]


In [82]:
page_details

Unnamed: 0,page_link,page_header,page_text_content
0,https://www.chanrobles.com/legal4circular.htm#...,LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOC...,\n\n\nON-LINE\n\n\nThis web page features the...
1,https://www.chanrobles.com/presidentialdecreen...,AMENDMENTS TO THE LABOR CODE (PRESIDENTIAL DEC...,PHILIPPINE LABOR CIRCULAR\nON-LINE\n\n\n\nThis...
2,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
3,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
4,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
...,...,...,...
63,https://www.chanrobles.com/republicactno7686.htm,Republic Act No. 7686,Sponsored by: The ChanRobles Group\n\n\n\n\...
64,https://www.chanrobles.com/republicactno7883.htm,REPUBLIC ACT No. 7883,Sponsored by: The ChanRobles Group\n\n\n\n\n\...
65,https://www.chanrobles.com/republicactno6654.htm,,Sponsored by: The ChanRobles Group\n\nA colle...
66,https://www.chanrobles.com/republicactno6758.htm,,Sponsored by: The ChanRobles Group\n\n\n\nA c...


# final cleanups

Note that https://www.chanrobles.com/implementingrulesofthelaborcode.htm is another link that contains sublinks in of itself unfortunately so extract links and remove this link in the link details 

In [83]:
reject_links = [
    'https://www.chanrobles.com/implementingrulesofthelaborcode.htm', 
    'https://www.chanrobles.com/republicactno6640.htm'
]

In [84]:
reject_link = 'https://www.chanrobles.com/republicactno6640.htm'

header = 'Republic Act No. 6640 AN ACT PROVIDING FOR AN INCREASE IN THE WAGE OF PUBLIC OR GOVERNMENT SECTOR EMPLOYEES ON A DAILY WAGE BASIS AND IN THE STATUTORY MINIMUM WAGE AND SALARY RATES OF EMPLOYEES AND WORKERS IN THE PRIVATE SECTOR AND FOR OTHER PURPOSES'

text_content = test.collect_content_individually_bs("""<div class="Section1">
<center><strong><font face="Bookman Old Style"><font color="#8b7b45"><font size="+1">AN
ACT PROVIDING FOR AN INCREASE IN THE WAGE OF PUBLIC OR GOVERNMENT
SECTOR
EMPLOYEES ON A DAILY WAGE BASIS AND IN THE STATUTORY MINIMUM WAGE AND
SALARY
RATES OF EMPLOYEES AND WORKERS IN THE PRIVATE SECTOR AND FOR OTHER
PURPOSES.</font></font></font></strong></center>
<br><br><div style="text-align: justify;" class="MsoNormal">
<p><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Section
1.&nbsp;All workers and employees
in the public or government sector on a daily wage basis shall receive
an increase in the sum of ten pesos (P10.00) a day.
</font></font></font></strong><font face="Arial Narrow"><span style="color: #dadad2; font-size: 1pt;">chan
robles virtual law library</span></font></p>
</div><br>
<div style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">The
Department of Budget and Management shall promulgate such rules and
regulations
to carry out the salary adjustment under this section.</font></font></font></strong></div>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.
2.&nbsp;The statutory minimum wage
rates of workers and employees in the private sector, whether
agricultural
or non-agricultural, shall be increased by ten pesos (P10.00) per day,
except non-agricultural workers and employees outside Metro Manila who
shall receive an increase of eleven pesos (P11.00) per day: Provided,
That
those already receiving above the minimum wage up to one hundred pesos
(P100.00) shall receive an increase of ten pesos (P10.00) per day.
Excepted
from the provisions of this Act are domestic helpers and persons
employed
in the personal service of another.&nbsp;</font></font></font></strong><font face="Arial Narrow"><span style="color: #dadad2; font-size: 1pt;">chan
robles virtual law library</span></font>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
3.&nbsp;Where the application of the
minimum wage increase prescribed under Section 2 results in distortions
in the wage structure within an establishment which gives rise to a
dispute
therein, such dispute shall first be settled voluntarily between the
parties
and in the event of a deadlock, such dispute shall be finally resolved
through compulsory arbitration by the National Labor Relations
Commission's
arbitration branch having jurisdiction over the workplace.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">It
shall be mandatory for the NLRC to conduct continuous hearings and
decide
any dispute arising under this section within thirty (30) days from the
time said dispute is formally submitted to it for arbitration. The
pendency
of a dispute arising from a wage distortion shall not in any way delay
the applicability of the wage increase covered by this Act.&nbsp;</font></font></font></strong><font face="Arial Narrow"><span style="color: #dadad2; font-size: 1pt;">chan
robles virtual law library</span></font>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">For
the purpose of this Act, wage distortion shall mean a situation where a
legislated increase in minimum wages results in the elimination or
severe
contraction on intentional quantitative differences in wage or salary
rates
between and among employee groups in an establishment as to effectively
obliterate the distinctions embodied in such wage structure based on
skills,
length of service, or other logical basis of differentiation.<span style=""></span></font></font></font></strong><font face="Arial Narrow"><span style="color: #dadad2; font-size: 1pt;">chan
robles virtual law library</span></font>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
4.&nbsp;All workers paid by result,
including those who are paid on piecework, takay, pakyaw or task basis,
shall receive not less than the applicable statutory minimum wage
prescribed
herein per eight (8) hours work a day, or a proportion thereof for
working
less than eight (8) hours.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
5.&nbsp;Learners, apprentices and
handicapped workers shall be entitled to not less than seventy-five
percent
(75%) of the applicable adjusted minimum wage. A handicapped worker is
one whose efficiency or quality of work is impaired by his disability
in
relation to the work performed.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">All
recognized learnership and apprenticeship agreements entered into
before
the effective date of this Act shall be considered as automatically
modified
insofar as their wage clauses are concerned to reflect the increase
provided
in this Act.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
6.&nbsp;In this case of contracts
for construction projects and for security, janitorial and similar
services,
the increase in the minimum wage of the workers shall be borne by the
employers
of the construction workers, security guards, janitors, and others
similarly
situated: Provided, however, That the principal or client of the
construction
and service contractor shall be subsidiarily liable: Provided, further,
That the subsidiary liability shall not apply to construction of family
homes worth not more than two hundred thousand pesos
(P200,000.00).<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
7.&nbsp;Nothing in this Act shall
be construed to reduce any existing allowances and benefits of any form
under existing laws, decrees, issuances, executive orders, and/or under
any contract or agreement between workers and employers.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
8.&nbsp;The Department of Labor and
Employment may, upon application by an employer in the retail business
regularly employing not more than ten (10) workers, grant him exemption
from compliance with the provisions of this Act.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Whenever
an application for exemption has been duly filed with the National
Wages
Council, action by the Regional Office of the Department on any
complaint
for alleged non-compliance with this Act shall be deferred pending
resolution
of the application for exemption by the Council.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">In
the event that applications for exemption are not granted, employees
shall
receive the appropriate compensation due them as provided for by this
law
plus interest of one percent (1%) per month retroactive to the
effectivity
of this law.&nbsp;</font></font></font></strong><font face="Arial Narrow"><span style="color: #dadad2; font-size: 1pt;">chan
robles virtual law library</span></font>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
9.&nbsp;The Secretary of Labor and
Employment shall promulgate the necessary rules and regulations to
implement
this Act.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
10.Any person, corporation, trust,
firm, partnership, association or entity violating this Act shall be
punished
by a fine not exceeding twenty-five thousand pesos (P25,000.00) and/or
imprisonment for not less than one (1) year nor more than two (2)
years:
Provided, That any person convicted under this Act shall not be
entitled
to the benefits provided for under the Probation Law.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">If
the violation is committed by a corporation, trust or firm,
partnership,
association or any other entity, the penalty of imprisonment shall be
imposed
upon the entity's responsible officers, including, but not limited to,
the president, vice-president, chief executive officer, general
manager,
managing director or partner.</font></font></font></strong><font face="Arial Narrow"><span style="color: #dadad2; font-size: 1pt;">chan
robles virtual law library</span></font>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
11.&nbsp;The wage increase under Section
1 hereof shall be funded from the Compensation and Organizational
Adjustment
Fund, the Contingent Fund, and other savings under Executive Order No.
87, otherwise known as the General Appropriations Act for Fiscal Year
1987,
or from any unappropriated funds of the National Treasury. Any
deficiency
shall be charged against the personal services savings of the National
Government: Provided, That the corresponding amount for the increase
therein
will be included in the annual General Appropriations Act for the
succeeding
years.<span style="color: #dadad2; font-size: 1pt;">cralaw</span>&nbsp;</font></font></font></strong>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
12.&nbsp;All laws, orders, issuances,
rules and regulations or part thereof inconsistent with the provisions
of this Act are hereby repealed or amended accordingly. If any
provision
or part of this Act, or the application thereof to any person or
circumstance,
as held invalid or unconstitutional, the remainder of this Act or the
application
of such provision or part thereof to other persons or circumstances,
shall
not be affected thereby.&nbsp;</font></font></font></strong><font face="Arial Narrow"><span style="color: #dadad2; font-size: 1pt;">chan
robles virtual law library</span></font>
</p>
<p style="text-align: justify;" class="MsoNormal"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1">Sec.&nbsp;
13.&nbsp;This Act shall take effect
the day following its publication in two (2) national newspapers of
general
circulation.&nbsp;</font></font></font></strong>
<br>
&nbsp;
<br>
&nbsp;
</p>
<div style="text-align: right;"><strong><font face="Bookman Old Style"><font color="#524744"><font size="+1"><em>Approved:</em>
December 10, 1987</font></font></font></strong></div>
</div>""")



  return BeautifulSoup(content).get_text()


In [85]:
page_details = pd.concat([page_details.copy(), pd.DataFrame({'page_link': reject_link, 'page_header': header, 'page_text_content': text_content}, index=[0])], ignore_index=True)

In [86]:
page_details

Unnamed: 0,page_link,page_header,page_text_content
0,https://www.chanrobles.com/legal4circular.htm#...,LABOR CIRCULAR ON-LINE - CHAN ROBLES AND ASSOC...,\n\n\nON-LINE\n\n\nThis web page features the...
1,https://www.chanrobles.com/presidentialdecreen...,AMENDMENTS TO THE LABOR CODE (PRESIDENTIAL DEC...,PHILIPPINE LABOR CIRCULAR\nON-LINE\n\n\n\nThis...
2,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes and ..."
3,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
4,https://www.chanrobles.com/bataspambansabilang...,"PHILIPPINE LAWS, STATUTES AND CODES - CHAN ROB...","A collection of Philippine laws, statutes an..."
...,...,...,...
64,https://www.chanrobles.com/republicactno7883.htm,REPUBLIC ACT No. 7883,Sponsored by: The ChanRobles Group\n\n\n\n\n\...
65,https://www.chanrobles.com/republicactno6654.htm,,Sponsored by: The ChanRobles Group\n\nA colle...
66,https://www.chanrobles.com/republicactno6758.htm,,Sponsored by: The ChanRobles Group\n\n\n\nA c...
67,https://www.chanrobles.com/republicactno6938.htm,,Sponsored by: The ChanRobles Group\n\n\n\nA...


# Export DataFrame to excel

In [87]:
page_details.to_csv('Major Philippine Labor Law Resources page details data.csv')

In [88]:
[(index, len(page_text_content)) for index, page_text_content in enumerate(page_details['page_text_content'])]

[(0, 3750),
 (1, 45398),
 (2, 9878),
 (3, 24779),
 (4, 11727),
 (5, 3278),
 (6, 12398),
 (7, 8498),
 (8, 66357),
 (9, 108721),
 (10, 24518),
 (11, 79283),
 (12, 64133),
 (13, 18895),
 (14, 71831),
 (15, 33006),
 (16, 90173),
 (17, 3912),
 (18, 4164),
 (19, 8036),
 (20, 38975),
 (21, 48868),
 (22, 26348),
 (23, 17340),
 (24, 1949),
 (25, 3152),
 (26, 5253),
 (27, 68286),
 (28, 4097),
 (29, 69665),
 (30, 20018),
 (31, 33885),
 (32, 21361),
 (33, 35311),
 (34, 6714),
 (35, 2343),
 (36, 3959),
 (37, 3711),
 (38, 46303),
 (39, 2888),
 (40, 3983),
 (41, 974),
 (42, 17334),
 (43, 6944),
 (44, 3393),
 (45, 13080),
 (46, 23911),
 (47, 9919),
 (48, 8459),
 (49, 12425),
 (50, 2926),
 (51, 1553),
 (52, 18218),
 (53, 10428),
 (54, 2081),
 (55, 27195),
 (56, 9688),
 (57, 3040),
 (58, 3297),
 (59, 9976),
 (60, 41200),
 (61, 8952),
 (62, 4977),
 (63, 15634),
 (64, 7775),
 (65, 1057),
 (66, 27936),
 (67, 105133),
 (68, 7079)]

In [89]:
[(index, len(page_text_content)) for index, page_text_content in enumerate(page_details['page_text_content']) if len(page_text_content) <= 800]

[]

In [90]:
page_details['page_text_content'][41]

"AN ACT TO DECLARE MARCH EIGHT OF EVERY YEAR AS A WORKING SPECIAL HOLIDAY TO BE KNOWN AS NATIONAL WOMEN'S DAY.\n  \nSection 1. The eighth day of March of every year is hereby declared as a working special holiday to be known as National Women's Day.cralaw \nSec. 2. To ensure meaningful observance of the holiday as herein declared, all heads of government agencies and instrumentalities, including government-owned and controlled corporations as well as local government units, and employers in the private sector shall encourage and afford sufficient time and opportunities for their employees to engage and participate in any activity conducted within the premises of their respective offices or establishments to celebrate National Women's Day.chan robles virtual law library\nSec. 3. This Act shall take effect two (2) days following its publication in at least two (2) national newspapers of general circulation. chan robles virtual law library\n \n  Approved: April 10, 1990"

In [91]:
page_details['page_text_content'][65]

'Sponsored by:  The ChanRobles Group\n\nA collection of Philippine laws, statutes and codes\nnot included or cited in the main indices\nof the Chan Robles Virtual Law Library.\n\nThis page features the full text of\nRepublic Act No. 6654\nAN ACT REPEALING SECTION FORTY OF PRESIDENTIAL DECREE NUMBERED EIGHT HUNDRED SEVEN, OTHERWISE KNOWN AS THE CIVIL SERVICE DECREE.\n\nREPUBLIC ACT NO. 6654\n\n\nAN ACT REPEALING SECTION FORTY OF PRESIDENTIAL DECREE NUMBERED EIGHT HUNDRED SEVEN, OTHERWISE KNOWN AS THE CIVIL SERVICE DECREE.\n\nSection 1. Section Forty of Presidential Decree Numbered Eight Hundred Seven, otherwise known as the Civil Service Decree, is hereby repealed.chan robles virtual law library\n\n           \nSec. 2. This Act shall take effect upon its approval. chan robles virtual law library\n\n\n \n  Approval: May 20, 1988\n\n\n\n\nTHE CHAN ROBLES VIRTUAL LAW LIBRARY - QUICK GLANCE\n Philippines| Worldwide|The Business Page\n\nBack to Top  -  Back to Main Index   -  Back to Home\n\

these links are questionable and may not contain the text content we need