In [1]:
#HTML parser for Canadian Record of Parliamentary Debates (Senate)
#version: M. Wong, 2018-02-26

######################################################################################################################
# OVERVIEW
######################################################################################################################
#Notes:     (1) Python 3.6 Anaconda
#           (2) Encoding UTF-8
#           (3) Runs from the parent directory of the Data folder:
#               run this file from the folder that contains the HTML data folder
#               downloaded from www.sencanada.ca [TO DO: Add and Link]

## Initialization

In [2]:
#Load Modules
import os
import re
import fnmatch
import pandas as pd
import numpy as np
import math
import time
import spacy
from datetime import datetime
from bs4 import BeautifulSoup
from spacy.lang.en import English

#Set pandas to display full column widths and full column numbers
#pandas >= 1.0 spells "no truncation" as None (and pandas 2.x rejects -1),
#while older releases only accept an integer, so try None first and fall back to -1
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

#Spacy Parser
#spaCy >= 3 removed shortcut links such as 'en' (spacy.load raises OSError for them),
#so fall back to the packaged small English pipeline name when the shortcut is unavailable
try:
    nlp = spacy.load('en')
except OSError:
    nlp = spacy.load('en_core_web_sm')

In [3]:
#Load the Senate authority file (column-oriented JSON) as a pandas dataframe,
#with its rows ordered by index label
authorityFile_df = (
    pd.read_json("authorityFile/authorityFile_senate.json", orient="columns")
    .sort_index()
)

In [14]:
#Read the csv whose header row names the output categories
headers = pd.read_csv("hansardExtractedSpeechesHeaders_senate.csv", delimiter=",")
display(headers)

#Column names of that csv, in file order
#NOTE(review): the recorded output shows 'Unnamed: 0' in the displayed frame but not in the
#printed list -- confirm whether that column is meant to be part of columns_list
columns_list = headers.columns.values.tolist()
print(columns_list)

Unnamed: 0,parliamentNumber,parliamentSession,orderOfBusinessRubric,subjectOfBusinessTitle,subjectOfBusinessID,subjectOfBusinessQualifier,speechId,interventionId,date,dateYMD,year,month,day,weekday,timeStamp,speakerName,party,parlInfoId,fullName,firstName,lastName,middleName,sex,age,daysInOffice,visibleMinority,indigenous,dateOfBirth,isEstimateDOB,birthProvince,birthCountry,firstDay,provOfRiding,parlInfoPage,affiliationType,affiliationDbId,floorLanguage,speech,speechFiltered,mentionedDocumentsTitle,mentionedDocumentsId,mentionedDocumentsType,mentionedEntityName,mentionedEntityId,mentionedEntityType,filename


['parliamentNumber', 'parliamentSession', 'orderOfBusinessRubric', 'subjectOfBusinessTitle', 'subjectOfBusinessID', 'subjectOfBusinessQualifier', 'speechId', 'interventionId', 'date', 'dateYMD', 'year', 'month', 'day', 'weekday', 'timeStamp', 'speakerName', 'party', 'parlInfoId', 'fullName', 'firstName', 'lastName', 'middleName', 'sex', 'age', 'daysInOffice', 'visibleMinority', 'indigenous', 'dateOfBirth', 'isEstimateDOB', 'birthProvince', 'birthCountry', 'firstDay', 'provOfRiding', 'parlInfoPage', 'affiliationType', 'affiliationDbId', 'floorLanguage', 'speech', 'speechFiltered', 'mentionedDocumentsTitle', 'mentionedDocumentsId', 'mentionedDocumentsType', 'mentionedEntityName', 'mentionedEntityId', 'mentionedEntityType', 'filename']


## Function Declarations

In [6]:
## These functions select wanted tags located within other tags (ex. <a> within <h1> or <b> within <p>). The inside tag may be any descendant, not only a direct child.
## Each returns two lists, one of ids and one of lines, taken from the first inside tag found in every outside tag (given attributes); outside tags without an inside tag are skipped
def tag_specifier(soup, outside_tag, inside_tag, class_attr_outside, id_attr_outside, style_attr_outside, class_attr_inside, id_attr_inside):
    """Collect the first ``inside_tag`` found within each matching ``outside_tag``.

    ``soup.find_all`` is queried once for ``outside_tag`` with the given class, id and
    style filters. Each result is then searched with ``find_all`` for ``inside_tag``
    using the inside class and id filters. Only the first hit per outside tag is kept;
    outside tags with no hit are skipped.

    Returns:
        A pair ``(ids, lines)`` of equally long lists: ``ids`` holds ``.get("id")`` of
        every kept tag (``None`` when the tag has no id) and ``lines`` holds the kept
        tags themselves, both in document order of the outside tags.
    """
    first_hits = []
    ## Single pass over the document: the ids and the lines come from the same matches
    for outer in soup.find_all(outside_tag, class_=class_attr_outside, id=id_attr_outside, style=style_attr_outside):
        inner_hits = outer.find_all(inside_tag, class_=class_attr_inside, id=id_attr_inside)
        if inner_hits:
            first_hits.append(inner_hits[0])
    return [hit.get("id") for hit in first_hits], first_hits

def tag_specifier_simple(soup, outside_tag, inside_tag):
    """Collect the first ``inside_tag`` found within each ``outside_tag``, without attribute filters.

    ``soup.find_all(outside_tag)`` is queried once and each result is searched with
    ``find_all(inside_tag)``. Only the first hit per outside tag is kept; outside tags
    with no hit are skipped.

    Returns:
        A pair ``(ids, lines)`` of equally long lists: ``ids`` holds ``.get("id")`` of
        every kept tag and ``lines`` holds the kept tags themselves.
    """
    first_hits = []
    ## Single pass over the document: the ids and the lines come from the same matches
    for outer in soup.find_all(outside_tag):
        inner_hits = outer.find_all(inside_tag)
        if inner_hits:
            first_hits.append(inner_hits[0])
    return [hit.get("id") for hit in first_hits], first_hits

def tag_specifier_textonly(soup, outside_tag, inside_tag, class_attr_outside, id_attr_outside, style_attr_outside, class_attr_inside, id_attr_inside):
    """Collect id and text of the first ``inside_tag`` found within each matching ``outside_tag``.

    ``soup.find_all`` is queried once for ``outside_tag`` with the given class, id and
    style filters. Each result is then searched with ``find_all`` for ``inside_tag``
    using the inside class and id filters. Only the first hit per outside tag is kept;
    outside tags with no hit are skipped.

    Returns:
        A pair ``(ids, texts)`` of equally long lists: ``ids`` holds ``.get("id")`` of
        every kept tag and ``texts`` holds the ``.get_text()`` of every kept tag.
    """
    first_hits = []
    ## Single pass over the document: the ids and the texts come from the same matches
    for outer in soup.find_all(outside_tag, class_=class_attr_outside, id=id_attr_outside, style=style_attr_outside):
        inner_hits = outer.find_all(inside_tag, class_=class_attr_inside, id=id_attr_inside)
        if inner_hits:
            first_hits.append(inner_hits[0])
    return [hit.get("id") for hit in first_hits], [hit.get_text() for hit in first_hits]



def authorityFile_get_ID(firstname, lastname, column):
    """Return one authority-file field for the first row matching a first and last name.

    Args:
        firstname: value compared for equality against the ``firstName`` column.
        lastname: value compared for equality against the ``lastName`` column.
        column: name of the ``authorityFile_df`` column to read.

    Returns:
        The stored value, or the string ``"NA"`` when the stored value is ``None``
        or an empty string.

    Raises:
        IndexError: if no row matches both names.
    """
    name_match = (authorityFile_df["firstName"] == firstname) & (authorityFile_df["lastName"] == lastname)
    ## First matching row wins when several rows share the same first and last name
    index = authorityFile_df.loc[name_match].index[0]
    value = authorityFile_df.loc[index, column]
    if value is None or value == "":
        return "NA"
    return value

## Manual spelling corrections, applied in this order, for names that the Hansard writes
## differently from the authority file
_SPEAKER_NAME_CORRECTIONS = (
    ("Dan Hays", "Daniel Hays"),
    ("Jack Wiebe", "John Wiebe"),
    ("Francis William Mahovlich", "Frank W. Mahovlich"),
    ("Francis WilliamMahovlich", "Frank W. Mahovlich"),
    ("Jim Tunney", "James Tunney"),
    ("Ross Fitzpatrick", "D. Ross Fitzpatrick"),
    ("Marie-P. Poulin", "Marie-P. Charette-Poulin"),
    ("Marie-Paule Poulin", "Marie-P. Charette-Poulin"),
    ("Raymond C. Setlawke", "Raymond C. Setlakwe"),
    ("E. Leo Kolber", "Ernest Leo Kolber"),
    ("J. Trevor Eyton", "John Trevor Eyton"),
    ("William Rompkey", "Bill Rompkey"),
    ("Jean Robert Gauthier", "Jean-Robert Gauthier"),
    ("Thomas J. McInnis", "Tom McInnis"),
    ("Consiglio De Nino", "Consiglio Di Nino"),
    ("Laurier L. Lapierre", "Laurier L. LaPierre"),
    ## NOTE(review): the comma in this replacement looks unintended -- confirm against the
    ## authority file spelling before changing it
    ("Sabi Marwah", "Sarabjit, Marwah"),
)

## Authority file columns copied into the result, in return-tuple order (right after parlInfoId)
_AUTHORITY_FIELDS = (
    "fullName", "firstName", "lastName", "middleName", "sex", "dateOfBirth", "isEstimateDOB",
    "visibleMinority", "indigenous", "birthProvince", "birthCountry", "firstDay", "provOfRiding",
    "parlInfoPage", "daysInOffice",
)


def authorityFile_get(speakerName, name_id_dict):
    """Match a speaker name against the authority file and return that senator's record.

    The name is first normalised with the manual corrections above. The authority file is
    then scanned row by row: the first ``lastName`` that occurs as a substring of the name,
    combined with the first ``firstName`` sharing that last name that also occurs as a
    substring, identifies the senator. On a match, ``name_id_dict`` is updated in place
    with ``lastName -> parlInfoId``.

    Args:
        speakerName: speaker name as extracted from the Hansard.
        name_id_dict: dict of last name to parlInfoId for senators already matched;
            mutated in place on a match.

    Returns:
        An 18-tuple ``(speakerName, parlInfoId, fullName, firstName, lastName, middleName,
        sex, dateOfBirth, isEstimateDOB, visibleMinority, indigenous, birthProvince,
        birthCountry, firstDay, provOfRiding, parlInfoPage, daysInOffice, name_id_dict)``.
        ``speakerName`` is the corrected name; every field between it and
        ``name_id_dict`` is ``None`` when no senator matches.

    Raises:
        ValueError: if the matched row's parlInfoId cannot be converted with ``int``
            (for instance when ``authorityFile_get_ID`` reports it as ``"NA"``).
    """
    ## Replace some names with authorityFile matches manually
    for hansard_spelling, authority_spelling in _SPEAKER_NAME_CORRECTIONS:
        if hansard_spelling in speakerName:
            speakerName = speakerName.replace(hansard_spelling, authority_spelling)

    for lastname in authorityFile_df["lastName"]:
        if lastname not in speakerName:
            continue
        same_lastname = authorityFile_df.loc[authorityFile_df["lastName"] == lastname]
        for firstname in same_lastname["firstName"]:
            if firstname not in speakerName:
                continue

            ## Get relevant info from authorityFile entry given name
            parlInfoId = int(authorityFile_get_ID(firstname, lastname, "parlInfoId"))
            ## One filtered lookup; every field is read from the first row with this id
            record = authorityFile_df.loc[authorityFile_df["parlInfoId"] == parlInfoId]
            fields = tuple(record[column].values[0] for column in _AUTHORITY_FIELDS)

            lastName = fields[_AUTHORITY_FIELDS.index("lastName")]
            name_id_dict[lastName] = parlInfoId

            return (speakerName, parlInfoId) + fields + (name_id_dict,)

    ## No senator matched: parlInfoId and every authority field are None
    return (speakerName,) + (None,) * (len(_AUTHORITY_FIELDS) + 1) + (name_id_dict,)
    

def speakerName_check(speakerName, name_id_dict, current_speaker):
    """Resolve a Hansard speaker label to an authority-file record.

    Three cases are handled:

    * Labels for the chair ("Hon. the Speaker", "Hon. the Acting Speaker",
      "Hon. The Speaker") are replaced by ``current_speaker`` and then matched through
      ``authorityFile_get``.
    * Labels containing "Senator " (used once a senator has already been introduced) are
      resolved through ``name_id_dict``: everything after the first space, right-stripped,
      is used as the key. If that lookup fails in any way, parlInfoId and all record
      fields are returned as ``None``.
    * Any other label is matched through ``authorityFile_get``.

    Args:
        speakerName: speaker label as extracted from the Hansard.
        name_id_dict: dict of last name to parlInfoId of senators introduced so far.
        current_speaker: name substituted for chair labels.

    Returns:
        An 18-tuple ``(speakerName, parlInfoId, fullName, firstName, lastName, middleName,
        sex, dateOfBirth, isEstimateDOB, visibleMinority, indigenous, birthProvince,
        birthCountry, firstDay, provOfRiding, parlInfoPage, daysInOffice, name_id_dict)``.
    """
    ## Speaker (the Senate position) designation converted to current senate Speaker name
    chair_labels = ("Hon. the Speaker", "Hon. the Acting Speaker", "Hon. The Speaker")
    refers_to_chair = any(label in speakerName for label in chair_labels)

    if refers_to_chair or "Senator " not in speakerName:
        if refers_to_chair:
            speakerName = current_speaker
        ## authorityFile_get already returns the tuple in the layout documented above
        return authorityFile_get(speakerName, name_id_dict)

    ## Account for some hansard formatting rules
    ## When already introduced, subsequent references sometimes refer to "Senator lastname"
    ## Therefore, must rely on the dict of last name and parlInfoId of introduced senators
    record_columns = (
        "fullName", "firstName", "lastName", "middleName", "sex", "dateOfBirth", "isEstimateDOB",
        "visibleMinority", "indigenous", "birthProvince", "birthCountry", "firstDay", "provOfRiding",
        "parlInfoPage", "daysInOffice",
    )
    try:
        introduced_as = speakerName.split(" ", 1)[1].rstrip()
        parlInfoId = int(name_id_dict[introduced_as])
        ## One filtered lookup; every field is read from the first row with this id
        record = authorityFile_df.loc[authorityFile_df["parlInfoId"] == parlInfoId]
        fields = tuple(record[column].values[0] for column in record_columns)
    except Exception:
        ## Best effort: an unknown last name or a missing authority row yields an empty record
        parlInfoId = None
        fields = (None,) * len(record_columns)

    return (speakerName, parlInfoId) + fields + (name_id_dict,)

## BeautifulSoup Parser

In [7]:
## Dry run: go through the code but do not output any data
dry_run = False #allowed values: True / False

## Debug messages: when True, the parser prints the lists it extracted from each HTML file
debug_messages = False #allowed values: True / False

## Reset the interventionId file; only set to True when starting a new parse from the beginning
reset_idfile = True #allowed values: True / False

In [8]:
#Set list of available parliaments and respective sessions
#Parliaments 35 and 36 are left out of the list (the parser treats them as a separate HTML format)
parliamentNumber_list = [37,38,39,40,41,42] #not included: [35,36]
parliamentSession_list = [1,2,3]#sessions looked up for each parliament

In [9]:
## Write initial interventionId file
## Only when a fresh parse is requested: restart the running interventionId at 0 and store it
## in the text file that the parser reads back at the start of every HTML file
if reset_idfile:
    previous_interventionId = 0
    with open("Data/Hansard/forParser/previous_interventionId.txt", "w") as id_file:
        id_file.write(str(previous_interventionId))

In [10]:
## Compiled pattern matching any string within <> i.e. an HTML tag:
## "<", one or more characters other than ">", then ">"
## Passed to re.sub by the parser to remove the tags from a line
pattern = re.compile("<[^>]+>")

In [11]:
#Main parser function definition
def senate_hansard_parser(parliamentNumber,parliamentSession,Senate_final_csv,pattern):
    print("Senate Hansard Parliament "+str(parliamentNumber)+" Session "+str(parliamentSession))
    
    #Use beautifulsoup to parse HTML
    
    try:
        directory = "Data/Hansard/forParser/Senate Hansard Parliament "+str(parliamentNumber)+" Session "+str(parliamentSession)
        
        ## List of lists, which will be appended to final csv
        ## Each sublist is a row in the final csv
        ## Each list encompasses all speeches in a single HTML file (aka in one sitting)
        
        for i,html_file in enumerate(os.listdir(directory)):
            
            ## Use this to skip to specific days for troubleshooting
            #if i < 109:
            #    continue
            #else:
            #    pass
            
            all_rows = []
            filename = os.fsdecode(html_file)
            print("HTML: "+os.path.join(directory, filename))
            print("")
            
            ## Get last known interventionId from file
            with open("Data/Hansard/forParser/previous_interventionId.txt", "r") as id_file:
                previous_interventionId = int(id_file.read())
                #print(previous_interventionId)
            
            #Get date from filename
            dateYMD = (filename.split("_")[2]).split(".")[0]
            #print(dateYMD)
            year, month, day = int(dateYMD.split("-")[0]), int(dateYMD.split("-")[1]), int(dateYMD.split("-")[2])
            #print(year,month,day)
            
            date = datetime(int(year), int(month), int(day))
            date = date.strftime("%B %d %Y")  
            #print(date)
            
            orderOfBusinessRubric_list = []
            subjectOfBusinessTitle_list = []
            with open(os.path.join(directory, filename)) as file:
                soup = BeautifulSoup(file,"html.parser")
                
                
                ## IMPORTANT: Because beautifulsoup does not have line number extraction and Senate hansard HTMLs
                ## do not have a nice format, must do everything manually
                
                ## Get entire HTML as list of lines for parsing
                #print(soup.prettify())
                all_lines = soup.find_all()
                #print(len(all_lines))
                #print(all_lines)
            
                ## This is for parliaments 35-36 as they utilize another different format
                if (parliamentNumber == 35) or (parliamentNumber == 36):
                    orderOfBusinessRubric_list = soup.find_all("h1", class_=None)
                    orderOfBusinessRubric_id_list = [None for a in orderOfBusinessRubric_list]
                    
                    subjectOfBusinessTitle_list = soup.find_all("h2", class_=None)
                    subjectOfBusinessTitle_id_list = [None for a in subjectOfBusinessTitle_list]

                    subjectOfBusinessQualifier_list = soup.find_all("h3", class_=None)

                else:
                    #NOTE: orders of business and subject of business do not have class attributes and are denoted by ids in the <a> tag
                    #(soup, outside_tag, inside_tag, class_attr_outside, id_attr_outside, style_attr_outside, class_attr_inside, id_attr_inside)
                    orderOfBusinessRubric_id_list, orderOfBusinessRubric_list = tag_specifier(soup,"h1","a",None,None,None,None,True)

                    ## If format is different, this handles them
                    if (orderOfBusinessRubric_id_list == []) and (orderOfBusinessRubric_list == []):
                        orderOfBusinessRubric_id_list = [a.get("id") for a in soup.find_all("h1", id=True, style="text-transform:uppercase;", class_=None)]
                        orderOfBusinessRubric_list = soup.find_all("h1", id=True, style="text-transform:uppercase;", class_=None)

                    ## Get subject of business
                    subjectOfBusinessTitle_id_list, subjectOfBusinessTitle_list = tag_specifier(soup,"h2","a",None,None,None,None,True) 

                    if (subjectOfBusinessTitle_id_list == []) and (subjectOfBusinessTitle_list == []):
                        subjectOfBusinessTitle_id_list = [a.get("id") for a in soup.find_all("h2", id=True, class_=None)]
                        subjectOfBusinessTitle_list = soup.find_all("h2", id=True, class_=None)
                    
                    ## Get subject of business qualifiers
                    subjectOfBusinessQualifier_list = tag_specifier_simple(soup,"h3","a")[1]
                    
                    if subjectOfBusinessQualifier_list == []:
                        subjectOfBusinessQualifier_list = soup.find_all("h3", class_=None)


                ## Get name of current Senate speaker using wildcard string matches
                try:
                    current_speaker = [a.get_text() for a in soup.find_all("h2", id=None)]
                    try:
                        current_speaker = (fnmatch.filter(current_speaker,"*Honourable*Speaker*")[0].splitlines())[1]
                    except IndexError:
                        current_speaker = (fnmatch.filter(current_speaker,"*Honourable*Speaker")[0].splitlines())[1]
                
                except IndexError:
                    current_speaker = [a.get_text() for a in soup.find_all("b", id=None, class_=None)]
                    try:
                        current_speaker = fnmatch.filter(current_speaker,"*Honourable*Speaker*")[0]
                    except IndexError:
                        try:
                            current_speaker = fnmatch.filter(current_speaker,"*Honourable*Speaker")[0]
                        except IndexError:
                            current_speaker = (tag_specifier(soup,"h2","span",None,None,None,True,None))[1][0].get_text()
                
                ## Get all italicized lines
                #italicized_list = soup.find_all("i", class_=None, align=None)
                #print(italicized_list)
                #print("")
                
                ## Get all speeches (denoted by <p> with no class)
                speech_list = soup.find_all("p", class_=None, align=None)
                #if speech_list = []:
                #    speech_list = soup.find_all("")

                ## Get all speaker names (denoted by <p> with <b> inner tag), avoid those within <u> tags
                ## NOTE: the format of the hansard changes to another standard after several sessions, 
                ##       must now account for that too
                
                #speakerName_unfiltered_list = tag_specifier_textonly(soup,"p","b",None,None,True,None,None)[1]
                #speakerName_u_list = tag_specifier_textonly(soup,"p","u",None,None,True,None,None)[1]
                #speakerName_list = [name for name in speakerName_unfiltered_list if name not in speakerName_u_list]
                
                speakerName_list = tag_specifier_textonly(soup,"p","b",None,None,True,None,None)[1]
                p_tag = "<p style=\"margin-top:3mm;margin-right:0mm;margin-bottom:0mm;margin-left:0in;text-indent:0in;\">" 
                #print(speakerName_list)
                
                ## if the speeches does not have a style attribute attached to the <p> tag, and thus the previous lines 
                ## will not return proper names
                if not any("Hon. " in name for name in speakerName_list):
                    
                    #speakerName_unfiltered_list = tag_specifier_textonly(soup,"p","b",None,None,None,None,None)[1]
                    #speakerName_u_list = tag_specifier_textonly(soup,"p","u",None,None,None,None,None)[1]
                    #speakerName_list = [name for name in speakerName_unfiltered_list if name not in speakerName_u_list]
                    
                    speakerName_list = tag_specifier_textonly(soup,"p","b",None,None,None,None,None)[1]
                    p_tag = "<p>"                
                                        
                    
                if debug_messages == True:
                    print("Order of Business List and IDs")
                    print(orderOfBusinessRubric_id_list)
                    print(orderOfBusinessRubric_list)
                    print("")
                    print("Subject of Business List and IDs")
                    print(subjectOfBusinessTitle_id_list)
                    print(subjectOfBusinessTitle_list)
                    print("")
                    print("Subject of Business Qualifiers List")
                    print(subjectOfBusinessQualifier_list)
                    print("")                
                    print("Speaker for this day")
                    print(current_speaker)
                    print("")
                    print("List of speeches")
                    print(speech_list)
                    print("")
                else:
                    pass
                
                
                #Set default values
                orderOfBusinessRubric = None
                subjectOfBusinessID = None
                subjectOfBusinessTitle = None
                subjectOfBusinessQualifier = None
                speechId = None
                interventionId = None
                weekday = None
                timeStamp = None
                speakerName = None
                #Party seems difficult to retrieve
                party = None
                speechFiltered = ""
                floorLanguage = "EN"
                
                parlInfoId = None
                fullName = None
                firstName = None
                lastName = None
                middleName = None
                sex = None
                age = None
                dateOfBirth = None
                isEstimateDOB = None
                visibleMinority = None
                indigenous = None
                birthProvince = None
                birthCountry = None
                firstDay = None
                provOfRiding = None
                parlInfoPage = None
                daysInOffice = None
                
                #Not sure how to get mentioned entities, so ignore again for now
                affiliationType = None
                affiliationDbId = None
                mentionedDocumentsTitle = None
                mentionedDocumentsId = None
                mentionedDocumentsType = None
                mentionedEntityName = None
                mentionedEntityId = None
                mentionedEntityType = None
                
                name_id_dict = dict()               
                
                for line in all_lines:
                    #print(line)
                    #print("")
                    ## Order of business, subject of business and speaker names are renewed only when a valid line is encountered
                    ## Otherwise, these values are kept at the previous values
                    
                    if line in orderOfBusinessRubric_list:
                        #print(orderOfBusinessRubric_id_list[orderOfBusinessRubric_list.index(line)])
                        
                        orderOfBusinessRubric = re.sub(pattern, "", str(line))
                        #print(orderOfBusinessRubric) 
                        
                    elif line in subjectOfBusinessTitle_list:
                        subjectOfBusinessID = subjectOfBusinessTitle_id_list[subjectOfBusinessTitle_list.index(line)]
                        #print(subjectOfBusinessID)
                        
                        subjectOfBusinessTitle = re.sub(pattern, "", str(line))
                        #print(subjectOfBusinessTitle)
                        
                        ## Reset qualifier to default
                        subjectOfBusinessQualifier = None
                        
                    elif line in subjectOfBusinessQualifier_list:
                        #subjectOfBusinessQualifier = line.get_text()
                        subjectOfBusinessQualifier = re.sub(pattern, "", str(line))
                        #print(subjectOfBusinessQualifier)
                    
                    #elif line in speakerName_filtered_list:
                    #    print(i, line)
                    #    speakerName = line.get_text()
                        #print(i,speakerName)
    
                    elif line in speech_list:
                        speech = str(line)
                        #speech = line.get_text()
                        #print(speech)
                        
                        if speech == "":
                            pass
                    
                        elif (speakerName == None) and not (speech.startswith(p_tag+"<b>Hon. ") or 
                                                            speech.startswith(p_tag+"<span lang=\"en-ca\"><b>Hon. ") or 
                                                            speech.startswith(p_tag+"<span lang=\"en-gb\"><b>Hon. ") or
                                                            speech.startswith("<b>Hon. ")):
                            pass
                        
                        elif (
                              speech.startswith(p_tag+"<b>Hon. Senators") or 
                              speech.startswith(p_tag+"<b>Some Hon. Senators") or 
                              speech.startswith(p_tag+"<span lang=\"en-ca\"><b>Hon. Senators") or
                              speech.startswith(p_tag+"<span lang=\"en-ca\"><b>Some Hon. Senators") or
                              speech.startswith(p_tag+"<span lang=\"en-gb\"><b>Hon. Senators") or
                              speech.startswith(p_tag+"<span lang=\"en-gb\"><b>Some Hon. Senators")
                             ):
                            #Do not add to dataframe if there are multiple speakers
                            pass
                        
                        elif "On the Order" in speech:
                            pass

                        ## Detect language changes and other signifiers
                        #elif any(speech in s for s in italicized_list):
                        #    print(speech)
                        
                        ## Attach response identification to qualifiers if applicable
                        elif "(<i>Response to question raised" in speech:
                            try:
                                subjectOfBusinessQualifier = subjectOfBusinessQualifier + " " + re.sub(pattern, "", speech)
                                #print(subjectOfBusinessQualifier)
                                #print("")
                            except Exception:
                                pass
                        elif "[<i>Translation</i>]" in speech:
                            floorLanguage = "FR"
                            #print(floorLanguage)
                            #print("")
                        elif "[<i>English</i>]" in speech:
                            floorLanguage = "EN"
                            #print(floorLanguage)
                            #print("")
                        
                        ## Get speakerNames
                        elif (  
                                speech.startswith(p_tag+"<b>Hon. ") or 
                                speech.startswith(p_tag+"<b>The Hon. ") or 
                                speech.startswith(p_tag+"<b>Senator ") or 
                                speech.startswith("<b>The Hon. ") or 
                                speech.startswith("<b>Hon. ") or 
                                speech.startswith("<b>Senator ") or
                                speech.startswith(p_tag+"<span lang=\"en-ca\"><b>Hon. ") or
                                speech.startswith(p_tag+"<span lang=\"en-ca\"><b>The Hon. ") or
                                speech.startswith(p_tag+"<span lang=\"en-ca\"><b>Senator" ) or
                                speech.startswith(p_tag+"<span lang=\"en-gb\"><b>Hon. ") or
                                speech.startswith(p_tag+"<span lang=\"en-gb\"><b>The Hon. ") or
                                speech.startswith(p_tag+"<span lang=\"en-gb\"><b>Senator" )
                            ):
                            #print(speech)
                            
                            ## Collect all info as a series of lists to then append to the final dataframe

                            try:
                                speakerName, speech = speech.split("</b>",1)[0], line.get_text().split(":",1)[1]
                            except IndexError:
                                speakerName = speech.split("</b>",1)[0]
                                speech = ""
                
                            speakerName = speakerName.replace(",","")
                            speakerName = speakerName.replace(":","")
                            #print(speakerName)
                            
                            """
                            try:
                                speech = speech.split(", moved:</p>",1)[1]
                            except Exception:
                                pass
                            
                            try:
                                speech = speech.split("pro tempore:</p>",1)[1]
                            except Exception:
                                pass
                            """
                            
                            ## Remove all strings within <> i.e. HTML tags
                            speakerName = re.sub(pattern, "", speakerName)
                            #speech = re.sub(pattern, "", speech)
                            #print(speech)
                            
                            #if any(speakerName in s for s in speakerName_list):
                                #print(speakerName)
                            speakerName, parlInfoId, fullName, firstName, lastName, middleName, sex, dateOfBirth, isEstimateDOB, visibleMinority, indigenous, birthProvince, birthCountry, firstDay, provOfRiding, parlInfoPage, daysInOffice, name_id_dict = speakerName_check(speakerName, name_id_dict, current_speaker)
                            #print(name_id_dict)
                            #else:
                            #    pass
                            
                            # ['parliamentNumber', 'parliamentSession', 'orderOfBusinessRubric', 'subjectOfBusinessTitle', 
                            # 'subjectOfBusinessID', 'subjectOfBusinessQualifier', 'speechId', 'interventionId', 'date', 
                            # 'dateYMD', 'year', 'month', 'day', 'weekday', 'timeStamp', 'speakerName', 'party', 
                            # 'parlInfoId', 'fullName', 'firstName', 'lastName', 'middleName', 'sex', 'age', 'daysInOffice',
                            # 'visibleMinority', 'indigenous', 'dateOfBirth', 'isEstimateDOB', 'birthProvince', 
                            # 'birthCountry', 'firstDay', 'provOfRiding', 'parlInfoPage', 'affiliationType', 
                            # 'affiliationDbId', 'floorLanguage', 'speech', 'speechFiltered', 'mentionedDocumentsTitle', 
                            # 'mentionedDocumentsId', 'mentionedDocumentsType', 'mentionedEntityName', 'mentionedEntityId',
                            # 'mentionedEntityType', 'filename']
                            
                            #print(speech)
                            if speech == "":
                                pass
                            else:
                                speech_nlp = nlp(speech) # converting speech into Spacy doc

                                for token in speech_nlp:
                                    pair = "_".join([token.text, token.tag_])
                                    speechFiltered = " ".join([speechFiltered, pair])

                                row = [parliamentNumber, parliamentSession, orderOfBusinessRubric, subjectOfBusinessTitle,
                                          subjectOfBusinessID, subjectOfBusinessQualifier, speechId, interventionId, date, 
                                          dateYMD, year, month, day, weekday, timeStamp, speakerName, party, parlInfoId,
                                          fullName, firstName, lastName, middleName, sex, age, daysInOffice, visibleMinority,
                                          indigenous, dateOfBirth, isEstimateDOB, birthProvince, birthCountry, firstDay, provOfRiding,
                                          parlInfoPage, affiliationType, affiliationDbId, floorLanguage, speech,
                                          speechFiltered, mentionedDocumentsTitle, mentionedDocumentsId, mentionedDocumentsType, 
                                          mentionedEntityName, mentionedEntityId, mentionedEntityType, filename]

                                ## Reset filtered speech to default
                                speechFiltered = ""

                                #print(df_row)
                                #print(len(df_row))

                                all_rows.append(row)
                                del row

                        else:
                            speech = line.get_text()
                            
                            ## Remove all strings within <> i.e. HTML tags using regex (really slow)
                            #speech = re.sub(pattern, "", speech)
                            
                            #print(speech)
                            
                            if speech == "":
                                pass
                            else:
                                speech_nlp = nlp(speech) # converting speech into Spacy doc

                                for token in speech_nlp:
                                    pair = "_".join([token.text, token.tag_])
                                    speechFiltered = " ".join([speechFiltered, pair])

                                row = [parliamentNumber, parliamentSession, orderOfBusinessRubric, subjectOfBusinessTitle,
                                          subjectOfBusinessID, subjectOfBusinessQualifier, speechId, interventionId, date, 
                                          dateYMD, year, month, day, weekday, timeStamp, speakerName, party, parlInfoId,
                                          fullName, firstName, lastName, middleName, sex, age, daysInOffice, visibleMinority,
                                          indigenous, dateOfBirth, isEstimateDOB, birthProvince, birthCountry, firstDay, provOfRiding,
                                          parlInfoPage, affiliationType, affiliationDbId, floorLanguage, speech,
                                          speechFiltered, mentionedDocumentsTitle, mentionedDocumentsId, mentionedDocumentsType, 
                                          mentionedEntityName, mentionedEntityId, mentionedEntityType, filename]

                                ## Reset filtered speech to default
                                speechFiltered = ""

                                all_rows.append(row)
                                del row

            
            ## Once the HTML file is parsed, append to csv            
            ## Create new dataframe that aggregates consecutive speeches of the same speaker in the same language 
            ## and in the same subjectOfBusinessTitle/subjectOfBusinessQualifier
            refined_all_rows, row_previous = [], all_rows[0]
            for row in all_rows[1:]:
                
                ## fullName, floorLanguage, subjectOfBusinessTitle and subjectOfBusinessQualifier
                ## are index 18, 36, 3 and 5 respectively
                
                ## If row changes in those areas, append previous row to new frame
                if (row_previous[18] != row[18]) or (row_previous[36] != row[36]) or (row_previous[3] != row[3]) or (row_previous[5] != row[5]):
                    refined_all_rows.append(row_previous)
                    row_previous = row
                    
                ## Else, keep all column values the same as previous row but append speech and speechFiltered
                ## speech and speechFiltered are index 37 and 38 respectively
                else:
                    #print(row_previous)
                    row_previous[37] = row_previous[37] + " " + row[37]
                    row_previous[38] = row_previous[38] + " " + row[38] 
                    #print(row_previous)
            
            ## Add in interventionId and speechId
            ## interventionId is formatted as a 7-digit number
            ## speechId is interventionId appended to date of speech
            ## speechId is index 6, interventionId is index 7, dateYMD is index 9
            
            for row in refined_all_rows:
                previous_interventionId += 1
                #interventionId
                row[7] = str(str(previous_interventionId).zfill(7))
                #speechId
                row[6] = str(row[9])+"-"+row[7]
            
            #print(refined_all_rows)
            
            df_refined_all_rows = pd.DataFrame(refined_all_rows, columns=Senate_final_csv.columns)
            #display(df_refined_all_rows)
            del refined_all_rows
            
            if dry_run == False:
                ## Record dataframe to csv
                if os.path.isfile("hansardExtractedSpeechesFull_senate.csv") == True:
                    with open("hansardExtractedSpeechesFull_senate.csv","a") as file:
                        df_refined_all_rows.to_csv(file,header=False,index=False)
                else:
                    df_refined_all_rows.to_csv("hansardExtractedSpeechesFull_senate.csv",index=False,header=True)

                ## Record final interventionId to file
                with open("Data/Hansard/forParser/previous_interventionId.txt", "w") as id_file:
                    id_file.write(str(previous_interventionId))
                    
            else:
                pass
                
            del df_refined_all_rows
            
            #break
            
        #display(Senate_final_csv.head(n=20))
    
    #Display error if parliament-session combo does not exist
    except FileNotFoundError:
        print("Error: Parliament and session combination does not exist or interventionId file does not exist")
        print("")

## Function Call

In [12]:
# Run the parser once for every parliament/session combination, walking all
# sessions of one parliament before moving on to the next parliament.
# The parser prints an error message itself when it hits FileNotFoundError,
# so missing combinations do not stop this loop.
parliament_session_pairs = [
    (number, session)
    for number in parliamentNumber_list
    for session in parliamentSession_list
]
for parliamentNumber, parliamentSession in parliament_session_pairs:
    senate_hansard_parser(parliamentNumber, parliamentSession, Senate_final_csv, pattern)

Senate Hansard Parliament 37 Session 1
HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0001_2001-01-29.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0002_2001-01-30.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0003_2001-01-31.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0004_2001-02-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0005_2001-02-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0006_2001-02-08.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0007_2001-02-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0008_2001-02-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0009_2001-02-22.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0010_2001-02-27.html

HTML: Data/Hansard/fo

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0088_2002-02-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0089_2002-02-19.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0090_2002-02-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0091_2002-02-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0092_2002-03-05.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0093_2002-03-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0094_2002-03-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0095_2002-03-12.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0096_2002-03-13.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 1/371_0097_2002-03-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0051_2003-05-01.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0052_2003-05-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0053_2003-05-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0054_2003-05-08.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0055_2003-05-13.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0056_2003-05-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0057_2003-05-15.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0058_2003-05-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0059_2003-05-28.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 2/372_0060_2003-05-29.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 3/373_0041_2004-05-12.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 37 Session 3/373_0042_2004-05-13.html

Senate Hansard Parliament 38 Session 1
HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0001_2004-10-04.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0002_2004-10-05.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0003_2004-10-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0004_2004-10-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0005_2004-10-19.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0006_2004-10-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0007_2004-10-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0008_2004-10-26.html

HTML: Data/Hansard/fo

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0086_2005-09-28.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0087_2005-09-29.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0088_2005-10-18.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0089_2005-10-19.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0090_2005-10-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0091_2005-10-25.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0092_2005-10-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0093_2005-10-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0094_2005-11-01.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Session 1/381_0095_2005-11-02.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 38 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0070_2007-02-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0071_2007-02-15.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0072_2007-02-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0073_2007-02-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0074_2007-02-22.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0075_2007-02-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0078_2007-03-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0079_2007-03-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0080_2007-03-22.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 1/391_0081_2007-03-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0046_2008-04-03.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0047_2008-04-08.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0048_2008-04-09.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0049_2008-04-10.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0050_2008-04-15.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0051_2008-04-16.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0052_2008-04-17.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0053_2008-04-29.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0054_2008-04-30.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Session 2/392_0055_2008-05-01.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 39 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0050_2009-06-23.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0051_2009-09-15.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0052_2009-09-16.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0053_2009-09-17.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0054_2009-09-29.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0055_2009-09-30.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0056_2009-10-01.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0057_2009-10-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0058_2009-10-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 2/402_0059_2009-10-08.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0054_2010-10-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0055_2010-10-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0056_2010-10-19.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0057_2010-10-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0058_2010-10-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0059_2010-10-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0060_2010-10-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0061_2010-10-28.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0062_2010-11-02.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Session 3/403_0063_2010-11-03.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 40 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0042_2011-12-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0043_2011-12-15.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0044_2011-12-16.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0045_2012-01-31.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0046_2012-02-01.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0047_2012-02-02.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0048_2012-02-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0049_2012-02-08.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0050_2012-02-09.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0051_2012-02-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0130_2012-12-12.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0131_2012-12-13.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0132_2012-12-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0133_2013-02-05.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0134_2013-02-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0135_2013-02-07.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0136_2013-02-12.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0137_2013-02-13.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0138_2013-02-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 1/411_0139_2013-02-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0036_2014-02-13.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0037_2014-02-25.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0038_2014-02-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0039_2014-02-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0040_2014-03-04.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0041_2014-03-05.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0042_2014-03-06.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0043_2014-03-25.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0044_2014-03-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0045_2014-03-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0124_2015-03-11.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0125_2015-03-12.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0126_2015-03-24.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0127_2015-03-25.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0128_2015-03-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0129_2015-03-30.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0130_2015-03-31.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0131_2015-04-01.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0132_2015-04-02.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Session 2/412_0133_2015-04-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 41 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0048_2016-06-13.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0049_2016-06-14.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0050_2016-06-15.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0051_2016-06-16.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0052_2016-06-17.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0053_2016-06-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0054_2016-06-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0055_2016-06-22.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0056_2016-09-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0057_2016-09-28.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0136_2017-06-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0137_2017-06-22.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0138_2017-09-19.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0139_2017-09-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0140_2017-09-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0141_2017-09-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0142_2017-09-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0143_2017-09-28.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0144_2017-10-03.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0145_2017-10-04.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Se

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0224_2018-06-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0225_2018-06-21.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0226_2018-09-18.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0227_2018-09-19.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0228_2018-09-20.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0229_2018-09-25.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0230_2018-09-26.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0231_2018-09-27.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0232_2018-10-02.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Session 1/421_0233_2018-10-03.html

HTML: Data/Hansard/forParser/Senate Hansard Parliament 42 Se