# Initialization

In [1]:
import pandas as pd
import re
import numpy as np
import urllib
from bs4 import BeautifulSoup as Soup
import os
from pathlib import Path

In [363]:
speakers = pd.DataFrame(columns=["speaker_id", 
                                "first_name", 
                                "last_name",
                                "chamber",
                                "type",
                                "party", 
                                "state", 
                                "district",
                                "bio_guide_id",
                                "congress_id"])

In [364]:
speakers

Unnamed: 0,speaker_id,first_name,last_name,chamber,type,party,state,district,bio_guide_id,congress_id


In [365]:
speeches = pd.DataFrame(columns=["speech_id",
                                 "last_name",
                                 "speaker_id",
                                 "proceeding_id", 
                                 "topic_id", 
                                 "word_count", 
                                 "speech_text",
                                 "file_name",
                                 "mods_file"])

In [366]:
speeches

Unnamed: 0,speech_id,last_name,speaker_id,proceeding_id,topic_id,word_count,speech_text,file_name,mods_file


# Master Mods Parsing

In [2]:
def get_all_extensions(file):
    handler = open(file).read()
    soup = Soup(handler, "lxml")
    return soup.find_all("extension")

In [3]:
def get_cong_member_tag(cong_member_extension):
    cong_member_tag = cong_member_extension.find("congmember")
    return cong_member_tag

In [4]:
def get_party(cong_member_tag):
    try:
        return cong_member_tag.attrs['party']
    except:
        return 'N/A'

def get_type(cong_member_tag):
    try:
        return cong_member_tag.attrs['type']
    except:
        return 'N/A'

def get_authority_id(cong_member_tag):
    try:
        return cong_member_tag.attrs['authorityid']
    except:
        print("auth id not found")
        return None

def get_bioguide_id(cong_member_tag):
    try:
        return cong_member_tag.attrs['bioguideid']
    except:
        return 'N/A'

def get_state(cong_member_tag):
    try:
        return cong_member_tag.attrs['state']
    except:
        return 'N/A'

def get_congress_id(cong_member_tag):
    try:
        return cong_member_tag.attrs['congress']
    except:
        return 'N/A'

def get_chamber(cong_member_tag):
    chambers = {'S': 'SENATE', 'H': 'HOUSE'}
    try:
        letter = cong_member_tag.attrs['chamber']
        return chambers[letter]
    except:
        return 'N/A'


In [5]:
def get_district(cong_member_extension):
    district_tag = cong_member_extension.find("district")
    if district_tag == None:
        return None
    return district_tag.string

In [6]:
def get_first_name(cong_member_tag):
    name_tag_arr = cong_member_tag.select("name[type='authority-fnf']")
    if name_tag_arr == []:
        print("no first_name")
        print(cong_member_tag)
        return 'N/A'
    name_tag = name_tag_arr[0]
    first_name = name_tag.text.split()[0]
    return first_name

In [7]:
def get_last_name(cong_member_tag):
    name_tag_arr = cong_member_tag.select("name[type='authority-lnf']")
    if name_tag_arr == []:
        print("no first_name")
        print(cong_member_tag)
        return 'N/A'
    name_tag = name_tag_arr[0]
    full_name = name_tag.string
    return re.match("[^,]*", full_name).group(0).upper()

In [13]:
filenames = os.listdir("/Users/halliday/projects/searchlight/parsing/mastermods")
filenames.remove('.DS_Store')
for filename in filenames:
    extensions = get_all_extensions("/Users/halliday/projects/searchlight/parsing/mastermods/" + filename)
    print("            " + filename)
    for extension in extensions:
        cong_member_tag = get_cong_member_tag(extension)
        if cong_member_tag == None:
            continue
#         if filename == 'mods112.xml':
#             print(cong_member_tag)
        authority_id = get_authority_id(cong_member_tag)
        if speakers[speakers['speaker_id'] == authority_id]['speaker_id'].count() > 0 or authority_id == "" or authority_id == None:
            continue
        cong_member_dict = {'speaker_id': authority_id, 
                            'first_name': get_first_name(cong_member_tag),
                            'last_name': get_last_name(cong_member_tag),
                            'chamber': get_chamber(cong_member_tag),
                            'type': get_type(cong_member_tag),
                            'party': get_party(cong_member_tag),
                            'state': get_state(cong_member_tag),
                            'district': get_district(extension),
                            'bio_guide_id': get_bioguide_id(cong_member_tag),
                            'congress_id': get_congress_id(cong_member_tag)}
        speakers = speakers.append(cong_member_dict, ignore_index=True)

            mods113.xml


NameError: name 'speakers' is not defined

In [None]:
speakers.head()

In [319]:
speakers[speakers['congress_id'] == 'N/A']

Unnamed: 0,speaker_id,first_name,last_name,chamber,type,party,state,district,bio_guide_id,congress_id


In [320]:
speakers = speakers.sort_values('last_name')

In [321]:
speakers.to_csv('masterspeakers.csv', index=False)

# Speeches Parsing

In [367]:
speakersDir = Path('/Users/halliday/projects/searchlight/parsing')
speakers = pd.read_csv(speakersDir/'updatedspeakers.csv')
speakers.head()

Unnamed: 0,speaker_id,first_name,last_name,chamber,type,party,state,district,bio_guide_id,congress_id
0,2,Neil,ABERCROMBIE,HOUSE,REPRESENTATIVE,D,HI,1.0,A000014,107
1,1269,Spencer,ABRAHAM,SENATE,SENATOR,R,MI,,A000355,106
2,2244,Ralph,ABRAHAM,HOUSE,REPRESENTATIVE,R,LA,5.0,A000374,114
3,4,Gary,ACKERMAN,HOUSE,REPRESENTATIVE,D,NY,5.0,A000022,107
4,2006,Sandy,ADAMS,HOUSE,REPRESENTATIVE,R,FL,24.0,A000366,112


In [8]:
def remove_space(regex):
    return regex.group().replace(' ', '')

In [9]:
def sep_speech(filepath):
    parse_file = ''
    with open(filepath) as file:
        for line in file:
            parse_file += line
    parse_file = parse_file.replace('\n', '')
    parse_file = re.sub('Mr. [A-Z][a-z]', remove_space, parse_file)
    
    split = re.split(r'Mr. |Ms. |Mrs. ', parse_file)
    split.pop(0)
    name_and_speech = []
    for i in np.arange(len(split)):
        try:
            lastname = re.match('[A-Z]*\. ', split[i]).group(0)[:-2]
            name_and_speech += [lastname]
            value = re.sub('[A-Z]\w*\. ', '', split[i])
            name_and_speech += [value]
        except:
            continue
    return name_and_speech

In [10]:
def sep_date_from_file(file):
    abcdef = re.findall('[0-9]{4}-[0-9]{2}-[0-9]{2}', file)
    return re.split('-', abcdef[0])

In [11]:
def is_int(s):
    try: 
        int(s)
        return True
    except ValueError:
        return False

In [12]:
def find_title(file_path):
    parse_file = ''
    with open(file_path) as file:
        for line in file:
            parse_file += line
    parse_file = parse_file.replace('Mr. President', 'MrPresident')
    titles = re.findall('[A-Z \'-]+[A-Z0-9-,\. ]*[Continued]*\\n', parse_file)
    if is_int(titles[0].strip().replace('\n', '')):
        return titles[1].strip()
    else:
        return titles[0].strip()

In [13]:
def fix_surname_typos(name):
    if name == 'SOUZZI':
        return 'SUOZZI'
    if name == 'VANHOLLEN':
        return 'VAN HOLLEN'
    if name == 'FISHCER':
        return 'FISCHER'
    return name

# Local Mods Parsing

In [14]:
def get_cong_member_tag_from_mods(last_name, mods_file_path):
    try:
        handler = open(mods_file_path).read()
    except:
        return None
    soup = Soup(handler, "lxml")
    cong_member_tags = soup.find_all("congmember")
    matched_cong_member_tag = None
    for i in range(len(cong_member_tags)):
        curr_last_name = get_last_name(cong_member_tags[i])
        if curr_last_name == last_name:
            matched_cong_member_tag = cong_member_tags[i]
            break
    return matched_cong_member_tag

In [15]:
def get_cong_member_info(last_name, mods_file_path):
    matched_cong_member_tag = get_cong_member_tag_from_mods(last_name, mods_file_path)
    if matched_cong_member_tag == None:
        return {'speaker_id': None, 
                'first_name': 'N/A',
                'last_name': last_name,
                'type': 'N/A',
                'chamber': 'N/A',
                'party': 'N/A',
                'state': 'N/A',
                'district': None,
                'bio_guide_id': 'N/A',
                'congress_id': 'N/A'}
    else:
        return {'speaker_id': get_authority_id(matched_cong_member_tag), 
                'first_name': get_first_name(matched_cong_member_tag),
                'last_name': last_name,
                'chamber': get_chamber(matched_cong_member_tag),
                'type': get_type(matched_cong_member_tag),
                'party': get_party(matched_cong_member_tag),
                'state': get_state(matched_cong_member_tag),
                'district': None,
                'bio_guide_id': get_bioguide_id(matched_cong_member_tag),
                'congress_id': get_congress_id(matched_cong_member_tag)}

In [16]:
def get_authority_id_from_mods(last_name, mods_file_path):
    matched_cong_member_tag = get_cong_member_tag_from_mods(last_name, mods_file_path)
    if matched_cong_member_tag == None:
        return 99999999999999
    authority_id = get_authority_id(matched_cong_member_tag)
    if authority_id == None:
        return 99999999999999
    else:
        return authority_id

In [17]:
def get_bill_context(bill_tag):
    try:
        return bill_tag.attrs['context']
    except:
        return 'N/A'
def get_bill_congress(bill_tag):
    try:
        return bill_tag.attrs['congress']
    except:
        return 'N/A'
def get_bill_number(bill_tag):
    try:
        return bill_tag.attrs['number']
    except:
        return 'N/A'
def get_bill_type(bill_tag):
    try:
        return bill_tag.attrs['type']
    except:
        return 'N/A'

In [21]:
def populate_speeches(count, folder, next_index):
    
    speakersDir = Path('/Users/halliday/projects/searchlight/parsing')
    speakers = pd.read_csv(speakersDir/'updatedspeakers.csv')
    
    speeches = pd.DataFrame(columns=["speech_id",
                                 "last_name",
                                 "speaker_id",
                                 "proceeding_id", 
                                 "topic_id", 
                                 "word_count", 
                                 "speech_text",
                                 "file_name",
                                 "mods_file"])
    
    bills = pd.DataFrame(columns=["mods_file",
                              "congress_id",
                              "context",
                              "bill_number",
                              "bill_type"])

    #collects speaker-speech pairs from text files
    def collect_pairs(folder):
        nonlocal speeches
        nonlocal next_index
        speech_count = next_index
        folder_path = "/Users/halliday/projects/searchlight/parsing/" + folder
        list_of_files = os.listdir(folder_path)
        for file in list_of_files:
            if file.endswith(".txt"):
                print(file)
                if file == 'CREC-2018-03-22-pt1-PgH1769-2.txt':
                    continue
                if file == 'CREC-2017-09-06-pt1-PgH6695.txt':
                    continue
                file_path = "/Users/halliday/projects/searchlight/parsing/" + folder + "/" + file
                mods_file = file.replace('.txt', '.xml')
                separated = sep_speech(file_path)
                i = 0
                while i < len(separated):
                    separated_surname = fix_surname_typos(separated[i])
                    text = separated[i+1]
                    text = text.replace('MrPresident', 'Mr. President')
                    if len(text) > 30:
                        row = {"speech_id": speech_count,
                               "last_name": separated_surname,
                               "speaker_id": 99999999999999,
                               "proceeding_id": "proceeding_id", 
                               "topic_id": "topic_id",
                               "word_count": len(text.split()), 
                               "speech_text": text,
                               "file_name": file,
                               "mods_file": mods_file}
                        speech_count += 1
                        speeches = speeches.append(row, ignore_index=True)     
                    i += 2
                print('finished with file ', speech_count)
    
    collect_pairs(folder)
        
    def get_speaker_id(last_name, mods_file_path):
        nonlocal speakers
        possible_speakers = speakers[speakers['last_name'] == last_name]
        if possible_speakers.shape[0] == 0:
            new_speaker = get_cong_member_info(last_name, mods_file_path)
            speakers = speakers.append(new_speaker, ignore_index=True)
            print(speakers.shape[0])
            speakers = speakers.sort_values('last_name')
            speakers.to_csv('updatedspeakers.csv', index=False)
            print("wrote in new speaker")
            print(new_speaker)
            return new_speaker['speaker_id']
        elif possible_speakers.shape[0] == 1:
            print('used existing row')
            return possible_speakers.iloc[0]['speaker_id']
        else:
            mods_speaker_id = get_authority_id_from_mods(last_name, mods_file_path)
            if int(mods_speaker_id) > 100000:
                print("speaker not found in mods: " + last_name)
                return None
            matched_speaker = possible_speakers[possible_speakers['speaker_id'] == int(mods_speaker_id)]
            if matched_speaker.shape[0] == 1:
                print("speaker matched successfully, used existing row")
                return matched_speaker.iloc[0]['speaker_id']
            elif matched_speaker.shape[0] == 0:
                new_speaker = get_cong_member_info(last_name, mods_file_path)
                speakers = speakers.append(new_speaker, ignore_index=True)
                print("wrote in new speaker")
                print(new_speaker)
                speakers = speakers.sort_values('last_name')
                speakers.to_csv('updatedspeakers.csv', index=False)
                return new_speaker['speaker_id']
    
    def get_bills(mods_file_path, mods_file):
        nonlocal bills
        
        file_exists = bills[bills['mods_file'] == mods_file]
        if file_exists.shape[0] > 0:
            return
        
        try:
            handler = open(mods_file_path).read()
        except:
            return
        soup = Soup(handler, "lxml")
        bill_tags = soup.find_all("bill")

        if len(bill_tags) == 0:
            return
        else:
            for bill_tag in bill_tags:
                new_bill_row = {"mods_file": mods_file,
                               "congress_id": get_bill_congress(bill_tag),
                               "context": get_bill_context(bill_tag),
                               "bill_number": get_bill_number(bill_tag),
                               "bill_type": get_bill_type(bill_tag)}
                bills = bills.append(new_bill_row, ignore_index=True)        
    
    #initialize new columns
    speeches['proceeding_title'], speeches['year'], speeches['month'], speeches['day'] = "", 0, 0, 0
    
    #collect speaker_ids, titles, dates, and bills
    for i in range(speeches.shape[0]):
        
        curr_row = speeches.iloc[i]
        last_name = curr_row['last_name']
        mods_file_path = "/Users/halliday/projects/searchlight/parsing/" + folder + "/" + curr_row['mods_file']
        text_file_path = "/Users/halliday/projects/searchlight/parsing/" + folder + "/" + curr_row['file_name']
        
        #collect speaker_ids, titles, and dates
        speaker_id = get_speaker_id(last_name, mods_file_path)
        title = find_title(text_file_path)
        year, month, day = sep_date_from_file(curr_row['file_name'])
        
        speeches.loc[i, "speaker_id"] = speaker_id
        speeches.loc[i, "proceeding_title"] = title
        speeches.loc[i, "year"] = int(year)
        speeches.loc[i, "month"] = int(month)
        speeches.loc[i, "day"] = int(day)
        
        #populate bills table if necessary
        get_bills(mods_file_path, curr_row['mods_file'])
    
    speeches = speeches.sort_values('speech_id')
    bills = bills.sort_values('mods_file')
    
    speeches.to_csv('./parsing_results/' + 'speeches_' + str(count) + ".csv", index=False)
    bills.to_csv('./parsing_results/' + 'bills_' + str(count) + ".csv", index=False)
    
    print("                " + "saved " + "speeches_" + str(count))
    print("                " + "saved " + "bills_" + str(count))
    
    return max(speeches['speech_id'])
    

In [22]:
count = 1
folders = os.listdir("./scraped_folders")
print(folders)
next_index = 0
for folder in folders:
    next_index = populate_speeches(count, "scraped_folders/" + folder, next_index)
    next_index += 1
    count += 1

['scrape2']
CREC-2017-05-17-pt1-PgH4312-2.txt
finished with file  0
CREC-2017-05-17-pt1-PgH4236-6.txt
finished with file  0
CREC-2017-06-15-pt1-PgH4923-2.txt
finished with file  0
CREC-2017-04-07-pt1-PgS2463-2.txt
finished with file  0
CREC-2017-05-16-pt1-PgH4203-9.txt
finished with file  0
CREC-2017-07-18-pt1-PgS4030.txt
finished with file  0
CREC-2017-07-12-pt1-PgH5439.txt
finished with file  1
CREC-2017-04-24-pt1-PgS2475-3.txt
finished with file  1
CREC-2017-07-13-pt1-PgS3992-2.txt
finished with file  2
CREC-2017-05-04-pt1-PgH4180-2.txt
finished with file  2
CREC-2017-06-06-pt1-PgH4642-6.txt
finished with file  3
CREC-2017-05-18-pt1-PgH4345-2.txt
finished with file  4
CREC-2017-05-01-pt1-PgS2648.txt
finished with file  5
CREC-2017-04-27-pt1-PgS2612-4.txt
finished with file  6
CREC-2017-06-27-pt1-PgH5183-2.txt
finished with file  6
CREC-2017-07-31-pt1-PgS4629-2.txt
finished with file  6
CREC-2017-05-02-pt1-PgH3068-10.txt
finished with file  6
CREC-2017-05-19-pt1-PgH4390-8.txt
finishe

finished with file  348
CREC-2017-06-14-pt1-PgH4918-3.txt
finished with file  348
CREC-2017-05-18-pt1-PgH4358-6.txt
finished with file  348
CREC-2017-06-13-pt1-PgH4900-2.txt
finished with file  348
CREC-2017-07-11-pt1-PgH5433-5.txt
finished with file  348
CREC-2017-06-12-pt1-PgH4854-9.txt
finished with file  348
CREC-2017-06-12-pt1-PgS3399.txt
finished with file  348
CREC-2017-07-20-pt1-PgH6146-8.txt
finished with file  348
CREC-2017-05-25-pt1-PgS3224.txt
finished with file  348
CREC-2017-07-24-pt1-PgS4135-3.txt
finished with file  348
CREC-2017-07-24-pt1-PgH6213-5.txt
finished with file  348
CREC-2017-07-17-pt1-PgH5903-5.txt
finished with file  349
CREC-2017-05-19-pt1-PgH4380.txt
finished with file  350
CREC-2017-05-24-pt1-PgH4518-4.txt
finished with file  351
CREC-2017-05-25-pt1-PgS3225-5.txt
finished with file  351
CREC-2017-04-26-pt1-PgS2549.txt
finished with file  352
CREC-2017-05-03-pt1-PgS2721.txt
finished with file  352
CREC-2017-05-01-pt1-PgS2649.txt
finished with file  353
CR

finished with file  473
CREC-2017-06-28-pt1-PgH5296-7.txt
finished with file  473
CREC-2017-04-26-pt1-PgH2859.txt
finished with file  474
CREC-2017-04-27-pt1-PgH2942-10.txt
finished with file  474
CREC-2017-07-27-pt1-PgH6518-24.txt
finished with file  474
CREC-2017-04-26-pt1-PgH2897-25.txt
finished with file  474
CREC-2017-04-06-pt1-PgH2791-2.txt
finished with file  474
CREC-2017-07-17-pt1-PgS4009-5.txt
finished with file  474
CREC-2017-06-15-pt1-PgH4935.txt
finished with file  475
CREC-2017-04-06-pt1-PgH2789-13.txt
finished with file  475
CREC-2017-07-24-pt1-PgH6170.txt
finished with file  479
CREC-2017-07-27-pt1-PgH6518-25.txt
finished with file  479
CREC-2017-04-27-pt1-PgH2942-11.txt
finished with file  479
CREC-2017-04-26-pt1-PgH2858.txt
finished with file  480
CREC-2017-06-28-pt1-PgH5296-6.txt
finished with file  480
CREC-2017-07-24-pt1-PgH6171.txt
finished with file  484
CREC-2017-05-08-pt1-PgH4186.txt
finished with file  484
CREC-2017-04-06-pt1-PgH2789-12.txt
finished with file 

finished with file  841
CREC-2017-05-03-pt1-PgH3309.txt
finished with file  842
CREC-2017-06-06-pt1-PgH4640-2.txt
finished with file  842
CREC-2017-06-08-pt1-PgH4715-3.txt
finished with file  843
CREC-2017-04-27-pt1-PgH2908-5.txt
finished with file  844
CREC-2017-05-18-pt1-PgH4324-5.txt
finished with file  844
CREC-2017-06-22-pt1-PgS3698-3.txt
finished with file  901
CREC-2017-04-06-pt1-PgH2768-3.txt
finished with file  902
CREC-2017-04-20-pt1-PgH2812-3.txt
finished with file  902
CREC-2017-04-25-pt1-PgH2847-13.txt
finished with file  902
CREC-2017-05-19-pt1-PgH4361-3.txt
finished with file  904
CREC-2017-06-13-pt1-PgS3459-4.txt
finished with file  904
CREC-2017-05-17-pt1-PgH4234-2.txt
finished with file  904
CREC-2017-07-28-pt1-PgH6579-12.txt
finished with file  904
CREC-2017-05-24-pt1-PgH4507-4.txt
finished with file  904
CREC-2017-04-06-pt1-PgH2792-32.txt
finished with file  904
CREC-2017-06-06-pt1-PgH4650.txt
finished with file  904
CREC-2017-07-13-pt1-PgH5772-5.txt
finished with f

CREC-2017-04-28-pt1-PgH2959-2.txt
finished with file  1175
CREC-2017-07-31-pt1-PgS4612.txt
finished with file  1177
CREC-2017-06-08-pt1-PgH4713-4.txt
finished with file  1177
CREC-2017-06-20-pt1-PgS3642-2.txt
finished with file  1178
CREC-2017-06-27-pt1-PgH5232-20.txt
finished with file  1178
CREC-2017-05-30-pt1-PgH4617-6.txt
finished with file  1178
CREC-2017-05-18-pt1-PgH4322-2.txt
finished with file  1179
CREC-2017-06-15-pt1-PgH4938-15.txt
finished with file  1179
CREC-2017-06-07-pt1-PgH4701-4.txt
finished with file  1179
CREC-2017-04-12-pt1-PgH2799-4.txt
finished with file  1179
CREC-2017-05-26-pt1-PgH4615.txt
finished with file  1179
CREC-2017-05-03-pt1-PgS2687-4.txt
finished with file  1179
CREC-2017-07-25-pt1-PgH6222-8.txt
finished with file  1179
CREC-2017-07-12-pt1-PgH5760.txt
finished with file  1179
CREC-2017-05-01-pt1-PgH2988.txt
finished with file  1179
CREC-2017-04-27-pt1-PgS2630-4.txt
finished with file  1181
CREC-2017-06-22-pt1-PgH5067-2.txt
finished with file  1181
CRE

finished with file  1403
CREC-2017-05-03-pt1-PgH3324-2.txt
finished with file  1403
CREC-2017-04-06-pt1-PgH2758.txt
finished with file  1403
CREC-2017-07-24-pt1-PgH6200.txt
finished with file  1404
CREC-2017-06-06-pt1-PgH4647.txt
finished with file  1404
CREC-2017-07-13-pt1-PgH5832-26.txt
finished with file  1404
CREC-2017-06-21-pt1-PgH5045.txt
finished with file  1404
CREC-2017-06-23-pt1-PgH5139.txt
finished with file  1404
CREC-2017-04-06-pt1-PgH2792-25.txt
finished with file  1404
CREC-2017-05-22-pt1-PgH4395-2.txt
finished with file  1404
CREC-2017-04-25-pt1-PgS2499-8.txt
finished with file  1404
CREC-2017-07-28-pt1-PgH6579-11.txt
finished with file  1404
CREC-2017-04-06-pt1-PgH2792-31.txt
finished with file  1404
CREC-2017-07-24-pt1-PgH6214.txt
finished with file  1404
CREC-2017-06-20-pt1-PgH4994-28.txt
finished with file  1404
CREC-2017-07-24-pt1-PgS4124.txt
finished with file  1419
CREC-2017-04-06-pt1-PgH2792-19.txt
finished with file  1419
CREC-2017-07-28-pt1-PgH6577-14.txt
fini

finished with file  1571
CREC-2017-05-17-pt1-PgH4311-18.txt
finished with file  1571
CREC-2017-05-17-pt1-PgH4269.txt
finished with file  1577
CREC-2017-04-27-pt1-PgH2942-17.txt
finished with file  1577
CREC-2017-07-27-pt1-PgH6518-23.txt
finished with file  1577
CREC-2017-07-27-pt1-PgS4421-2.txt
finished with file  1577
CREC-2017-07-25-pt1-PgS4168.txt
finished with file  1577
CREC-2017-05-08-pt1-PgS2798.txt
finished with file  1578
CREC-2017-04-06-pt1-PgH2789-14.txt
finished with file  1578
CREC-2017-07-31-pt1-PgS4614-6.txt
finished with file  1579
CREC-2017-07-17-pt1-PgS4009-2.txt
finished with file  1579
CREC-2017-06-15-pt1-PgH4932.txt
finished with file  1579
CREC-2017-06-27-pt1-PgS3781-6.txt
finished with file  1581
CREC-2017-05-03-pt1-PgS2687.txt
finished with file  1581
CREC-2017-04-26-pt1-PgH2897-22.txt
finished with file  1581
CREC-2017-04-06-pt1-PgH2791-5.txt
finished with file  1581
CREC-2017-06-15-pt1-PgS3561.txt
finished with file  1581
CREC-2017-05-15-pt1-PgH4193-4.txt
fini

CREC-2017-05-23-pt1-PgH4430.txt
finished with file  1812
CREC-2017-05-25-pt1-PgS3209.txt
finished with file  1812
CREC-2017-06-15-pt1-PgH4923-6.txt
finished with file  1812
CREC-2017-05-17-pt1-PgH4236-2.txt
finished with file  1813
CREC-2017-06-14-pt1-PgS3504-2.txt
finished with file  1814
CREC-2017-07-26-pt1-PgS4298.txt
finished with file  1815
CREC-2017-07-28-pt1-PgH6580-9.txt
finished with file  1815
CREC-2017-05-17-pt1-PgH4312-6.txt
finished with file  1815
CREC-2017-04-26-pt1-PgS2564.txt
finished with file  1815
CREC-2017-04-24-pt1-PgS-FrontMatter.txt
finished with file  1815
CREC-2017-06-12-pt1-PgS3411.txt
finished with file  1815
CREC-2017-04-28-pt1-PgS2635-9.txt
finished with file  1815
CREC-2017-05-24-pt1-PgS3139-3.txt
finished with file  1815
CREC-2017-05-22-pt1-PgH4424-2.txt
finished with file  1815
CREC-2017-07-18-pt1-PgH5924-9.txt
finished with file  1816
CREC-2017-05-02-pt1-PgH3068-14.txt
finished with file  1816
CREC-2017-04-26-pt1-PgH2898-8.txt
finished with file  1816


CREC-2017-07-28-pt1-PgH6570-3.txt
finished with file  1894
CREC-2017-05-23-pt1-PgH4503-5.txt
finished with file  1894
CREC-2017-06-26-pt1-PgH5180-5.txt
finished with file  1894
CREC-2017-06-29-pt1-PgH-FrontMatter.txt
finished with file  1894
CREC-2017-06-21-pt1-PgS3658-3.txt
finished with file  1895
CREC-2017-05-02-pt1-PgH3024-2.txt
finished with file  1897
CREC-2017-05-01-pt1-PgS2641-5.txt
finished with file  1897
CREC-2017-05-03-pt1-PgH3324-32.txt
finished with file  1897
CREC-2017-07-10-pt1-PgS3874-2.txt
finished with file  1898
CREC-2017-05-03-pt1-PgH3324-26.txt
finished with file  1898
CREC-2017-06-29-pt1-PgH5357-4.txt
finished with file  1899
CREC-2017-06-29-pt1-PgH5378-29.txt
finished with file  1899
CREC-2017-06-26-pt1-PgS3764-6.txt
finished with file  1899
CREC-2017-06-14-pt1-PgS-FrontMatter.txt
finished with file  1899
CREC-2017-06-26-pt1-PgH5147.txt
finished with file  1904
CREC-2017-07-11-pt1-PgH5430-5.txt
finished with file  1904
CREC-2017-05-17-pt1-PgH4228-2.txt
finished 

CREC-2017-06-15-pt1-PgH4920-2.txt
finished with file  2227
CREC-2017-06-26-pt1-PgH-FrontMatter.txt
finished with file  2227
CREC-2017-06-07-pt1-PgS3327-5.txt
finished with file  2227
CREC-2017-06-26-pt1-PgS3779-2.txt
finished with file  2227
CREC-2017-05-17-pt1-PgH4311-2.txt
finished with file  2227
CREC-2017-06-12-pt1-PgS3395-3.txt
finished with file  2227
CREC-2017-05-17-pt1-PgH4235-6.txt
finished with file  2227
CREC-2017-07-13-pt1-PgS3991-2.txt
finished with file  2228
CREC-2017-05-18-pt1-PgH4346-2.txt
finished with file  2230
CREC-2017-05-16-pt1-PgH4213-3.txt
finished with file  2231
CREC-2017-06-13-pt1-PgH4896.txt
finished with file  2231
CREC-2017-05-03-pt1-PgH3325-5.txt
finished with file  2231
CREC-2017-06-07-pt1-PgS3301-6.txt
finished with file  2231
CREC-2017-05-18-pt1-PgS3045.txt
finished with file  2231
CREC-2017-05-25-pt1-PgH4609-21.txt
finished with file  2231
CREC-2017-05-02-pt1-PgS2684.txt
finished with file  2231
CREC-2017-06-28-pt1-PgH5295-4.txt
finished with file  2

CREC-2017-04-06-pt1-PgH2770-4.txt
finished with file  2551
CREC-2017-06-29-pt1-PgH5353-3.txt
finished with file  2553
CREC-2017-04-27-pt1-PgH2910-2.txt
finished with file  2553
CREC-2017-05-19-pt1-PgH4379-4.txt
finished with file  2554
CREC-2017-06-12-pt1-PgH4826.txt
finished with file  2560
CREC-2017-06-12-pt1-PgH4823-7.txt
finished with file  2560
CREC-2017-04-06-pt1-PgH2791-11.txt
finished with file  2560
CREC-2017-07-10-pt1-PgH5389-8.txt
finished with file  2560
CREC-2017-07-27-pt1-PgS4420-6.txt
finished with file  2560
CREC-2017-07-24-pt1-PgH6150-2.txt
finished with file  2560
CREC-2017-07-18-pt1-PgH5968-4.txt
finished with file  2560
CREC-2017-06-05-pt1-PgS3245-2.txt
finished with file  2560
CREC-2017-04-24-pt1-PgS2497.txt
finished with file  2560
CREC-2017-05-25-pt1-PgH4610-16.txt
finished with file  2560
CREC-2017-07-31-pt1-PgS4615-2.txt
finished with file  2561
CREC-2017-07-20-pt1-PgH6146-11.txt
finished with file  2561
CREC-2017-07-27-pt1-PgH6479-6.txt
finished with file  256

CREC-2017-07-24-pt1-PgH6213-13.txt
finished with file  2699
CREC-2017-07-28-pt1-PgH6580-16.txt
finished with file  2699
CREC-2017-06-07-pt1-PgH4690.txt
finished with file  2699
CREC-2017-07-12-pt1-PgS3955-2.txt
finished with file  2700
CREC-2017-05-17-pt1-PgH4231.txt
finished with file  2700
CREC-2017-04-28-pt1-PgH2967-7.txt
finished with file  2700
CREC-2017-07-10-pt1-PgS3876-3.txt
finished with file  2700
CREC-2017-06-12-pt1-PgH4855-8.txt
finished with file  2700
CREC-2017-06-13-pt1-PgH4901-3.txt
finished with file  2700
CREC-2017-07-28-pt1-PgH6578-20.txt
finished with file  2700
CREC-2017-05-18-pt1-PgH4359-7.txt
finished with file  2700
CREC-2017-06-08-pt1-PgS-FrontMatter.txt
finished with file  2700
CREC-2017-04-26-pt1-PgH2851-5.txt
finished with file  2700
CREC-2017-06-22-pt1-PgH5111.txt
finished with file  2700
CREC-2017-07-11-pt1-PgS3931-2.txt
finished with file  2700
CREC-2017-06-15-pt1-PgH4919-4.txt
finished with file  2700
CREC-2017-05-15-pt1-PgS-FrontMatter.txt
finished with

finished with file  2908
CREC-2017-05-17-pt1-PgH4296-6.txt
finished with file  2909
CREC-2017-07-18-pt1-PgH5977-2.txt
finished with file  2909
CREC-2017-04-14-pt1-PgH2805.txt
finished with file  2909
CREC-2017-06-28-pt1-PgH5288-2.txt
finished with file  2910
CREC-2017-06-27-pt1-PgH5219-4.txt
finished with file  2910
CREC-2017-06-07-pt1-PgH4700-2.txt
finished with file  2910
CREC-2017-05-04-pt1-PgH4179-22.txt
finished with file  2910
CREC-2017-05-17-pt1-PgH4233-3.txt
finished with file  2910
CREC-2017-06-20-pt1-PgH4979.txt
finished with file  2912
CREC-2017-07-11-pt1-PgH5435.txt
finished with file  2912
CREC-2017-05-18-pt1-PgH4323-4.txt
finished with file  2912
CREC-2017-07-26-pt1-PgH6466-19.txt
finished with file  2912
CREC-2017-05-16-pt1-PgH4215-6.txt
finished with file  2912
CREC-2017-06-20-pt1-PgH4978-3.txt
finished with file  2912
CREC-2017-06-08-pt1-PgH4712-2.txt
finished with file  2913
CREC-2017-07-11-pt1-PgH5421.txt
finished with file  2914
CREC-2017-05-01-pt1-PgH3003-11.txt
fi

finished with file  3066
CREC-2017-07-17-pt1-PgH5887-4.txt
finished with file  3067
CREC-2017-07-11-pt1-PgH5431.txt
finished with file  3067
CREC-2017-05-04-pt1-PgH4179-26.txt
finished with file  3067
CREC-2017-05-17-pt1-PgH4233-7.txt
finished with file  3068
CREC-2017-06-21-pt1-PgS-FrontMatter.txt
finished with file  3068
CREC-2017-06-12-pt1-PgS3393-2.txt
finished with file  3068
CREC-2017-05-02-pt1-PgS2683.txt
finished with file  3069
CREC-2017-06-28-pt1-PgH5295-3.txt
finished with file  3069
CREC-2017-05-18-pt1-PgS3042.txt
finished with file  3069
CREC-2017-05-25-pt1-PgH4609-26.txt
finished with file  3069
CREC-2017-07-26-pt1-PgH6308.txt
finished with file  3070
CREC-2017-06-20-pt1-PgH4996.txt
finished with file  3070
CREC-2017-04-06-pt1-PgH2792-6.txt
finished with file  3070
CREC-2017-06-29-pt1-PgH5355.txt
finished with file  3073
CREC-2017-05-09-pt1-PgS2821-3.txt
finished with file  3073
CREC-2017-04-26-pt1-PgH2893-2.txt
finished with file  3073
CREC-2017-05-24-pt1-PgH4570-8.txt
f

CREC-2017-07-20-pt1-PgH6137-4.txt
finished with file  3214
CREC-2017-07-11-pt1-PgS3917-5.txt
finished with file  3214
CREC-2017-04-28-pt1-PgH2967-3.txt
finished with file  3214
CREC-2017-06-22-pt1-PgH5101.txt
finished with file  3221
CREC-2017-05-17-pt1-PgH4235.txt
finished with file  3221
CREC-2017-06-07-pt1-PgH4657.txt
finished with file  3222
CREC-2017-04-26-pt1-PgH2897-7.txt
finished with file  3222
CREC-2017-07-13-pt1-PgH5771-5.txt
finished with file  3222
CREC-2017-07-14-pt1-PgH5884-2.txt
finished with file  3222
CREC-2017-06-22-pt1-PgS3731.txt
finished with file  3222
CREC-2017-06-22-pt1-PgS3725.txt
finished with file  3222
CREC-2017-06-08-pt1-PgH4820-6.txt
finished with file  3222
CREC-2017-08-01-pt1-PgS4633-3.txt
finished with file  3222
CREC-2017-07-28-pt1-PgH6578-18.txt
finished with file  3222
CREC-2017-06-05-pt1-PgS3239.txt
finished with file  3223
CREC-2017-05-02-pt1-PgS2659-6.txt
finished with file  3224
CREC-2017-07-18-pt1-PgS4059-4.txt
finished with file  3225
CREC-201

finished with file  3811
CREC-2017-05-11-pt1-PgS2921-3.txt
finished with file  3811
CREC-2017-05-02-pt1-PgH3050.txt
finished with file  3812
CREC-2017-07-24-pt1-PgS4130-2.txt
finished with file  3816
CREC-2017-07-11-pt1-PgH5418.txt
finished with file  3816
CREC-2017-05-01-pt1-PgH3003-3.txt
finished with file  3816
CREC-2017-05-17-pt1-PgH4296-3.txt
finished with file  3817
CREC-2017-04-28-pt1-PgH2972-21.txt
finished with file  3817
CREC-2017-07-20-pt1-PgH6131-2.txt
finished with file  3818
CREC-2017-05-02-pt1-PgH3006-2.txt
finished with file  3818
CREC-2017-06-21-pt1-PgH5045-3.txt
finished with file  3819
CREC-2017-04-06-pt1-PgH2791-15.txt
finished with file  3819
CREC-2017-06-12-pt1-PgH4823-3.txt
finished with file  3819
CREC-2017-04-28-pt1-PgH2961-5.txt
finished with file  3820
CREC-2017-06-15-pt1-PgH4939-5.txt
finished with file  3820
CREC-2017-06-13-pt1-PgH4866-5.txt
finished with file  3820
CREC-2017-06-12-pt1-PgH4836.txt
finished with file  3820
CREC-2017-07-20-pt1-PgH6146-29.txt


CREC-2017-05-25-pt1-PgH4609-25.txt
finished with file  3929
CREC-2017-05-18-pt1-PgS3041.txt
finished with file  3929
CREC-2017-07-27-pt1-PgH6519-15.txt
finished with file  3929
CREC-2017-07-20-pt1-PgH6133-2.txt
finished with file  3929
CREC-2017-05-04-pt1-PgS2781-6.txt
finished with file  3930
CREC-2017-07-06-pt1-PgH5386-9.txt
finished with file  3930
CREC-2017-06-08-pt1-PgS3345.txt
finished with file  3930
CREC-2017-06-21-pt1-PgH5002-3.txt
finished with file  3931
CREC-2017-04-28-pt1-PgH2945-6.txt
finished with file  3931
CREC-2017-06-29-pt1-PgH5377-4.txt
finished with file  3931
CREC-2017-05-30-pt1-PgH4618-8.txt
finished with file  3931
CREC-2017-07-26-pt1-PgH6313-4.txt
finished with file  3932
CREC-2017-07-17-pt1-PgH5902.txt
finished with file  3933
CREC-2017-05-25-pt1-PgH4609-19.txt
finished with file  3933
CREC-2017-06-14-pt1-PgS3486-3.txt
finished with file  3933
CREC-2017-05-25-pt1-PgH4607-2.txt
finished with file  3933
CREC-2017-04-06-pt1-PgH2789-6.txt
finished with file  3933


finished with file  4132
CREC-2017-07-13-pt1-PgS3971-2.txt
finished with file  4166
CREC-2017-05-23-pt1-PgH4427-5.txt
finished with file  4166
CREC-2017-04-24-pt1-PgS2496-3.txt
finished with file  4166
CREC-2017-07-17-pt1-PgH5888.txt
finished with file  4169
CREC-2017-06-23-pt1-PgH5127-5.txt
finished with file  4170
CREC-2017-06-06-pt1-PgS3294.txt
finished with file  4170
CREC-2017-04-06-pt1-PgH2789-7.txt
finished with file  4170
CREC-2017-05-25-pt1-PgH4607-3.txt
finished with file  4170
CREC-2017-06-14-pt1-PgS3486-2.txt
finished with file  4171
CREC-2017-06-21-pt1-PgS3683-4.txt
finished with file  4171
CREC-2017-06-29-pt1-PgS3866-4.txt
finished with file  4171
CREC-2017-07-24-pt1-PgH6149-4.txt
finished with file  4172
CREC-2017-04-25-pt1-PgH2847-9.txt
finished with file  4172
CREC-2017-06-08-pt1-PgH4820-15.txt
finished with file  4172
CREC-2017-07-26-pt1-PgH6313-5.txt
finished with file  4173
CREC-2017-05-30-pt1-PgH4618-9.txt
finished with file  4173
CREC-2017-06-29-pt1-PgH5377-5.txt


CREC-2017-06-29-pt1-PgH5305-3.txt
finished with file  4385
CREC-2017-07-14-pt1-PgH5871-5.txt
finished with file  4385
CREC-2017-06-28-pt1-PgH5263.txt
finished with file  4465
CREC-2017-05-23-pt1-PgH4489.txt
finished with file  4465
CREC-2017-04-27-pt1-PgH2903-2.txt
finished with file  4466
CREC-2017-06-06-pt1-PgH4628-6.txt
finished with file  4466
CREC-2017-05-25-pt1-PgH4610-20.txt
finished with file  4466
CREC-2017-06-12-pt1-PgH4838.txt
finished with file  4489
CREC-2017-06-13-pt1-PgS3452-3.txt
finished with file  4489
CREC-2017-07-20-pt1-PgH6146-27.txt
finished with file  4489
CREC-2017-07-13-pt1-PgH5783.txt
finished with file  4489
CREC-2017-08-02-pt1-PgS4718-3.txt
finished with file  4490
CREC-2017-04-25-pt1-PgS-FrontMatter.txt
finished with file  4490
CREC-2017-07-10-pt1-PgH5389-2.txt
finished with file  4490
CREC-2017-06-28-pt1-PgH5242-2.txt
finished with file  4491
CREC-2017-05-04-pt1-PgH4181.txt
finished with file  4491
CREC-2017-05-03-pt1-PgH3076-3.txt
finished with file  4491

CREC-2017-08-01-pt1-PgS4694.txt
finished with file  4658
CREC-2017-06-12-pt1-PgH4855-2.txt
finished with file  4658
CREC-2017-07-11-pt1-PgH5421-4.txt
finished with file  4658
CREC-2017-07-25-pt1-PgH6222.txt
finished with file  4658
CREC-2017-06-12-pt1-PgS3411-3.txt
finished with file  4658
CREC-2017-07-20-pt1-PgH6045-4.txt
finished with file  4658
CREC-2017-05-04-pt1-PgS-FrontMatter.txt
finished with file  4658
CREC-2017-08-01-pt1-PgH6585-7.txt
finished with file  4658
CREC-2017-06-13-pt1-PgH4912-2.txt
finished with file  4658
CREC-2017-08-01-pt1-PgS4681.txt
finished with file  4659
CREC-2017-08-01-pt1-PgH6585-6.txt
finished with file  4659
CREC-2017-07-20-pt1-PgH6045-5.txt
finished with file  4659
CREC-2017-06-07-pt1-PgH4664.txt
finished with file  4703
CREC-2017-07-25-pt1-PgH6223.txt
finished with file  4703
CREC-2017-06-12-pt1-PgS3411-2.txt
finished with file  4703
CREC-2017-07-20-pt1-PgH6147-2.txt
finished with file  4703
CREC-2017-07-18-pt1-PgH5925.txt
finished with file  4703
CRE

CREC-2017-05-04-pt1-PgH4179-28.txt
finished with file  4794
CREC-2017-05-01-pt1-PgS2649-4.txt
finished with file  4795
CREC-2017-06-20-pt1-PgS3615-4.txt
finished with file  4795
CREC-2017-05-19-pt1-PgH4390-21.txt
finished with file  4795
CREC-2017-07-11-pt1-PgH5396-2.txt
finished with file  4795
CREC-2017-07-26-pt1-PgH6466-13.txt
finished with file  4795
CREC-2017-05-16-pt1-PgH4220-5.txt
finished with file  4795
CREC-2017-06-06-pt1-PgS3253-2.txt
finished with file  4795
CREC-2017-05-26-pt1-PgH4615-15.txt
finished with file  4795
CREC-2017-05-02-pt1-PgS2672.txt
finished with file  4799
CREC-2017-06-06-pt1-PgH4647-9.txt
finished with file  4799
CREC-2017-05-22-pt1-PgS3060.txt
finished with file  4799
CREC-2017-05-04-pt1-PgH4179-14.txt
finished with file  4799
CREC-2017-05-22-pt1-PgS3070-2.txt
finished with file  4799
CREC-2017-07-24-pt1-PgS4123-7.txt
finished with file  4799
CREC-2017-07-11-pt1-PgH5403.txt
finished with file  4802
CREC-2017-06-23-pt1-PgH5114-6.txt
finished with file  482

CREC-2017-06-27-pt1-PgS3797-5.txt
finished with file  4984
CREC-2017-06-07-pt1-PgH4689-2.txt
finished with file  4984
CREC-2017-07-25-pt1-PgS4224-5.txt
finished with file  4984
CREC-2017-06-13-pt1-PgS3413-4.txt
finished with file  4984
CREC-2017-05-26-pt1-PgH4613-4.txt
finished with file  4984
CREC-2017-04-27-pt1-PgH2942-5.txt
finished with file  4984
CREC-2017-06-08-pt1-PgS3358-2.txt
finished with file  4984
CREC-2017-06-22-pt1-PgS3697-3.txt
finished with file  4984
CREC-2017-04-06-pt1-PgH2767-3.txt
finished with file  4985
CREC-2017-04-27-pt1-PgH2907-5.txt
finished with file  4986
CREC-2017-07-11-pt1-PgS3920-3.txt
finished with file  4986
CREC-2017-06-29-pt1-PgH5378-23.txt
finished with file  4986
CREC-2017-08-01-pt1-PgS4687-3.txt
finished with file  4988
CREC-2017-06-13-pt1-PgH4857-5.txt
finished with file  4989
CREC-2017-07-28-pt1-PgH6571.txt
finished with file  4989
CREC-2017-05-18-pt1-PgS-FrontMatter.txt
finished with file  4989
CREC-2017-05-24-pt1-PgH4569-5.txt
finished with fil

finished with file  5171
CREC-2017-06-28-pt1-PgH5240-2.txt
finished with file  5171
CREC-2017-07-10-pt1-PgS3876.txt
finished with file  5177
CREC-2017-07-26-pt1-PgH6463.txt
finished with file  5178
CREC-2017-06-08-pt1-PgS3363.txt
finished with file  5178
CREC-2017-07-26-pt1-PgH6305.txt
finished with file  5179
CREC-2017-05-24-pt1-PgH4570-5.txt
finished with file  5179
CREC-2017-04-25-pt1-PgH2819-5.txt
finished with file  5180
CREC-2017-06-29-pt1-PgH5358.txt
finished with file  5180
CREC-2017-06-23-pt1-PgH5140.txt
finished with file  5180
CREC-2017-05-16-pt1-PgS2954-2.txt
finished with file  5182
CREC-2017-07-26-pt1-PgH6311.txt
finished with file  5182
CREC-2017-05-25-pt1-PgH4607-12.txt
finished with file  5182
CREC-2017-05-08-pt1-PgH4183-7.txt
finished with file  5182
CREC-2017-07-27-pt1-PgH6519-27.txt
finished with file  5182
CREC-2017-08-02-pt1-PgS4758.txt
finished with file  5183
CREC-2017-06-21-pt1-PgH5049-2.txt
finished with file  5183
CREC-2017-06-15-pt1-PgH4935-4.txt
finished wi

CREC-2017-06-20-pt1-PgH4994-6.txt
finished with file  5372
CREC-2017-06-20-pt1-PgS3615-3.txt
finished with file  5372
CREC-2017-05-01-pt1-PgS2649-3.txt
finished with file  5373
CREC-2017-06-20-pt1-PgH4974.txt
finished with file  5374
CREC-2017-06-20-pt1-PgS3650-3.txt
finished with file  5374
CREC-2017-06-07-pt1-PgH4654-2.txt
finished with file  5376
CREC-2017-07-11-pt1-PgH5396-5.txt
finished with file  5376
CREC-2017-07-26-pt1-PgH6466-14.txt
finished with file  5376
CREC-2017-07-25-pt1-PgS4186-3.txt
finished with file  5377
CREC-2017-06-06-pt1-PgS3253-5.txt
finished with file  5377
CREC-2017-05-26-pt1-PgH4615-12.txt
finished with file  5377
CREC-2017-05-16-pt1-PgH4220-2.txt
finished with file  5377
CREC-2017-05-02-pt1-PgH3064.txt
finished with file  5378
CREC-2017-06-21-pt1-PgH5049-6.txt
finished with file  5378
CREC-2017-05-04-pt1-PgH4179-13.txt
finished with file  5378
CREC-2017-07-11-pt1-PgH5404.txt
finished with file  5381
CREC-2017-05-01-pt1-PgH3003-20.txt
finished with file  5381

finished with file  5503
CREC-2017-06-20-pt1-PgS3650.txt
finished with file  5503
CREC-2017-06-20-pt1-PgS3644.txt
finished with file  5503
CREC-2017-05-25-pt1-PgH4574-2.txt
finished with file  5504
CREC-2017-07-25-pt1-PgH6219.txt
finished with file  5505
CREC-2017-06-07-pt1-PgH4676.txt
finished with file  5520
CREC-2017-07-20-pt1-PgH6147-4.txt
finished with file  5520
CREC-2017-07-18-pt1-PgH5923.txt
finished with file  5520
CREC-2017-05-24-pt1-PgH4570-17.txt
finished with file  5520
CREC-2017-06-22-pt1-PgH5108.txt
finished with file  5521
CREC-2017-06-21-pt1-PgH5033-5.txt
finished with file  5522
CREC-2017-06-12-pt1-PgH4855-5.txt
finished with file  5522
CREC-2017-05-01-pt1-PgS2650-3.txt
finished with file  5522
CREC-2017-07-11-pt1-PgH5421-3.txt
finished with file  5523
CREC-2017-08-01-pt1-PgS4687.txt
finished with file  5525
CREC-2017-06-29-pt1-PgH5303-2.txt
finished with file  5526
CREC-2017-06-12-pt1-PgH4836-6.txt
finished with file  5527
CREC-2017-07-20-pt1-PgH6045-3.txt
finished w

CREC-2017-07-26-pt1-PgH6466-15.txt
finished with file  5691
CREC-2017-07-11-pt1-PgH5396-4.txt
finished with file  5691
CREC-2017-06-20-pt1-PgH4975.txt
finished with file  5695
CREC-2017-06-20-pt1-PgS3650-2.txt
finished with file  5695
CREC-2017-05-08-pt1-PgS2798-3.txt
finished with file  5696
CREC-2017-07-27-pt1-PgS4415-2.txt
finished with file  5698
CREC-2017-06-20-pt1-PgH4994-7.txt
finished with file  5698
CREC-2017-06-05-pt1-PgS3235-6.txt
finished with file  5698
CREC-2017-05-18-pt1-PgS3040-4.txt
finished with file  5698
CREC-2017-05-16-pt1-PgS2970-6.txt
finished with file  5699
CREC-2017-05-11-pt1-PgH4190-16.txt
finished with file  5699
CREC-2017-04-28-pt1-PgH2972-14.txt
finished with file  5699
CREC-2017-06-22-pt1-PgH5054.txt
finished with file  5699
CREC-2017-05-18-pt1-PgS3023-7.txt
finished with file  5700
CREC-2017-06-28-pt1-PgS3829-3.txt
finished with file  5700
CREC-2017-04-06-pt1-PgH2791-20.txt
finished with file  5700
CREC-2017-07-20-pt1-PgS4118-2.txt
finished with file  57

finished with file  5879
CREC-2017-07-25-pt1-PgH6280-3.txt
finished with file  5880
CREC-2017-05-17-pt1-PgH4245-2.txt
finished with file  5889
CREC-2017-04-28-pt1-PgS2636.txt
finished with file  5889
CREC-2017-06-07-pt1-PgS3327-8.txt
finished with file  5889
CREC-2017-05-18-pt1-PgH4355-5.txt
finished with file  5889
CREC-2017-05-01-pt1-PgH2991-5.txt
finished with file  5890
CREC-2017-06-08-pt1-PgH4820-21.txt
finished with file  5890
CREC-2017-05-23-pt1-PgS3096-3.txt
finished with file  5890
CREC-2017-06-12-pt1-PgH4855-10.txt
finished with file  5890
CREC-2017-05-03-pt1-PgH3325-8.txt
finished with file  5890
CREC-2017-06-15-pt1-PgH4933-5.txt
finished with file  5891
CREC-2017-07-11-pt1-PgS-FrontMatter.txt
finished with file  5891
CREC-2017-05-04-pt1-PgH4113-3.txt
finished with file  5891
CREC-2017-05-23-pt1-PgS3076-6.txt
finished with file  5902
CREC-2017-05-22-pt1-PgH4402.txt
finished with file  5914
CREC-2017-05-23-pt1-PgH4503-9.txt
finished with file  5914
CREC-2017-05-24-pt1-PgH4508

finished with file  6027
CREC-2017-05-18-pt1-PgH4349.txt
finished with file  6032
CREC-2017-06-13-pt1-PgH4865.txt
finished with file  6032
CREC-2017-06-29-pt1-PgH5378-18.txt
finished with file  6032
CREC-2017-06-27-pt1-PgS3797-2.txt
finished with file  6033
CREC-2017-06-07-pt1-PgH4689-5.txt
finished with file  6033
CREC-2017-05-25-pt1-PgH4609-5.txt
finished with file  6033
CREC-2017-07-20-pt1-PgH6148.txt
finished with file  6033
CREC-2017-05-03-pt1-PgH3324-17.txt
finished with file  6033
CREC-2017-04-27-pt1-PgH2925.txt
finished with file  6034
CREC-2017-07-25-pt1-PgS4224-2.txt
finished with file  6035
CREC-2017-04-27-pt1-PgH2942-2.txt
finished with file  6035
CREC-2017-07-28-pt1-PgH6562.txt
finished with file  6039
CREC-2017-05-02-pt1-PgH3011-6.txt
finished with file  6040
CREC-2017-05-26-pt1-PgH4613-3.txt
finished with file  6040
CREC-2017-06-13-pt1-PgS3413-3.txt
finished with file  6040
CREC-2017-06-29-pt1-PgH5378-30.txt
finished with file  6040
CREC-2017-06-13-pt1-PgH4859.txt
finish

CREC-2017-05-03-pt1-PgS2715.txt
finished with file  6156
CREC-2017-06-15-pt1-PgS3552-3.txt
finished with file  6157
CREC-2017-04-27-pt1-PgH2919-2.txt
finished with file  6157
CREC-2017-05-23-pt1-PgH4429.txt
finished with file  6158
CREC-2017-05-25-pt1-PgS3210.txt
finished with file  6158
CREC-2017-07-11-pt1-PgH5393-4.txt
finished with file  6160
CREC-2017-06-08-pt1-PgS3360-6.txt
finished with file  6160
CREC-2017-05-25-pt1-PgS3204.txt
finished with file  6161
CREC-2017-06-20-pt1-PgS3655-2.txt
finished with file  6161
CREC-2017-06-07-pt1-PgH4651-3.txt
finished with file  6161
CREC-2017-05-24-pt1-PgS3103.txt
finished with file  6161
CREC-2017-07-28-pt1-PgH6580-4.txt
finished with file  6161
CREC-2017-07-11-pt1-PgS3912.txt
finished with file  6162
CREC-2017-05-24-pt1-PgS3111-2.txt
finished with file  6176
CREC-2017-04-08-pt1-PgH2796-2.txt
finished with file  6176
CREC-2017-06-20-pt1-PgH4993-5.txt
finished with file  6176
CREC-2017-07-13-pt1-PgH5770-8.txt
finished with file  6176
CREC-2017

finished with file  6416
CREC-2017-05-23-pt1-PgH4489-5.txt
finished with file  6416
CREC-2017-04-27-pt1-PgH2942-32.txt
finished with file  6416
CREC-2017-05-25-pt1-PgH4593-5.txt
finished with file  6416
CREC-2017-06-27-pt1-PgH5232-2.txt
finished with file  6416
CREC-2017-07-14-pt1-PgH5870-6.txt
finished with file  6416
CREC-2017-07-25-pt1-PgH6241.txt
finished with file  6444
CREC-2017-04-27-pt1-PgH2942-26.txt
finished with file  6444
CREC-2017-07-27-pt1-PgH6518-12.txt
finished with file  6444
CREC-2017-04-26-pt1-PgH2897-13.txt
finished with file  6444
CREC-2017-07-14-pt1-PgH5835-6.txt
finished with file  6444
CREC-2017-07-10-pt1-PgH5389.txt
finished with file  6444
CREC-2017-07-12-pt1-PgS-FrontMatter.txt
finished with file  6444
CREC-2017-05-02-pt1-PgH3051-5.txt
finished with file  6445
CREC-2017-04-28-pt1-PgH2973-2.txt
finished with file  6445
CREC-2017-07-27-pt1-PgH6518-13.txt
finished with file  6445
CREC-2017-04-27-pt1-PgH2942-27.txt
finished with file  6445
CREC-2017-07-26-pt1-PgH

finished with file  6594
CREC-2017-05-17-pt1-PgH4303-2.txt
finished with file  6599
CREC-2017-05-03-pt1-PgH3307.txt
finished with file  6599
CREC-2017-04-12-pt1-PgH2802-2.txt
finished with file  6599
CREC-2017-06-20-pt1-PgS3634-2.txt
finished with file  6606
CREC-2017-06-13-pt1-PgH4913-10.txt
finished with file  6606
CREC-2017-04-26-pt1-PgS2539-4.txt
finished with file  6606
CREC-2017-06-28-pt1-PgS3828.txt
finished with file  6606
CREC-2017-07-18-pt1-PgH5978-18.txt
finished with file  6606
CREC-2017-04-25-pt1-PgH2847-21.txt
finished with file  6606
CREC-2017-06-22-pt1-PgH5054-2.txt
finished with file  6606
CREC-2017-06-23-pt1-PgH5113-3.txt
finished with file  6606
CREC-2017-05-22-pt1-PgS3051-4.txt
finished with file  6606
CREC-2017-06-27-pt1-PgH5192-2.txt
finished with file  6607
CREC-2017-05-16-pt1-PgS2968-2.txt
finished with file  6607
CREC-2017-04-06-pt1-PgH2769.txt
finished with file  6607
CREC-2017-06-29-pt1-PgH5377-13.txt
finished with file  6607
CREC-2017-04-14-pt1-PgH2805-2.txt

finished with file  6766
CREC-2017-07-17-pt1-PgH5921-9.txt
finished with file  6766
CREC-2017-06-15-pt1-PgH4938-24.txt
finished with file  6766
CREC-2017-05-25-pt1-PgS3159.txt
finished with file  6766
CREC-2017-07-25-pt1-PgS4187-5.txt
finished with file  6766
CREC-2017-05-15-pt1-PgS2932.txt
finished with file  6766
CREC-2017-05-10-pt1-PgS2876.txt
finished with file  6766
CREC-2017-06-15-pt1-PgH4934-6.txt
finished with file  6767
CREC-2017-05-19-pt1-PgH4374-3.txt
finished with file  6767
CREC-2017-06-29-pt1-PgH5378-7.txt
finished with file  6767
CREC-2017-05-02-pt1-PgH3068-2.txt
finished with file  6767
CREC-2017-07-13-pt1-PgH5832-3.txt
finished with file  6767
CREC-2017-06-13-pt1-PgS3452.txt
finished with file  6767
CREC-2017-06-15-pt1-PgH4938-18.txt
finished with file  6767
CREC-2017-06-07-pt1-PgH4701-9.txt
finished with file  6767
CREC-2017-07-17-pt1-PgS4010.txt
finished with file  6767
CREC-2017-05-25-pt1-PgS3209-6.txt
finished with file  6767
CREC-2017-06-22-pt1-PgS3737-4.txt
finis

CREC-2017-06-08-pt1-PgS3366-2.txt
finished with file  6899
CREC-2017-05-03-pt1-PgH3072.txt
finished with file  6900
CREC-2017-06-22-pt1-PgH5096-3.txt
finished with file  6900
CREC-2017-07-24-pt1-PgH6187.txt
finished with file  6908
CREC-2017-05-16-pt1-PgH4223-7.txt
finished with file  6908
CREC-2017-06-15-pt1-PgS3554-7.txt
finished with file  6908
CREC-2017-05-02-pt1-PgH3009-2.txt
finished with file  6908
CREC-2017-07-25-pt1-PgH6280.txt
finished with file  6909
CREC-2017-06-29-pt1-PgS3866.txt
finished with file  6909
CREC-2017-06-06-pt1-PgH4647-10.txt
finished with file  6909
CREC-2017-05-16-pt1-PgS2962.txt
finished with file  6910
CREC-2017-07-25-pt1-PgS4165-3.txt
finished with file  6910
CREC-2017-06-15-pt1-PgS3552.txt
finished with file  6911
CREC-2017-05-22-pt1-PgH4417-2.txt
finished with file  6912
CREC-2017-06-27-pt1-PgS3801-3.txt
finished with file  6912
CREC-2017-07-28-pt1-PgH6523-5.txt
finished with file  6913
CREC-2017-07-27-pt1-PgH6476-2.txt
finished with file  6913
CREC-201

finished with file  7031
CREC-2017-04-07-pt1-PgS2435-3.txt
finished with file  7031
CREC-2017-07-13-pt1-PgS4003-3.txt
finished with file  7031
CREC-2017-05-17-pt1-PgH4312-9.txt
finished with file  7031
CREC-2017-07-26-pt1-PgS4297.txt
finished with file  7032
CREC-2017-07-28-pt1-PgH6580-6.txt
finished with file  7032
CREC-2017-05-03-pt1-PgS-FrontMatter.txt
finished with file  7032
CREC-2017-05-25-pt1-PgS3206.txt
finished with file  7033
CREC-2017-06-27-pt1-PgH5189.txt
finished with file  7033
CREC-2017-06-08-pt1-PgS3360-4.txt
finished with file  7033
CREC-2017-05-16-pt1-PgH4203-2.txt
finished with file  7033
CREC-2017-07-28-pt1-PgH6525-3.txt
finished with file  7033
CREC-2017-06-27-pt1-PgH5190-3.txt
finished with file  7034
CREC-2017-06-22-pt1-PgH5111-4.txt
finished with file  7034
CREC-2017-07-28-pt1-PgH6560-3.txt
finished with file  7034
CREC-2017-04-28-pt1-PgS2635-6.txt
finished with file  7034
CREC-2017-05-24-pt1-PgS3129.txt
finished with file  7034
CREC-2017-05-18-pt1-PgS3039-2.txt

finished with file  7157
CREC-2017-05-23-pt1-PgS3075.txt
finished with file  7157
CREC-2017-06-06-pt1-PgS3296-2.txt
finished with file  7157
CREC-2017-07-27-pt1-PgH6476-6.txt
finished with file  7157
CREC-2017-07-31-pt1-PgS4609-8.txt
finished with file  7157
CREC-2017-05-22-pt1-PgS3073-4.txt
finished with file  7158
CREC-2017-05-22-pt1-PgH4417-6.txt
finished with file  7159
CREC-2017-06-15-pt1-PgS3556.txt
finished with file  7161
CREC-2017-06-21-pt1-PgS3695-2.txt
finished with file  7161
CREC-2017-05-16-pt1-PgS2966.txt
finished with file  7162
CREC-2017-06-06-pt1-PgH4647-14.txt
finished with file  7162
CREC-2017-06-28-pt1-PgS3835-4.txt
finished with file  7162
CREC-2017-07-28-pt1-PgH6540-2.txt
finished with file  7166
CREC-2017-05-26-pt1-PgH-FrontMatter.txt
finished with file  7166
CREC-2017-07-27-pt1-PgH6518-14.txt
finished with file  7166
CREC-2017-04-27-pt1-PgH2942-20.txt
finished with file  7166
CREC-2017-04-26-pt1-PgH2897-15.txt
finished with file  7166
CREC-2017-05-02-pt1-PgH3051

CREC-2017-04-06-pt1-PgH2792-12.txt
finished with file  7356
CREC-2017-06-29-pt1-PgH5316.txt
finished with file  7421
CREC-2017-07-28-pt1-PgH6579-32.txt
finished with file  7421
CREC-2017-07-13-pt1-PgH5832-11.txt
finished with file  7421
CREC-2017-05-03-pt1-PgH3073-3.txt
finished with file  7422
CREC-2017-06-26-pt1-PgS3750-2.txt
finished with file  7423
CREC-2017-08-02-pt1-PgS4758-3.txt
finished with file  7425
CREC-2017-06-08-pt1-PgH4819-10.txt
finished with file  7425
CREC-2017-04-25-pt1-PgS2518-2.txt
finished with file  7426
CREC-2017-04-07-pt1-PgS2449-2.txt
finished with file  7426
CREC-2017-06-23-pt1-PgH5113-5.txt
finished with file  7426
CREC-2017-06-21-pt1-PgS3681.txt
finished with file  7427
CREC-2017-06-27-pt1-PgH5192-4.txt
finished with file  7427
CREC-2017-05-22-pt1-PgS3051-2.txt
finished with file  7427
CREC-2017-05-16-pt1-PgS2968-4.txt
finished with file  7427
CREC-2017-06-08-pt1-PgH4818.txt
finished with file  7427
CREC-2017-07-27-pt1-PgH6513-7.txt
finished with file  7427

CREC-2017-06-28-pt1-PgS3828-2.txt
finished with file  7820
CREC-2017-04-27-pt1-PgS2565-7.txt
finished with file  7820
CREC-2017-05-17-pt1-PgH4311-10.txt
finished with file  7820
CREC-2017-07-27-pt1-PgH6476.txt
finished with file  7820
CREC-2017-04-06-pt1-PgH2782-7.txt
finished with file  7820
CREC-2017-06-16-pt1-PgH4941-8.txt
finished with file  7820
CREC-2017-06-05-pt1-PgS3245.txt
finished with file  7820
CREC-2017-06-07-pt1-PgS3339.txt
finished with file  7820
CREC-2017-04-26-pt1-PgH2856.txt
finished with file  7820
CREC-2017-05-25-pt1-PgH4573-5.txt
finished with file  7820
CREC-2017-05-08-pt1-PgS2799-2.txt
finished with file  7820
CREC-2017-05-17-pt1-PgH4275.txt
finished with file  7824
CREC-2017-07-25-pt1-PgH6278.txt
finished with file  7826
CREC-2017-08-02-pt1-PgS4698-4.txt
finished with file  7827
CREC-2017-05-25-pt1-PgH4608-33.txt
finished with file  7827
CREC-2017-06-28-pt1-PgH5296-8.txt
finished with file  7827
CREC-2017-06-07-pt1-PgS3338.txt
finished with file  7827
CREC-2017

finished with file  7966
CREC-2017-04-25-pt1-PgH2847-24.txt
finished with file  7966
CREC-2017-07-18-pt1-PgH5926-2.txt
finished with file  7966
CREC-2017-08-02-pt1-PgS4715.txt
finished with file  7966
CREC-2017-07-18-pt1-PgH5924-10.txt
finished with file  7967
CREC-2017-04-06-pt1-PgH2778.txt
finished with file  7969
CREC-2017-07-27-pt1-PgH6513-4.txt
finished with file  7969
CREC-2017-07-25-pt1-PgH6224-4.txt
finished with file  7969
CREC-2017-04-28-pt1-PgS2637-2.txt
finished with file  7970
CREC-2017-06-21-pt1-PgS3682.txt
finished with file  7971
CREC-2017-06-23-pt1-PgH5113-6.txt
finished with file  7972
CREC-2017-04-24-pt1-PgS2484-3.txt
finished with file  7972
CREC-2017-06-27-pt1-PgH5192-7.txt
finished with file  7972
CREC-2017-07-12-pt1-PgS3963-4.txt
finished with file  7973
CREC-2017-04-20-pt1-PgH2812-8.txt
finished with file  7973
CREC-2017-06-08-pt1-PgH4706-2.txt
finished with file  7974
CREC-2017-04-25-pt1-PgH2847-18.txt
finished with file  7974
CREC-2017-05-02-pt1-PgH3011.txt
fi

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used exist

speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing ro

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
no first_name
<congmember chamber="H" congress="115" role="VOTED YES">
<name type="parsed">Lujan Grisham, M.</name>
</congmember>
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existin

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used exist

used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched succes

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing ro

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
no first_name
<congmember chamber="H" congress="115" role="VOTED YES">
<name type="parsed">Lujan Grisham, M.</name>
</congmember>
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
no first_name
<congmember chamber="H" congress="115" role="VOTED YES">
<name type="parsed">Lujan Grisham, M.</name>
</congmember>
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing 

used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing ro

no first_name
<congmember chamber="H" congress="115" role="VOTED YES">
<name type="parsed">Lujan Grisham, M.</name>
</congmember>
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
no first_name
<congmember chamber="H" congress="115" role="VOTED YES">
<name type="parsed">Lujan Grisham, M.</name>
</congmember>
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing 

used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker ma

speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing ro

speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing ro

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker ma

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used exist

speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing ro

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker ma

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
no first_name
<congmember chamber="H" congress="115" role="VOTED YES">
<name type="parsed">Lujan Grisham, M.</name>
</congmember>
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
no first_name
<congmember chamber="H" congress="115" role="VOTED YES">
<name type="parsed">Lujan Grisham, M.</name>
</congmember>
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
us

used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker matched successfully, used existing row
speaker ma

In [92]:
next_index

8074

In [2]:
path = Path('/Users/halliday/projects/searchlight/parsing/aws_results')
speakers = pd.read_csv(path/'updatedspeakers.csv')
speakers = speakers.drop(0)

In [3]:
speakers = speakers.drop(1)

In [9]:
speakers.to_csv('allspeakers.csv', index=False)

In [5]:
allspeakers = pd.read_csv('allspeakers.csv')

In [8]:
allspeakers

Unnamed: 0,speaker_id,first_name,last_name,chamber,type,party,state,district,bio_guide_id,congress_id
0,2.0,Neil,ABERCROMBIE,HOUSE,REPRESENTATIVE,D,HI,1.0,A000014,107.0
1,1269.0,Spencer,ABRAHAM,SENATE,SENATOR,R,MI,,A000355,106.0
2,2244.0,Ralph,ABRAHAM,HOUSE,REPRESENTATIVE,R,LA,5.0,A000374,114.0
3,4.0,Gary,ACKERMAN,HOUSE,REPRESENTATIVE,D,NY,5.0,A000022,107.0
4,2201.0,Alma,ADAMS,HOUSE,REPRESENTATIVE,D,NC,12.0,A000370,114.0
5,2006.0,Sandy,ADAMS,HOUSE,REPRESENTATIVE,R,FL,24.0,A000366,112.0
6,1460.0,Robert,ADERHOLT,HOUSE,REPRESENTATIVE,R,AL,4.0,A000055,113.0
7,1935.0,John,ADLER,HOUSE,REPRESENTATIVE,D,NJ,3.0,A000364,111.0
8,2229.0,Pete,AGUILAR,HOUSE,REPRESENTATIVE,D,CA,31.0,A000371,114.0
9,7.0,Daniel,AKAKA,SENATE,SENATOR,D,HI,,A000069,107.0


In [10]:
def concat_frames(Path, file_names):
    count = 0
    frame_dict = {}
    for file in file_names:
        frame_dict[count] = pd.read_csv(Path/file)
        count += 1
    frames = list(frame_dict.values())
    return pd.concat(frames)

In [11]:
path = Path('/Users/halliday/projects/searchlight/parsing/aws_results/parsing_results')
file_names = ["speeches_" + str(i) + ".csv" for i in range(1, 144)]
bill_file_names = ["bills_" + str(i) + ".csv" for i in range(1,144)]
concat_speeches = concat_frames(path, file_names)
concat_bills = concat_frames(path, bill_file_names)

In [12]:
concat_speeches

Unnamed: 0,speech_id,last_name,speaker_id,proceeding_id,topic_id,word_count,speech_text,file_name,mods_file,proceeding_title,year,month,day
0,0,FORD,1334.0,proceeding_id,topic_id,127,"Mr.President, I wish to announce that the Comm...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,,1994,3,8
1,1,INOUYE,1369.0,proceeding_id,topic_id,76,"Mr.President, I would like to announce that th...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,,1994,3,8
2,2,GOSS,450.0,proceeding_id,topic_id,153,"Mr.Speaker, the White House and certain high-d...",CREC-1994-02-23-pt1-PgH28.txt,CREC-1994-02-23-pt1-PgH28.xml,,1994,2,23
3,3,HATCH,1351.0,proceeding_id,topic_id,4,"Thank you, Mr. _______...",CREC-1994-03-09-pt1-PgS7.txt,CREC-1994-03-09-pt1-PgS7.xml,,1994,3,9
4,4,RICHARDSON,1883.0,proceeding_id,topic_id,11,"Mr.Speaker, I rise today to ask 81 Republicans...",CREC-1994-03-16-pt1-PgH10.txt,CREC-1994-03-16-pt1-PgH10.xml,,1994,3,16
5,5,LEAHY,1383.0,proceeding_id,topic_id,1361,"Mr.President, I want to speak about the former...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,,1994,2,24
6,6,JOHNSTON,606.0,proceeding_id,topic_id,25,"Mr.President, what is the pending business? T...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,,1994,2,24
7,7,JOHNSTON,606.0,proceeding_id,topic_id,40,"Mr.President, would it be in order to speak on...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,,1994,2,24
8,8,MOAKLEY,820.0,proceeding_id,topic_id,63,"Mr.Speaker, I ask unanimous consent that the C...",CREC-1994-03-09-pt1-PgH30.txt,CREC-1994-03-09-pt1-PgH30.xml,,1994,3,9
9,9,MOYNIHAN,1407.0,proceeding_id,topic_id,37,"Mr.President, I rise to announce that during t...",CREC-1994-02-22-pt1-PgS46.txt,CREC-1994-02-22-pt1-PgS46.xml,,1994,2,22


In [13]:
concat_bills

Unnamed: 0,mods_file,congress_id,context,bill_number,bill_type
0,CREC-1994-01-25-pt1-PgH12.xml,103,OTHER,325,HRES
1,CREC-1994-01-25-pt1-PgH12.xml,103,HEADERLINE,325,HRES
2,CREC-1994-01-25-pt1-PgH14.xml,103,OTHER,326,HRES
3,CREC-1994-01-25-pt1-PgH14.xml,103,HEADERLINE,326,HRES
4,CREC-1994-01-25-pt1-PgH15.xml,103,HEADERLINE,327,HRES
5,CREC-1994-01-25-pt1-PgH15.xml,103,OTHER,327,HRES
6,CREC-1994-01-25-pt1-PgH16.xml,103,OTHER,329,HRES
7,CREC-1994-01-25-pt1-PgH16.xml,103,OTHER,328,HRES
8,CREC-1994-01-25-pt1-PgH16.xml,103,HEADERLINE,328,HRES
9,CREC-1994-01-25-pt1-PgH21.xml,103,HEADERLINE,197,HCONRES


In [15]:
concat_speeches.to_csv('concat_speeches.csv', index=False)
# concat_bills.to_csv('allbills.csv', index=False)

In [17]:
concat_speeches_index = pd.read_csv('concat_speeches.csv')
concat_speeches_index = concat_speeches_index.drop(['Unnamed: 0', "speech_id"], axis=1)

In [19]:
concat_speeches_index['speech_id'] = np.arange(1140685)

In [24]:
# concat_speeches_index.to_csv('allspeecheserrors.csv', index=False)
concat_speeches_index = concat_speeches_index[["speech_id", "topic_id", "word_count", "speech_text", 
                                              "file_name", "mods_file", "speaker_id", "proceeding_title",
                                              "year", "month", "day"]]
concat_speeches_index

Unnamed: 0,speech_id,topic_id,word_count,speech_text,file_name,mods_file,speaker_id,proceeding_title,year,month,day
0,0,topic_id,127,"Mr.President, I wish to announce that the Comm...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,1334.0,,1994,3,8
1,1,topic_id,76,"Mr.President, I would like to announce that th...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,1369.0,,1994,3,8
2,2,topic_id,153,"Mr.Speaker, the White House and certain high-d...",CREC-1994-02-23-pt1-PgH28.txt,CREC-1994-02-23-pt1-PgH28.xml,450.0,,1994,2,23
3,3,topic_id,4,"Thank you, Mr. _______...",CREC-1994-03-09-pt1-PgS7.txt,CREC-1994-03-09-pt1-PgS7.xml,1351.0,,1994,3,9
4,4,topic_id,11,"Mr.Speaker, I rise today to ask 81 Republicans...",CREC-1994-03-16-pt1-PgH10.txt,CREC-1994-03-16-pt1-PgH10.xml,1883.0,,1994,3,16
5,5,topic_id,1361,"Mr.President, I want to speak about the former...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,1383.0,,1994,2,24
6,6,topic_id,25,"Mr.President, what is the pending business? T...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,606.0,,1994,2,24
7,7,topic_id,40,"Mr.President, would it be in order to speak on...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,606.0,,1994,2,24
8,8,topic_id,63,"Mr.Speaker, I ask unanimous consent that the C...",CREC-1994-03-09-pt1-PgH30.txt,CREC-1994-03-09-pt1-PgH30.xml,820.0,,1994,3,9
9,9,topic_id,37,"Mr.President, I rise to announce that during t...",CREC-1994-02-22-pt1-PgS46.txt,CREC-1994-02-22-pt1-PgS46.xml,1407.0,,1994,2,22


In [25]:
concat_speeches_index.to_csv('allspeeches.csv', index=False)

In [76]:
allspeakers = pd.read_csv("/Users/halliday/projects/searchlight/parsing/aws_results/updatedspeakers.csv")
allspeakers

In [86]:
allspeakers.to_csv("allspeakers.csv", index=False)

In [88]:
allspeakers.dtypes

speaker_id      float64
first_name       object
last_name        object
chamber          object
type             object
party            object
state            object
district        float64
bio_guide_id     object
congress_id     float64
dtype: object

# Title Fixing

In [26]:
# path = Path('/Users/halliday/projects/searchlight/parsing/')
speeches = pd.read_csv('allspeeches.csv')
speeches

Unnamed: 0,speech_id,topic_id,word_count,speech_text,file_name,mods_file,speaker_id,proceeding_title,year,month,day
0,0,topic_id,127,"Mr.President, I wish to announce that the Comm...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,1334.0,,1994,3,8
1,1,topic_id,76,"Mr.President, I would like to announce that th...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,1369.0,,1994,3,8
2,2,topic_id,153,"Mr.Speaker, the White House and certain high-d...",CREC-1994-02-23-pt1-PgH28.txt,CREC-1994-02-23-pt1-PgH28.xml,450.0,,1994,2,23
3,3,topic_id,4,"Thank you, Mr. _______...",CREC-1994-03-09-pt1-PgS7.txt,CREC-1994-03-09-pt1-PgS7.xml,1351.0,,1994,3,9
4,4,topic_id,11,"Mr.Speaker, I rise today to ask 81 Republicans...",CREC-1994-03-16-pt1-PgH10.txt,CREC-1994-03-16-pt1-PgH10.xml,1883.0,,1994,3,16
5,5,topic_id,1361,"Mr.President, I want to speak about the former...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,1383.0,,1994,2,24
6,6,topic_id,25,"Mr.President, what is the pending business? T...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,606.0,,1994,2,24
7,7,topic_id,40,"Mr.President, would it be in order to speak on...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,606.0,,1994,2,24
8,8,topic_id,63,"Mr.Speaker, I ask unanimous consent that the C...",CREC-1994-03-09-pt1-PgH30.txt,CREC-1994-03-09-pt1-PgH30.xml,820.0,,1994,3,9
9,9,topic_id,37,"Mr.President, I rise to announce that during t...",CREC-1994-02-22-pt1-PgS46.txt,CREC-1994-02-22-pt1-PgS46.xml,1407.0,,1994,2,22


In [58]:
speeches = speeches.drop(['Unnamed: 0', 'speech_id'], axis=1)

In [59]:
ids = np.arange(557818)

In [60]:
speeches['speech_id'] = ids

In [61]:
speeches = speeches[['speech_id', 'topic_id', 'word_count', 'speech_text', 'file_name', 'mods_file', 'last_name', 'speaker_id', 'proceeding_title', 'year', 'month', 'day']]

In [62]:
allspeeches = speeches.drop(['last_name'], axis=1)
allspeeches

Unnamed: 0,speech_id,topic_id,word_count,speech_text,file_name,mods_file,speaker_id,proceeding_title,year,month,day
0,0,topic_id,153,"Mr.Speaker, the White House and certain high-d...",CREC-1994-02-23-pt1-PgH28.txt,CREC-1994-02-23-pt1-PgH28.xml,450.0,,1994,2,23
1,1,topic_id,11,"Mr.Speaker, I rise today to ask 81 Republicans...",CREC-1994-03-16-pt1-PgH10.txt,CREC-1994-03-16-pt1-PgH10.xml,1883.0,,1994,3,16
2,2,topic_id,341,"Mr.Speaker, I move to suspend the rules and ag...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,850.0,,1994,4,26
3,3,topic_id,170,"Mr.Speaker, I yield myself such time as I may ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,850.0,,1994,4,26
4,4,topic_id,350,"Mr.Speaker, I yield myself such time as I may ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,322.0,,1994,4,26
5,5,topic_id,117,"Mr.Speaker, as the author of House Concurrent ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,1166.0,,1994,4,26
6,6,topic_id,9,"Mr.Speaker, I yield back the balance of my tim...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,322.0,,1994,4,26
7,7,topic_id,76,"Mr.Speaker, I, too, yield back the balance of ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,850.0,,1994,4,26
8,8,topic_id,314,"Mr.Speaker, the debate on the crime bill begin...",CREC-1994-04-12-pt1-PgH21.txt,CREC-1994-04-12-pt1-PgH21.xml,1036.0,,1994,4,12
9,9,topic_id,63,"Mr.Speaker, I ask unanimous consent that the C...",CREC-1994-03-09-pt1-PgH30.txt,CREC-1994-03-09-pt1-PgH30.xml,820.0,,1994,3,9


In [72]:
allspeeches[allspeeches['proceeding_title'].isnull()]

Unnamed: 0,speech_id,topic_id,word_count,speech_text,file_name,mods_file,speaker_id,proceeding_title,year,month,day


In [54]:
# allspeeches = pd.read_csv(path/'parsing_results/allspeeches.csv')
allspeeches = allspeeches.drop(['Unnamed: 0'], axis=1)
allspeeches.to_csv(path/'parsing_results/allspeeches.csv', index=False)

In [2]:
path = Path('/Users/halliday/projects/ltd/ltd-congressscraper/searchlight-website/class/csv')
allspeeches = pd.read_csv(path/'allspeeches.csv')
allspeeches

Unnamed: 0,speech_id,topic_id,word_count,speech_text,file_name,mods_file,speaker_id,proceeding_title,year,month,day
0,0,topic_id,153,"Mr.Speaker, the White House and certain high-d...",CREC-1994-02-23-pt1-PgH28.txt,CREC-1994-02-23-pt1-PgH28.xml,450.0,,1994,2,23
1,1,topic_id,11,"Mr.Speaker, I rise today to ask 81 Republicans...",CREC-1994-03-16-pt1-PgH10.txt,CREC-1994-03-16-pt1-PgH10.xml,1883.0,,1994,3,16
2,2,topic_id,341,"Mr.Speaker, I move to suspend the rules and ag...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,850.0,,1994,4,26
3,3,topic_id,170,"Mr.Speaker, I yield myself such time as I may ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,850.0,,1994,4,26
4,4,topic_id,350,"Mr.Speaker, I yield myself such time as I may ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,322.0,,1994,4,26
5,5,topic_id,117,"Mr.Speaker, as the author of House Concurrent ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,1166.0,,1994,4,26
6,6,topic_id,9,"Mr.Speaker, I yield back the balance of my tim...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,322.0,,1994,4,26
7,7,topic_id,76,"Mr.Speaker, I, too, yield back the balance of ...",CREC-1994-04-26-pt1-PgH38.txt,CREC-1994-04-26-pt1-PgH38.xml,850.0,,1994,4,26
8,8,topic_id,314,"Mr.Speaker, the debate on the crime bill begin...",CREC-1994-04-12-pt1-PgH21.txt,CREC-1994-04-12-pt1-PgH21.xml,1036.0,,1994,4,12
9,9,topic_id,63,"Mr.Speaker, I ask unanimous consent that the C...",CREC-1994-03-09-pt1-PgH30.txt,CREC-1994-03-09-pt1-PgH30.xml,820.0,,1994,3,9


In [68]:
month_dict = {1:'January', 2:'February', 3:'March', 4:'April', 5:'May', 6:'June', 7:'July', 8:'August', 9:'September', 10:'October', 11:'November', 12:'December'}
for i in range(1, 1140685):
    if type(speeches.loc[i, 'month']) == int:
        if i % 10000 == 0:
            print(i)
        speeches.loc[i, 'month'] = month_dict[speeches.loc[i, 'month']]        

960000
970000
980000
990000
1000000
1010000
1020000
1030000
1040000
1050000
1060000
1070000
1080000
1090000
1100000
1110000
1120000
1130000
1140000


In [73]:
speeches.loc[1140684, 'month']

'May'

In [74]:
speeches

Unnamed: 0,speech_id,topic_id,word_count,speech_text,file_name,mods_file,speaker_id,proceeding_title,year,month,day
0,0,topic_id,127,"Mr.President, I wish to announce that the Comm...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,1334.0,,1994,March,8
1,1,topic_id,76,"Mr.President, I would like to announce that th...",CREC-1994-03-08-pt1-PgS46.txt,CREC-1994-03-08-pt1-PgS46.xml,1369.0,,1994,March,8
2,2,topic_id,153,"Mr.Speaker, the White House and certain high-d...",CREC-1994-02-23-pt1-PgH28.txt,CREC-1994-02-23-pt1-PgH28.xml,450.0,,1994,February,23
3,3,topic_id,4,"Thank you, Mr. _______...",CREC-1994-03-09-pt1-PgS7.txt,CREC-1994-03-09-pt1-PgS7.xml,1351.0,,1994,March,9
4,4,topic_id,11,"Mr.Speaker, I rise today to ask 81 Republicans...",CREC-1994-03-16-pt1-PgH10.txt,CREC-1994-03-16-pt1-PgH10.xml,1883.0,,1994,March,16
5,5,topic_id,1361,"Mr.President, I want to speak about the former...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,1383.0,,1994,February,24
6,6,topic_id,25,"Mr.President, what is the pending business? T...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,606.0,,1994,February,24
7,7,topic_id,40,"Mr.President, would it be in order to speak on...",CREC-1994-02-24-pt1-PgS10.txt,CREC-1994-02-24-pt1-PgS10.xml,606.0,,1994,February,24
8,8,topic_id,63,"Mr.Speaker, I ask unanimous consent that the C...",CREC-1994-03-09-pt1-PgH30.txt,CREC-1994-03-09-pt1-PgH30.xml,820.0,,1994,March,9
9,9,topic_id,37,"Mr.President, I rise to announce that during t...",CREC-1994-02-22-pt1-PgS46.txt,CREC-1994-02-22-pt1-PgS46.xml,1407.0,,1994,February,22


In [89]:
speeches.to_csv('allspeeches.csv', index=False)

# Double Checking

In [68]:
speechesDir = Path('/Users/halliday/projects/searchlight/parsing/aws_setup/parsing_results')
speeches = pd.read_csv(speechesDir/'concat_test.csv')
speeches[speeches['last_name'] == '']

Unnamed: 0,speech_id,last_name,speaker_id,proceeding_id,topic_id,word_count,speech_text,file_name,mods_file,proceeding_title,year,month,day


In [49]:
speakersDir = Path('/Users/halliday/projects/searchlight/parsing/aws_setup')
speakers = pd.read_csv(speakersDir/'updatedspeakers.csv')
speakers[speakers['last_name'] == 'UDALL']

Unnamed: 0,speaker_id,first_name,last_name,chamber,type,party,state,district,bio_guide_id,congress_id
1159,1567.0,Tom,UDALL,SENATE,SENATOR,D,NM,,U000039,113.0
1160,1595.0,Mark,UDALL,SENATE,SENATOR,D,CO,,U000038,113.0


In [30]:
speeches.dtypes

speech_id             int64
last_name            object
speaker_id          float64
proceeding_id        object
topic_id             object
word_count            int64
speech_text          object
file_name            object
mods_file            object
proceeding_title     object
year                  int64
month                 int64
day                   int64
dtype: object

In [95]:
billsDir = Path('/Users/halliday/projects/searchlight/parsing')
bills = pd.read_csv(billsDir/'bills_1.csv')
bills

Unnamed: 0,mods_file,congress_id,context,bill_number,bill_type
0,CREC-2017-04-06-pt1-PgH2759-2.xml,115,FIRSTPARAGRAPH,242,HRES
1,CREC-2017-04-06-pt1-PgH2759-2.xml,115,OTHER,444,S
2,CREC-2017-04-06-pt1-PgH2759-2.xml,115,HEADERLINE,1219,HR
3,CREC-2017-04-06-pt1-PgH2759-2.xml,115,FIRSTPARAGRAPH,1219,HR
4,CREC-2017-04-06-pt1-PgH2759-2.xml,115,OTHER,4854,HR
5,CREC-2017-04-06-pt1-PgH2759-2.xml,115,OTHER,4852,HR
6,CREC-2017-04-06-pt1-PgH2759-2.xml,115,OTHER,1219,HR
7,CREC-2017-04-06-pt1-PgH2759-2.xml,115,OTHER,4855,HR
8,CREC-2017-04-06-pt1-PgH2759-2.xml,115,OTHER,242,HRES
9,CREC-2017-04-06-pt1-PgH2767-3.xml,115,TITLE,246,HRES


In [77]:
speakers = pd.read_csv(speechesDir/'updatedspeakers.csv')
speakers[speakers['last_name'] == 'MERKLEY']

Unnamed: 0,speaker_id,first_name,last_name,chamber,type,party,state,district,bio_guide_id,congress_id
789,1900.0,Jeff,MERKLEY,SENATE,SENATOR,D,OR,,M001176,113.0


In [438]:
speakers[speakers['last_name'] == 'BACON']

Unnamed: 0,speaker_id,first_name,last_name,chamber,type,party,state,district,bio_guide_id,congress_id
32,2337,Don,BACON,HOUSE,,R,NE,,B001298,115


In [439]:
speeches.dtypes

speech_id        object
last_name        object
speaker_id       object
proceeding_id    object
topic_id         object
word_count       object
speech_text      object
file_name        object
mods_file        object
dtype: object

In [440]:
speakers.dtypes

speaker_id        int64
first_name       object
last_name        object
chamber          object
type             object
party            object
state            object
district        float64
bio_guide_id     object
congress_id       int64
dtype: object

In [463]:
df = pd.DataFrame(columns=['a', 'b'])

In [466]:
df = df.append({'a': 0, 'b': 1}, ignore_index=True)

In [469]:
df['c'] = [0 for i in range(3)]

In [471]:
df['d'], df['e'] = 1, 2

In [475]:
df.loc[0,'a'] = 4

In [476]:
df

Unnamed: 0,a,b,c,d,e
0,4,1,0,1,2
1,0,1,0,1,2
2,0,1,0,1,2


In [6]:
x = {'a': 1}
y = dict(x)
y['b'] = 2
print(x)
print(y)

{'a': 1}
{'a': 1, 'b': 2}


# Type Fixing

In [8]:
speakers = pd.read_csv('/Users/halliday/projects/searchlight/parsing/aws_parsing_setup/updatedspeakers.csv')
speakers.dtypes

speaker_id      float64
first_name       object
last_name        object
chamber          object
type             object
party            object
state            object
district        float64
bio_guide_id     object
congress_id     float64
dtype: object

In [11]:
speakers['speaker_id'] = speakers['speaker_id'].astype('float64')
speakers.dtypes

speaker_id      float64
first_name       object
last_name        object
chamber          object
type             object
party            object
state            object
district        float64
bio_guide_id     object
congress_id     float64
dtype: object