# Match the witnesses to the *Open Secrets* lobbying categories

#### This script classifies all witnesses according to the *Open Secrets* lobbying categories and subcategories

In [1]:
from TextCollection import *
import keyboard  # using module keyboard

In [2]:
# Switch the working directory to the data folder; the relative paths used
# further down ('Hearings/...', 'OpenSecrets/...') are resolved against it.
data_dir = '../../Data/'
os.chdir(data_dir)

In [3]:
# Raise the stack size and the recursion limit to avoid a maximum recursion
# error during saving.

import resource
import sys

stack_soft, stack_hard = resource.getrlimit(resource.RLIMIT_STACK)
print((stack_soft, stack_hard))
print(sys.getrecursionlimit())

# May segfault without the larger stack. 0x100 is a guess at the size of each stack frame.
max_rec = 0x100000
wanted_stack = 0x100 * max_rec

# An unprivileged process may not raise its hard limit, so the existing hard
# limit is kept and the soft request is clamped to it when it is finite.
if stack_hard != resource.RLIM_INFINITY and wanted_stack > stack_hard:
    print('Requested stack of {} bytes exceeds the hard limit; using {} bytes instead.'.format(
        wanted_stack, stack_hard))
    wanted_stack = stack_hard

resource.setrlimit(resource.RLIMIT_STACK, (wanted_stack, stack_hard))
sys.setrecursionlimit(max_rec)

print(sys.getrecursionlimit())

(8388608, -1)
3000
1048576


<br>

## 1) Preparation: Loading and inspecting the data

In [4]:
# Load the hearings collection and the Open Secrets sector hierarchy
t = load('Hearings/03_desmog_witnesses.pkl')
with open('OpenSecrets/sectors_industries_contributors.json', 'r') as jfile:
    sectors = json.load(jfile)

# The reported period is read from the 'year' list of the first industry of
# the first sector.
covered_years = sectors[0]['industries'][0]['year']
import_summary = 'We imported {} hearings and {} sectors of lobbying organisations for the years {} to {}.\n'
print(import_summary.format(len(t), len(sectors), covered_years[0], covered_years[-1]))

We imported 263 hearings and 13 sectors of lobbying organisations for the years 2003 to 2010.



In [5]:
# List every sector (upper-cased name and id) followed by its industries
print('Each of these sectors is grouped into multiple industries. These are the sectors with their respective industries:\n')
for sector_no, sector in enumerate(sectors):
    sector_id = '({})'.format(sector['id'])
    print(sector_no, sector['name'].upper(), sector_id)
    for industry_no, industry in enumerate(sector['industries']):
        print('\t', industry_no, industry['industry'])
    print('\n')

Each of these sectors is grouped into multiple industries. These are the sectors with their respective industries:

0 AGRIBUSINESS (A)
	 0 Agricultural Services/Products
	 1 Crop Production & Basic Processing
	 2 Dairy
	 3 Farm bureaus
	 4 Food and kindred products manufacturing
	 5 Food Processing & Sales
	 6 Food stores
	 7 Forestry & Forest Products
	 8 Livestock
	 9 Meat processing & products
	 10 Poultry & Eggs
	 11 Sugar cane & sugar beets
	 12 Tobacco
	 13 Vegetables, fruits and tree nut


1 COMMUNICATIONS/ELECTRONICS (B)
	 0 Book, newspaper & periodical publishing
	 1 Cable & satellite TV production
	 2 Commercial TV & radio stations
	 3 Computer software
	 4 Electronics Mfg & Equip
	 5 Internet
	 6 Motion Picture production & distribution
	 7 Printing & Publishing
	 8 Recorded Music & music production
	 9 Telecom Services
	 10 Telephone Utilities
	 11 TV production
	 12 TV/Movies/Music


2 CONSTRUCTION (C)
	 0 Architectural services
	 1 Building Materials & Equipment
	 2 Const

In [6]:
# Check Keywords
# for i, sector in enumerate(sectors):
#     for j, industry in enumerate(sector['industries']):
#             for k, organisation in enumerate(industry['lobbying_groups']):
#                 key = 'Org'
#                 if len(re.findall(key, organisation)) >0:
#                     print(organisation)
#                     print(re.findall(key, organisation))

In [7]:
# Show one lobbying group and one contributor from the first industry of the
# first sector as examples of the organisation names
first_industry = sectors[0]['industries'][0]
print(first_industry['lobbying_groups'][0])
print(first_industry['contributors'][1])

Intl Species Identification System
American Veterinary Medical Assn


In [9]:
# Adapt the lobbying group and contributor names for improved matching

# Abbreviations and their spelled-out form. The pairs are applied top to
# bottom, so the order matters: the specific 'Ind Storage', 'Ind Mortgage' and
# 'Allied-Ind Chem' rules must run before the generic 'Ind ' rule. Patterns
# with a trailing space only match when the abbreviation is followed by a
# space, so an abbreviation at the very end of a name is left as it is.
_ABBREVIATIONS = (
    ('Assn', 'Association'), ('Cmte', 'Committee'), ('Cltn', 'Coalition'),
    ('Wkrs', 'Workers'), ('Natl ', 'National '), ('Ntl ', 'National '),
    ('Cnty', 'County'), ('Cncil ', 'Council '), ('Progs ', 'Programs '),
    ('Corp ', 'Corporation '), ('Fdn', 'Foundation'), ('Fdtns', 'Foundations'),
    ('Intl', 'International'), ('Fedn', 'Federation'), ('Mgmt', 'Management'),
    ('Org ', 'Organisation '), ('Orgs ', 'Organisations '), ('Svc', 'Service'),
    ('Cmpnstn', 'Compensation'), ('Imm ', 'Immigration '), ('Hvy', 'Heavy'), ('Ctrs', 'Centers'),
    ('Ind Storage', 'Industrial Storage'), ('Ind Mortgage', 'Independent Mortgage '),
    ('Allied-Ind Chem', 'Allied-Industrial Chemical'), ('Ind ', 'Industry '), ('All/', 'Alliance/'),
)

# Organisation name as cited in the Congressional Hearings, appended to the
# Open Secrets name.
_PEW_NAME = 'Pew Charitable Trusts'
_PEW_NAME_WITH_ALIAS = 'Pew Charitable Trusts / Pew Center On Global Climate Change'


def _expand_abbreviations(name):
    """Return *name* with every abbreviation in ``_ABBREVIATIONS`` spelled out."""
    for short_form, long_form in _ABBREVIATIONS:
        name = name.replace(short_form, long_form)
    return name


for sector in sectors:
    for industry in sector['industries']:
        lobbying_groups = industry['lobbying_groups']
        for k, organisation in enumerate(lobbying_groups):
            organisation_temp = _expand_abbreviations(organisation)
            # Skip names that already carry the alias so that re-running the
            # cell does not append it a second time.
            if _PEW_NAME_WITH_ALIAS not in organisation_temp:
                organisation_temp = organisation_temp.replace(_PEW_NAME, _PEW_NAME_WITH_ALIAS)
            lobbying_groups[k] = organisation_temp

        contributors = industry['contributors']
        for k, contributor in enumerate(contributors):
            contributors[k] = _expand_abbreviations(contributor)

In [10]:
# Print the same two example names again to inspect the corrected spelling
example_industry = sectors[0]['industries'][0]
print(example_industry['lobbying_groups'][0])
print(example_industry['contributors'][1])

International Species Identification System
American Veterinary Medical Association


<br>

## 2) Matching: match the active lobbying groups for each year to the witnesses

### 2.1) Match perfect matches

In [11]:
%%time
def _find_exact_match(witness):
    """Return the first lobbying group whose name is found in *witness*.

    Sectors, industries and lobbying groups are scanned in their stored order
    and the scan stops at the first hit. The comparison is done on lower-cased
    text, and the group name is prefixed with a space before it is handed to
    ``find_string.first_match``, so a group name at the very start of the
    witness string is not searched for.

    Returns a ``(group, sector_name, industry_name)`` tuple, or ``None`` when
    no group produces a non-empty result.
    """
    witness_lower = witness.lower()
    for sector in sectors:
        for industry in sector['industries']:
            for group in industry['lobbying_groups']:
                if len(find_string.first_match([' ' + group.lower()], witness_lower)) > 0:
                    return group, sector['name'], industry['industry']
    return None


for i, text in enumerate(t.texts):
    # One entry per witness is appended to each list, None meaning "no match",
    # so the three lists stay index-aligned with text['witnesses'].
    text['witness_affiliation'] = []
    text['witness_sector'] = []
    text['witness_industry'] = []
    for j, witness in enumerate(text['witnesses']):
        found = _find_exact_match(witness)
        if found is None:
            group = sector_name = industry_name = None
        else:
            group, sector_name, industry_name = found
            print(i, j, witness, '\n', group, '\n\n')
        text['witness_affiliation'].append(group)
        text['witness_sector'].append(sector_name)
        text['witness_industry'].append(industry_name)

0 1 Martin McBroom, Director, Federal Environmental Affairs, American Electric Power 
 American Electric Power 


0 2 Paul Cicio, Industrial Energy Consumers of America 
 Industrial Energy Consumers of America 


0 3 Margo Thorning, Ph.D., Senior Vice President and Chief Economist, American Council for Capital Formation 
 American Council for Capital Formation 


1 3 Vice Admiral Paul G. Gaffney (Ret.), President, Monmouth University, West Long Branch, New Jersey 
 Monmouth University 


2 1 Mr. Michael Morris, Chairman and CEO, American Electric Power 
 American Electric Power 


2 3 Mr. David Hawkins, Director, Natural Resources Defense Council's Climate Center 
 Natural Resources Defense Council 


2 5 Mr. Stuart Dalton, Director, Generation Sector, Electric Power Research Institute 
 Electric Power Research Institute 


3 0 Mr. Todd Mitchell, President, Houston Advanced Research Center 
 Houston Advanced Research Center 


3 1 Dr. Richard E. Smalley, Director, Carbon Nanotechnology

24 1 Denis, Roberto R., Senior Vice President, Sierra Pacific Resources 
 Sierra Pacific Resources 


24 2 McRae, Gregory J., Hoyt C. Hottel Professor of Chemical Engineering, Department of Chemical Engineering, Massachusetts Institute of Technology 
 Massachusetts Institute of Technology 


24 3 Rencheck, Michael W., Senior Vice President--Engineering, Projects, and Field Services, American Electric Power 
 American Electric Power 


26 2 Ms. Stephanie Meeks, Acting President and CEO, The Nature Conservancy 
 Nature Conservancy 


27 1 Drevna, Charles T., President, National Petrochemical & Refiners Association 
 National Petrochemical & Refiners Association 


28 2 Doniger, David D., policy director, Climate Center, Natural Resources Defense Council 
 Natural Resources Defense Council 


29 3 Ford B. West, President, The Fertilizer Institute 
 Fertilizer Institute 


29 4 Johannes Lehmann, Associate Professor of Soil Fertility Management/Soil Biogeochemistry, Cornell University 
 Cor

65 1 Erdos, Babe, International Executive Board Member, United Mine Workers of America, District 6, Shadyside, Ohio 
 United Mine Workers 


65 5 Murray, Robert E., President and Chief Executive Officer, Murray Energy Corporation, and The Ohio Valley Coal Company 
 Murray Energy 


65 7 Trisko, Eugene M., Attorney at Law, Testifying on behalf of the United Mine Workers of America, Berkeley Springs, West Virginia 
 United Mine Workers 


66 2 Hawkins, David G., Climate Center Program Director, Natural Resources Defense Council 
 Natural Resources Defense Council 


66 5 Rogers, James, CEO and President, Cinergy Corporation, on behalf of the Edison Electric Institute 
 Edison Electric Institute 


66 6 Trisko, Eugene, United Mine Workers of America 
 United Mine Workers 


66 14 Benson, Steven A., senior research manager, Energy and Environmental Center, University of North Dakota 
 University of North Dakota 


66 18 Excerpt, Southern Co.'s Proxy Statement 
 Southern Co 


66 20 McGinni

90 1 David Doniger, Policy Director, Climate Center, Natural Resources Defense Council 
 Natural Resources Defense Council 


90 2 Raymond Ludwiszewski, Partner, Gibson, Dunn & Crutcher LLP 
 Gibson, Dunn & Crutcher 


90 3 Lisa Heinzerling, Professor of Law, Georgetown University Law Center 
 Georgetown University 


90 4 Peter Glaser, Partner, Troutman Sanders LLP 
 Troutman Sanders 


91 3 DTE Biomass Energy, Inc. and the Solid Waste Association of North America, Curtis T. Ranger 
 Solid Waste Association of North America 


91 6 Sharp Electronics, and Solar Energy Industries Association, Chris O'Brien 
 Solar Energy Industries Association 


92 1 Joseph Romm, Senior Fellow, Center for American Progress 
 Center for American Progress 


93 0 Akasofu, Dr. Syun-Ichi, Director, International Arctic Research Center, University of Alaska Fairbanks 
 University of Alaska 


93 3 Murawski, Ph.D., Steven A., Director of Scientific Programs/Chief Science Advisor, National Marine Fisheries Se

112 11 Podesta, John, President and Chief Executive Officer, Center for American Progress Action Fund 
 Center for American Progress 


112 12 Helme, Ned, President, Center for Clean Air Policy 
 Center for Clean Air Policy 


112 18 Hawkins, David, Director, Climate Center, Natural Resources Defense Council 
 Natural Resources Defense Council 


112 19 Trisko, Eugene, Attorney at Law, on behalf of the United Mine Workers of America 
 United Mine Workers 


112 21 Cicio, Paul, President, Industrial Energy Consumers of America 
 Industrial Energy Consumers of America 


113 1 Berrigan, Carol, director, industry infrastructure, Nuclear Energy Institute, Washington, DC 
 Nuclear Energy Institute 


113 3 Thorning, Dr. Margo, senior vice president and chief economist, American Council for Capital Formation, Washington, DC 
 American Council for Capital Formation 


114 3 Bookbinder, David, Chief Climate Counsel, Sierra Club 
 Sierra Club 


115 0 Brehm, Peter, Vice President, Business Deve

130 12 Jeffry E. Sterba, Chairman of the Board and Chief Executive Officer, PNM Resources \2\ Prepared statement 
 PNM Resources 


131 1 Graedel, Thomas E., Ph.D., Professor of Industrial Ecology, Yale University 
 Yale University 


131 3 Liverman, Dr. Diana M., Director, Latin American Studies Program, University of Arizona 
 University of Arizona 


131 4 Solow, Dr. Andrew, Associate Scientist and Director, Marine Policy Center, Woods Hole Oceanographic Institution 
 Woods Hole Oceanographic Institution 


132 0 Anderson, A. Scott, Senior Policy Advisor, Environmental Defense Fund, Austin, Tx 
 Environmental Defense Fund 


132 2 Der, Victor K., Acting Assistant Secretary, Office Of Fossil Energy, Department Of Energy 
 Foss 


132 3 Freudenthal, Hon. Dave, Governor, State Of Wyoming, Cheyenne, Wy 
 State of Wyoming 


132 5 Moor, Karl, Vice President & Associate General Counsel, Southern Company, Atlanta, Ga 
 Southern Co 


133 2 Svenson, Eric, Vice President, Environment, Health

156 3 Arthur J. Alexander, Ph.D., Adjunct Professor of Asian Studies and Economics, Georgetown University (former President of the Japan Economic Institute) 
 Georgetown University 


157 3 Mr. Richard L. Kauffman, Chairman of the Board, Levi Strauss & Co 
 Levi Strauss & Co 


158 4 Evans, Ronald T., Denbury Resources, Inc., Plano, Tx 
 Denbury Resources 


158 7 Slutz, James, Acting Principal Deputy Assistant Secretary, Office Of Fossil Energy, Department Of Energy 
 Foss 


159 2 Rice, John G., vice chairman, General Electric, Washington, DC 
 General Electric 


159 3 Rowlan, Steven, general manager, environmental affairs, Nucor Corporation, on behalf of Dan DiMicco, chairman, president, and chief executive officer, Nucor Corporation 
 Nucor Corp 


161 2 Lal, Rattan, Director, Carbon Management and Sequestration Center, Ohio State University 
 Ohio State University 


161 3 Reed, Debbie, Legislative Director, National Environmental Trust 
 National Environmental Trust 


161 4 Ros

178 5 John S. Hill, Director for Economic and Environmental Justice, United Methodist Church, General Board of Church and Society 
 METHOD 


179 1 Eakin, C. Mark, Ph.D., Coordinator, Coral Reef Watch, National Environmental Satellite, Data, and Information Service, National Oceanic and Atmospheric Administration, U.S. Department of Commerce 
 Oceanic 


179 3 Haney, J. Christopher, Ph.D., Chief Scientist, Defenders of Wildlife 
 Defenders of Wildlife 


179 5 Lawler, Joshua J., Ph.D., Assistant Professor, College of Forest Resources, University of Washington 
 University of Washington 


179 8 Root, Terry L., Ph.D., Senior Fellow University Faculty, Stanford University 
 Stanford University 


180 0 Peter Bradford, Former Member, U.S. Nuclear Regulatory Commission, Former Chairman, New York State Public Service Commission, Former Chairman, Maine Public Utilities Commission, Adjunct Professor, Vermont Law School 
 Vermont Law School 


180 2 Jack Spencer, Research Fellow In Nuclear Ene

205 0 Corwin, R. Scott, Executive Director, Public Power Council, Portland, Oregon 
 Public Power Council 


205 5 Rahill, Edward M., Senior Vice President of Finance and CFO, ITC Holdings, Transmission Company, Novi, Michigan 
 ITC Holdings 


205 6 Wright, Stephen J., Administrator, Bonneville Power Administration, U.S. Department of Energy, Portland, Oregon 20 Prepared statement of 
 Portland, OR 


206 2 Rich Wells, Vice President, Energy, The Dow Chemical Corporation 
 Dow Chemical 


206 3 Iain Campbell, Vice President and General Manager, Johnson Controls, Inc 
 Johnson Controls 


206 4 John Anderson, President, Electricity Consumers Resource Council 
 Electricity Consumers Resource Council 


206 5 Bryan Reichel, President and CEO, PureChoice, Inc 
 PureChoice 


207 2 Mr. Frank Nutter, President, Reinsurance Association of America 
 Reinsurance Association of America 


208 0 Buis, Tom, President, National Farmers Union, Washington, D.C 
 National Farmers Union 


208 1 Damga

223 22 David Doniger, Policy Director, Climate Center, Natural Resources Defense Council 
 Natural Resources Defense Council 


223 23 Michael Morris, Chairman Of The Board, President, And Chief Executive Officer, American Electric Power 
 American Electric Power 


223 24 Michael Walsh, Senior Vice President, Chicago Climate Exchange 
 Chicago Climate Exchange 


223 25 Rafe Pomerance, Chairman, Climate Policy Center 
 Climate Policy Center 


224 1 Culbertson, Tim, Representative For National Hydropower Association, Ephrata, Wa 
 National Hydropower Association 


224 4 Mote, Philip W., Ph.D., Research Scientist, Jisao-Cses Climate Impacts Group, University Of Washington, Seattle, Wa 
 University of Washington 


224 5 O'Toole, Patrick, President, Family Farm Alliance, Savery, Wy 
 Family Farm Alliance 


224 6 Udall, Bradley H., Director, National Oceanic And Atmospheric Administration-University Of Colorado Western Water Assessment, Boulder, Co 
 Oceanic 


224 7 Williams, Jack, Se

241 0 Box, Matthew, Vice-Chairman, Southern Ute Indian Tribe 
 Ute Indian Tribe 


241 1 Lee, Robert G., Ph.D., Professor, Sociology of Natural Resources, University of Washington 
 University of Washington 


241 4 Schweitzer, Hon. Brian, Governor, State of Montana 
 State of Montana 


241 6 Williams, Hon. Pat, Former Congressman and Senior Fellow, O'Connor Center for the Rocky Mountain West, University of Montana 
 University of Montana 


242 1 Dr. Daniel Lashof, Director, Climate Center, Natural Resources Defense Council 
 Natural Resources Defense Council 


242 3 Dr. William Whitesell, Director of Policy Research, Center for Clean Air Policy 
 Center for Clean Air Policy 


242 4 Michelle Chan, Program Director, Green Investments, Friends of the Earth--United States, San Francisco, California 
 Friends of the Earth 


242 5 Dr. Gilbert Metcalf, Professor of Economics, Tufts University, Medford, Massachusetts 
 Tufts University 


242 6 Dr. Margo Thorning, Senior Vice President a

In [12]:
# Print the witnesses of entry 0 together with the three matched fields
for key in ('witnesses', 'witness_affiliation', 'witness_sector', 'witness_industry'):
    t.print_key(key, 0)

['John McMackin, Jr., Williams and Jenson, PLLC, On Behalf Of The Energy Intensive Manufacturers Working Group on Greenhouse Gas Regulations', 'Martin McBroom, Director, Federal Environmental Affairs, American Electric Power', 'Paul Cicio, Industrial Energy Consumers of America', 'Margo Thorning, Ph.D., Senior Vice President and Chief Economist, American Council for Capital Formation', 'Richard D. Morgenstern, Senior Fellow, Resources for the Future', 'Eileen Claussen, President, Pew Center on Global Climate Change']
[None, 'American Electric Power', 'Industrial Energy Consumers of America', 'American Council for Capital Formation', None, None]
[None, 'Energy & Natural Resources', 'Misc Business', 'Misc Business', None, None]
[None, 'Electric Utilities', 'Misc Manufacturing & Distributing', 'Business Associations', None, None]


In [17]:
# Summary: count all witnesses and those that carry a matched affiliation
matched = 0
witnesses = 0

for text in t.texts:
    for i, witness in enumerate(text['witnesses']):
        witnesses += 1
        # None marks a witness without a match
        if text['witness_affiliation'][i] is not None:
            matched += 1

print('The affiliations of {} out of {} witnesses were successfully matched.\n\n'.format(matched, witnesses))

The affiliations of 706 out of 1789 witnesses were successfully matched.




<br>

### 2.2) Correct false matches


In [16]:
# Check for falsely matched one-worded affiliations.
# Every match whose affiliation consists of a single word is shown and has to
# be confirmed ('y') or rejected ('n') interactively; rejected matches are
# reset to None.
count = 0
for i, text in enumerate(t.texts):
    for j, witness in enumerate(text['witnesses']):
        affiliation = text['witness_affiliation'][j]
        if affiliation is None or len(affiliation.split()) != 1:
            continue

        print(i, j, witness, '\n',
              affiliation, '\n\n')
        print('Match? [y/n]:')
        x = input().strip().lower()

        # Keep asking until the answer is usable, so that a mistyped entry
        # can no longer leave the match unreviewed.
        while x not in ('y', 'n'):
            print('\n\nWrong entry! Try again:\n',
                  i, j, witness, '\n',
                  affiliation, '\n\n')
            print('Match? [y/n]:')
            x = input().strip().lower()

        if x == 'n':
            text['witness_affiliation'][j] = None
            text['witness_sector'][j] = None
            text['witness_industry'][j] = None
            count += 1

print(f'\n\nAll done! {count} falsely matched witnesses removed.')

14 4 Fahey, Dr. David W., Research Physicist, National Oceanic And Atmospheric Administration 
 Oceanic 


Match? [y/n]:
n
22 2 Michael Carey, President, Ohio Coal Association 
 CARE 


Match? [y/n]:
n
45 7 Baugh, Robert C., executive director, AFL-CIO Industrial Union Council and Chair, AFL-CIO Energy Task Force 
 AFL-CIO 


Match? [y/n]:
y
50 2 Mckay, Lamar, Chairman And President, Bp America, Inc., Houston, Tx 
 BP 


Match? [y/n]:
y
63 1 Lubchenco, Jane, Ph.D., Under Secretary of Commerce for Oceans and Atmosphere, National Oceanic and Atmospheric Administration, U.S. Department of Commerce 
 Oceanic 


Match? [y/n]:
n
70 4 Thomas W. LaSorda, chief executive officer and president, Chrysler Group of DaimlerChrysler, Auburn Hills, MI 
 DaimlerChrysler 


Match? [y/n]:
y
76 2 T. Boone Pickens, Chairman, BP Capital 
 BP 


Match? [y/n]:
y
93 3 Murawski, Ph.D., Steven A., Director of Scientific Programs/Chief Science Advisor, National Marine Fisheries Service and Ecosystem Goal Team Lea

In [19]:
# Check for falsely matched civil servants (location instead of organisation).
# Every match in the industry 'Civil Servants/Public Officials' is shown and
# has to be confirmed ('y') or rejected ('n') interactively; rejected matches
# are reset to None.
count = 0
for i, text in enumerate(t.texts):
    for j, witness in enumerate(text['witnesses']):
        affiliation = text['witness_affiliation'][j]
        if affiliation is None:
            continue
        if text['witness_industry'][j] != 'Civil Servants/Public Officials':
            continue

        print(i, j, witness, '\n',
              affiliation, '\n\n')
        print('Match? [y/n]:')
        x = input().strip().lower()

        # Keep asking until the answer is usable, so that a mistyped entry
        # can no longer leave the match unreviewed.
        while x not in ('y', 'n'):
            print('\n\nWrong entry! Try again:\n',
                  i, j, witness, '\n',
                  affiliation, '\n\n')
            print('Match? [y/n]:')
            x = input().strip().lower()

        if x == 'n':
            text['witness_affiliation'][j] = None
            text['witness_sector'][j] = None
            text['witness_industry'][j] = None
            count += 1

print(f'\n\nAll done! {count} falsely matched witnesses removed.')

23 2 Becker, Hon. Ralph, Mayor, Salt Lake City, Utah 
 Salt Lake City, UT 


Match? [y/n]:
y
23 3 Bragdon, David, President, Metro Council, Portland, Oregon Region 
 Portland, OR 


Match? [y/n]:
y
37 4 Jack Lavin, Director, Department Of Commerce And Economic Opportunity, State Of Illinois 
 State of Illinois 


Match? [y/n]:
y
45 12 Sims, Ron, county executive of King County, Washington 
 King County, WA 


Match? [y/n]:
y
58 5 Zepponi, David, President, Northwest Food Processors Association, Portland, Or 
 Portland, OR 


Match? [y/n]:
n
66 22 Nappier, Hon. Denise, Treasurer, State of Connecticut 
 State of Connecticut 


Match? [y/n]:
y
76 9 The Honorable Rod Dole, Auditor, Controller, Treasury, and Tax Collector, Sonoma County, California 
 Sonoma County, CA 


Match? [y/n]:
y
77 0 Ritter, Hon. Bill Jr., Governor, State of Colorado 
 State of Colorado 


Match? [y/n]:
y
77 1 Gregoire, Hon. Chris, Governor, State of Washington 
 State of Washington 


Match? [y/n]:
y
77 2 Hoeven, H

In [21]:
# Summary: count all witnesses and those that still carry a matched affiliation
matched = 0
witnesses = 0

for text in t.texts:
    for i, witness in enumerate(text['witnesses']):
        witnesses += 1
        # None marks a witness without a match
        if text['witness_affiliation'][i] is not None:
            matched += 1

print('The affiliations of {} out of {} witnesses were successfully matched.\n\n'.format(matched, witnesses))

The affiliations of 671 out of 1789 witnesses were successfully matched.




<br>

### 2.3) Match partial matches

In [22]:
def _iter_lobbying_groups():
    """Yield ``(sector, industry, lobbying_group)`` for every lobbying group,
    in the stored order of sectors, industries and groups."""
    for sector in sectors:
        for industry in sector['industries']:
            for lobbying_group in industry['lobbying_groups']:
                yield sector, industry, lobbying_group


# Witnesses that are still unmatched are compared with every lobbying group.
# Candidates with a partial ratio of at least 90 are shown and have to be
# accepted ('y') or rejected ('n') interactively. The first accepted candidate
# is stored and ends the search for that witness.
count = 0
for i, text in enumerate(t.texts):
    for j, witness in enumerate(text['witnesses']):
        if text['witness_affiliation'][j] is not None:
            continue

        for sector, industry, lobbying_group in _iter_lobbying_groups():
            ratio = fuzz.partial_ratio(lobbying_group, witness)
            if ratio < 90:
                continue

            print(i, j, ratio, ': ',
                  witness,'\n\t',
                  lobbying_group, '\n\t')
            print('Match? [y/n]:')
            x = input().strip().lower()

            # Keep asking until the answer is usable, so that a mistyped
            # entry can no longer skip the candidate silently.
            while x not in ('y', 'n'):
                print('\n\nWrong entry! Try again:\n', i, j, ratio, ': ',
                      witness,'\n\t',
                      lobbying_group, '\n\t')
                print('Match? [y/n]:')
                x = input().strip().lower()

            print('\n')
            if x == 'y':
                text['witness_affiliation'][j] = lobbying_group
                text['witness_sector'][j] = sector['name']
                text['witness_industry'][j] = industry['industry']
                count += 1
                break

print('The affiliation of {} witnesses was matched.\n'.format(count))

0 0 91 :  John McMackin, Jr., Williams and Jenson, PLLC, On Behalf Of The Energy Intensive Manufacturers Working Group on Greenhouse Gas Regulations 
	 Arkin Group 
	
Match? [y/n]:
n


3 4 100 :  Dr. Franklin R. Chang-Diaz, NASA Astronaut and Director of the Advanced Space Propulsion Laboratory, Johnson Space Center 
	 ASA 
	
Match? [y/n]:
n


4 3 94 :  W. David Montgomery, Ph.D., Vice President, Environmental Practice, CRA International 
	 SRA International 
	
Match? [y/n]:
n


4 3 94 :  W. David Montgomery, Ph.D., Vice President, Environmental Practice, CRA International 
	 ACA International 
	
Match? [y/n]:
n


4 3 93 :  W. David Montgomery, Ph.D., Vice President, Environmental Practice, CRA International 
	 M International 
	
Match? [y/n]:
n


4 3 93 :  W. David Montgomery, Ph.D., Vice President, Environmental Practice, CRA International 
	 M International 
	
Match? [y/n]:
n


4 3 94 :  W. David Montgomery, Ph.D., Vice President, Environmental Practice, CRA International 
	 ECR Int

n


43 1 93 :  Sandalow, Hon. David B., Assistant Secretary for Policy and International Affairs, U.S. Department of Energy 
	 M International 
	
Match? [y/n]:
n


43 1 93 :  Sandalow, Hon. David B., Assistant Secretary for Policy and International Affairs, U.S. Department of Energy 
	 M International 
	
Match? [y/n]:
n


44 0 92 :  Jensen, Jay, Deputy Under Secretary for Natural Resources and Environment, U.S. Department of Agriculture, Washington, D.C 
	 Washington DC 
	
Match? [y/n]:
n


44 1 90 :  Koehn, Steve, Maryland State Forester, on behalf of the National Association of State Foresters, Parkton, Maryland 
	 National Association of State Treasurers 
	
Match? [y/n]:
n


44 3 94 :  McPeek, Brian, North America Conservation Region Director, The NatureConservancy, Denver, Colorado 
	 Nature Conservancy 
	
Match? [y/n]:
y


45 2 94 :  Smith, Anne E., Ph.D., vice president, CRA International 
	 SRA International 
	
Match? [y/n]:
n


45 2 94 :  Smith, Anne E., Ph.D., vice president, 

y


79 3 93 :  Durham, Michael D., President and CEO, ADA Environmental Solutions 
	 Renal Solutions 
	
Match? [y/n]:
n


79 3 93 :  Durham, Michael D., President and CEO, ADA Environmental Solutions 
	 Renal Solutions 
	
Match? [y/n]:
n


79 8 93 :  Dungan, Arthur E., President, The Chlorine Institute, Inc 
	 Wine Institute 
	
Match? [y/n]:
n


81 0 93 :  Breehey, Abraham, legislative representative, Government Affairs Department, International Brotherhood of Boilermakers 
	 M International 
	
Match? [y/n]:
n


81 0 93 :  Breehey, Abraham, legislative representative, Government Affairs Department, International Brotherhood of Boilermakers 
	 M International 
	
Match? [y/n]:
n


85 7 91 :  Janet Kavinoky, Director, Transportation Infrastructure Congressional & Public Affairs and Executive Director, Americans for Transportation Mobility Coalition, United States Chamber of Commerce 
	 US Chamber of Commerce 
	
Match? [y/n]:
y


85 8 92 :  Edward Wytkind, President, Transportation Trades 

n


113 2 92 :  Green, Dr. Kenneth P., resident scholar, American Enterprise Institute for Public Policy Research, Washington, DC 
	 Washington DC 
	
Match? [y/n]:
n


113 6 100 :  American Petroleum Institute 
	 American Petroleum Institute 
	
Match? [y/n]:
y


113 7 91 :  Association of American Universities and Association of Public and Land-grant Universities 
	 Association of Public & Land-Grant Universities 
	
Match? [y/n]:
y


114 4 95 :  Kovacs, Bill, Vice President, Environment, Technology and Regulatory Affairs, U.S. Chamber of Commerce 
	 US Chamber of Commerce 
	
Match? [y/n]:
y


115 3 100 :  Nutter, Hon. Michael A., Mayor, Philadelphia, Pennsylvania; Trustee, U.S. Conference of Mayors 
	 Truste 
	
Match? [y/n]:
n


115 3 96 :  Nutter, Hon. Michael A., Mayor, Philadelphia, Pennsylvania; Trustee, U.S. Conference of Mayors 
	 US Conference of Mayors 
	
Match? [y/n]:
y


115 6 91 :  Vassey, Brett A., President and Chief Executive Officer, Virginia Manufacturers Association 
	

n


170 13 90 :  Adams, Gordon, Dr.; Fellow, Woodrow Wilson International Center for Scholars................................................ 102, 
	 Edison International 
	
Match? [y/n]:
n


170 13 93 :  Adams, Gordon, Dr.; Fellow, Woodrow Wilson International Center for Scholars................................................ 102, 
	 M International 
	
Match? [y/n]:
n


170 13 93 :  Adams, Gordon, Dr.; Fellow, Woodrow Wilson International Center for Scholars................................................ 102, 
	 M International 
	
Match? [y/n]:
n


170 14 93 :  Bergsten, Fred C., Dr.; Director, Peterson Institute for International Economics....................................... 15, 
	 M International 
	
Match? [y/n]:
n


170 14 93 :  Bergsten, Fred C., Dr.; Director, Peterson Institute for International Economics....................................... 15, 
	 M International 
	
Match? [y/n]:
n


170 15 93 :  Cline, William, Dr.; Senior Fellow, Peterson Institute for International Ec

n


224 0 91 :  Brick, Tim F., Chairman, Metropolitan Water District Of Southern California, Los Angeles, Ca 
	 Southern Co 
	
Match? [y/n]:
n


224 0 91 :  Brick, Tim F., Chairman, Metropolitan Water District Of Southern California, Los Angeles, Ca 
	 Southern Co 
	
Match? [y/n]:
n


224 6 100 :  Udall, Bradley H., Director, National Oceanic And Atmospheric Administration-University Of Colorado Western Water Assessment, Boulder, Co 
	 Oceanic 
	
Match? [y/n]:
n


224 6 95 :  Udall, Bradley H., Director, National Oceanic And Atmospheric Administration-University Of Colorado Western Water Assessment, Boulder, Co 
	 University of Colorado 
	
Match? [y/n]:
y


225 0 93 :  Dr. Gary Yohe, Professor of Economics, Wesleyan University 
	 Kean University 
	
Match? [y/n]:
n


231 15 90 :  David Manning, Vice President, External Affairs, National Grid 
	 National Grid Co 
	
Match? [y/n]:
y


231 16 94 :  Yvette Pena, Legislative Director of the Blue Green Alliance 
	 BlueGreen Alliance 
	
Match? 

n


The affiliation of 69 witnesses was matched.



In [None]:
# Reset every record whose affiliation was matched to 'Capital Partners'.
# NOTE(review): the witness entry itself is set to None as well, not only the
# three match fields - confirm that this is intended.
cleared_keys = ('witnesses', 'witness_affiliation', 'witness_sector', 'witness_industry')
for i, text in enumerate(t.texts):
    for j, witness in enumerate(text['witnesses']):
        if text['witness_affiliation'][j] != 'Capital Partners':
            continue
        # Optional inspection before resetting:
        # print(i, j, witness, ': ', text['witness_industry'][j])
        for key in cleared_keys:
            text[key][j] = None

In [23]:
# Check result: print the second witness of entry 5 with its matched fields
checked_text = t.texts[5]
for key in ('witnesses', 'witness_affiliation', 'witness_sector', 'witness_industry'):
    print(checked_text[key][1])

Mr. Thomas N. Hansen, Vice President, Environmental Services, Conservation and Renewable Energy, Tucson Electric Power
Tucson Electric Power
Energy & Natural Resources
Electric Utilities


In [24]:
# Summary: count all witnesses and those that carry a matched affiliation
matched = 0
witnesses = 0

for text in t.texts:
    for i, witness in enumerate(text['witnesses']):
        witnesses += 1
        # None marks a witness without a match
        if text['witness_affiliation'][i] is not None:
            matched += 1

print('The affiliations of {} out of {} witnesses were successfully matched.\n\n'.format(matched, witnesses))

The affiliations of 740 out of 1789 witnesses were successfully matched.




### Save data


In [35]:
# Store the collection with the matched affiliations, sectors and industries.
# Last completed on Dec 3, 2020
output_path = 'Hearings/04_opensecrets_industries_contributors.pkl'
save_as(t, output_path)