# Match the witnesses to the *Open Secrets* lobbying categories

#### This script classifies all witnesses according to the *Open Secrets* lobbying categories and subcategories

In [1]:
from CommitteeHearingsFunctions import *

# Change directory
# A bare relative chdir is not idempotent: running this cell a second time in
# the same kernel would climb two further levels up from the data directory.
# The start directory is therefore recorded once and the data path is always
# resolved against it, so the cell can be re-run safely.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, '../../Data/'))

In [2]:
# Load the data: parse the hearings JSON file into `df`
# (later cells iterate over it as a list of dicts, one per hearing text)
hearings_path = 'CommitteeHearings/hearings_witnesses_contrarians.json'
with open(hearings_path, 'r') as hearings_file:
    df = json.load(hearings_file)

### Load and prepare the OpenSecrets lobbying sectors and industries

In [3]:
# Load the OpenSecrets lobbying sectors and industries
with open('OpenSecrets/sectors_industries_contributors.json', 'r') as sectors_file:
    sectors = json.load(sectors_file)

# The reported period is taken from the year list of the first industry of the first sector
covered_years = sectors[0]['industries'][0]['year']
print('We imported {} sectors of lobbying organisations for the years {} to {}.\n'.format(
    len(sectors), covered_years[0], covered_years[-1]))

# Overview: every sector (upper-cased, with its id) followed by its numbered industries
print('Each of these sectors is grouped into multiple industries. These are the sectors with their respective industries:\n')
for sector_pos, sector in enumerate(sectors):
    sector_label = sector['name'].upper()
    print(sector_pos, sector_label, '({})'.format(sector['id']))
    for industry_pos, industry in enumerate(sector['industries']):
        print('\t', industry_pos, industry['industry'])
    print('\n')

We imported 13 sectors of lobbying organisations for the years 2003 to 2010.

Each of these sectors is grouped into multiple industries. These are the sectors with their respective industries:

0 AGRIBUSINESS (A)
	 0 Agricultural Services/Products
	 1 Crop Production & Basic Processing
	 2 Dairy
	 3 Farm bureaus
	 4 Food and kindred products manufacturing
	 5 Food Processing & Sales
	 6 Food stores
	 7 Forestry & Forest Products
	 8 Livestock
	 9 Meat processing & products
	 10 Poultry & Eggs
	 11 Sugar cane & sugar beets
	 12 Tobacco
	 13 Vegetables, fruits and tree nut


1 COMMUNICATIONS/ELECTRONICS (B)
	 0 Book, newspaper & periodical publishing
	 1 Cable & satellite TV production
	 2 Commercial TV & radio stations
	 3 Computer software
	 4 Electronics Mfg & Equip
	 5 Internet
	 6 Motion Picture production & distribution
	 7 Printing & Publishing
	 8 Recorded Music & music production
	 9 Telecom Services
	 10 Telephone Utilities
	 11 TV production
	 12 TV/Movies/Music


2 CONSTRUCTI

In [4]:
# Create sorting id to sort the sectors by topic relevance
# Entry k is the rank given to the k-th sector in file order (lower ranks sort first)
sorting_ids = [7, 9, 8, 10, 0, 12, 11, 1, 6, 3, 2, 5, 4]
for position, sector in enumerate(sectors):
    sector['sorting_id'] = sorting_ids[position]

# Show the resulting order
for ranked_sector in sorted(sectors, key=lambda entry: entry['sorting_id']):
    print(ranked_sector['name'])

Energy & Natural Resources
Ideological/Single-Issue
Misc Business
Lawyers & Lobbyists
Transportation
Other
Labor
Agribusiness
Construction
Communications/Electronics
Defense
Health
Finance, Insurance & Real Estate


In [5]:
# Example organisation names (first industry of the first sector, entries 0 and 18)
example_groups = sectors[0]['industries'][0]['lobbying_groups']
for example_index in (0, 18):
    print(example_groups[example_index])

Intl Species Identification System
Surebeam Corp


In [6]:
# Adapt the lobbying group names for improved matching

# Abbreviations that are only expanded when they stand alone as a word, i.e.
# start at a word boundary and are followed by a space or the end of the name.
# Applied in this order.
WORD_ABBREVIATIONS = [
    ('Corp', 'Corporation'),
    ('Co', 'Company'),
    ('Org', 'Organisation'),
    ('Orgs', 'Organisations'),
    ('Progs', 'Programs'),
]

# Plain substring replacements. They are applied sequentially, so the order
# matters: the specific 'Ind ...' cases must run before the generic 'Ind '.
SUBSTRING_ABBREVIATIONS = [
    ('Assn', 'Association'), ('Cmte', 'Committee'), ('Cltn', 'Coalition'),
    ('Wkrs', 'Workers'), ('Natl ', 'National '), ('Ntl ', 'National '),
    ('Cnty', 'County'), ('Cncil', 'Council'),
    ('Fdn', 'Foundation'), ('Fdtns', 'Foundations'), ('Fedn', 'Federation'),
    ('Intl', 'International'), ('Mgmt', 'Management'), ('Svc', 'Service'),
    ('Cmpnstn', 'Compensation'), ('Imm ', 'Immigration '), ('Hvy', 'Heavy'),
    ('Ind Storage', 'Industrial Storage'),
    # NOTE(review): the trailing space below yields a double space when another
    # word follows ('Independent Mortgage  Bankers') - confirm this is intended.
    ('Ind Mortgage', 'Independent Mortgage '),
    ('Allied-Ind Chem', 'Allied-Industrial Chemical'), ('Ind ', 'Industry '),
    ('Ctrs', 'Centers'), (' & ', ' and '), ('All/', 'Alliance/'),
]


def expand_abbreviations(name):
    """Return the lobbying group name with its abbreviations spelled out.

    Parameters
    ----------
    name : str
        Lobbying group name as stored in the sectors data.

    Returns
    -------
    str
        The name after applying WORD_ABBREVIATIONS and then
        SUBSTRING_ABBREVIATIONS, each in list order.
    """
    for abbreviation, expansion in WORD_ABBREVIATIONS:
        # The lookahead keeps the following space in place: the previous pattern
        # 'Corp$|Corp ' consumed it and glued the next word onto the expansion
        # ('Corp of' -> 'Corporationof'). The leading \b stops the abbreviation
        # from matching the tail of a longer word ('PepsiCo Inc').
        name = re.sub(r'\b' + abbreviation + r'(?=$| )', expansion, name)
    for abbreviation, expansion in SUBSTRING_ABBREVIATIONS:
        name = name.replace(abbreviation, expansion)
    return name


# Rewrite every group name in place (slice assignment keeps the same list objects)
for sector in sectors:
    for industry in sector['industries']:
        industry['lobbying_groups'][:] = [
            expand_abbreviations(organisation) for organisation in industry['lobbying_groups']
        ]

In [7]:
# Adapted organisation names (same two entries as shown before the adaptation)
adapted_groups = sectors[0]['industries'][0]['lobbying_groups']
for adapted_index in (0, 18):
    print(adapted_groups[adapted_index])

International Species Identification System
Surebeam Corporation


### Match lobbying groups to the witnesses by name

In [8]:
# Match perfect matches

# Build the search patterns once instead of once per witness. Candidates are
# ordered by sector sorting_id, then by industry and group order within the
# data, so the first hit for a witness is the same one the nested loops found.
# The group name is escaped so that characters such as '.', '*', '(' or '+'
# are matched literally rather than interpreted as regex syntax.
# (Assumes first_match treats its patterns as regular expressions, as the
# '(?:$| |,)' suffix implies.)
candidates = [
    (' ' + re.escape(group.lower()) + '(?:$| |,)', group, sector['name'], industry['industry'])
    for sector in sorted(sectors, key=lambda d: d['sorting_id'])
    for industry in sector['industries']
    for group in industry['lobbying_groups']
]

count = 0
for i, text in enumerate(tqdm(df)):
    # One entry per witness is appended to each list; None marks "no match"
    text['witness_affiliation'] = []
    text['witness_sector'] = []
    text['witness_industry'] = []
    for j, witness in enumerate(text['witnesses']):
        # Normalise the witness string the same way the group names were adapted
        witness_text = witness.lower().replace(' & ', ' and ')
        match, matched_sector, matched_industry = None, None, None
        for pattern, group, sector_name, industry_name in candidates:
            if len(first_match([pattern], witness_text)) > 0:
                print(i, j, witness, '\n', group, '\n\n')
                match, matched_sector, matched_industry = group, sector_name, industry_name
                count += 1
                break  # keep only the first (highest-priority) match
        text['witness_affiliation'].append(match)
        text['witness_sector'].append(matched_sector)
        text['witness_industry'].append(matched_industry)

print(f'\n\nAll done! {count} witnesses matched.')

  0%|          | 0/117 [00:00<?, ?it/s]

0 1 Curry, William B., Director, Ocean and Climate Change Institute, Woods Hole Oceanographic Institution 
 Woods Hole Oceanographic Institution 


0 4 Mote, Ph.D., Philip W., Joint Institute for the Study of the Atmosphere and Ocean, Climate Impacts Group, University of Washington 
 University of Washington 


2 3 Krupp, Fred, President, Environmental Defense Fund 
 Environmental Defense Fund 


3 1 Hawkins, David G., Climate Center Program Director, Natural Resources Defense Council 
 Natural Resources Defense Council 


3 4 Rogers, James, CEO and President, Cinergy Corporation, on behalf of the Edison Electric Institute 
 Edison Electric Institute 


3 5 Trisko, Eugene, United Mine Workers of America 
 United Mine Workers 


3 12 Benson, Steven A., senior research manager, Energy and Environmental Center, University of North Dakota 
 University of North Dakota 


3 17 McGinnis, Jim, managing director, Morgan Stanley 
 Morgan Stanley 


3 18 Monroe, Larry S., program manager, Office 

17 9 Dr. Roger A. Pielke, Jr., Professor of Environmental Studies Program at the University of Colorado and Director of the Center for Science and Technological Policy Research 
 University of Colorado 


18 0 Cinnamon, Barry, CEO, Akeena Solar President, California Solar Energy Industries Association 
 Akeena Solar 


18 1 Hanemann, W. Michael, Chancellor's Professor, Department of Agriculture and Resource Economics, and Goldman School of Public Policy Director, California Climate Change Center at UC Berkeley, University of California, Berkeley 
 University of California 


18 2 Musk, Elon, Chairman of Tesla Motors 
 Tesla Motors 


20 1 Rick Wagoner, chairman and chief executive officer, General Motors Corporation, Detroit, MI 
 General Motors 


20 3 Alan R. Mulally, president and chief executive officer, Ford Motor Company, Dearborn, MI 
 Ford Motor Company 


20 4 Thomas W. LaSorda, chief executive officer and president, Chrysler Group of DaimlerChrysler, Auburn Hills, MI 
 Daimle

38 6 Mary Minette, Director for Environmental Education and Advocacy, Evangelical Lutheran Church in America 
 Evangelical Lutheran Church in America 


38 7 Ford West, President, The Fertilizer Institute 
 Fertilizer Institute 


38 8 John Felmy, Chief Economist, American Petroleum Institute 
 American Petroleum Institute 


38 9 Robert C.Baugh, Executive Director of AFL-CIO Industrial Union Council and Chair of AFL-CIO Energy Task Force 
 AFL-CIO 


38 10 Emily Figdor, Director, Federal Global Warming Program, Environment America 
 Environment America 


38 14 Paul N. Cicio, President, Industrial Energy Consumers of America \1\ 
 Industrial Energy Consumers of America 


39 1 Mr. Michael Morris, Chairman and CEO, American Electric Power 
 American Electric Power 


39 5 Mr. Stuart Dalton, Director, Generation Sector, Electric Power Research Institute 
 Electric Power Research Institute 


40 1 Dr. Camille Parmesan, Associate Professor of Integrative Biology, University of Texas at Au

66 1 Nielson, Dianne R., Ph.D., Energy Advisor, Office of the Governor, Salt Lake City, UT 
 Salt Lake City, UT 


66 2 Schwartz, Eric, Member, Energy Security Leadership Council & Former Co-CEO of Goldman Sachs Asset Management 
 Goldman Sachs 


66 3 Batten, Kit, Ph.D., Senior Fellow, Center for American Progress Action Fund 
 Center for American Progress 


67 0 Jeffrey R. Immelt, Chairman and Chief Executive Officer, General Electric 
 General Electric 


67 1 Jim Rogers, Chairman, President, and Chief Executive Officer, Duke Energy 
 Duke Energy 


67 2 Frances Beinecke, President, Natural Resources Defense Council 
 Natural Resources Defense Council 


67 3 Fred Krupp, President, Environmental Defense Fund 
 Environmental Defense Fund 


67 5 Peter A. Darbee, Chairman, Chief Executive Officer, and President, PG&E Corporation 
 PG&E Corporation 


67 10 John Rowe, President and Chief Executive Officer, Exelon Corporation 
 Exelon Corporation 


67 11 David Crane, President and Chi

90 3 Gayle, Helene, president and CEO, CARE, Atlanta, GA 
 CARE 


91 4 Red Cavaney, Senior Vice President for Government and Public Affairs, ConocoPhillips 
 ConocoPhillips 


91 5 Jim Rogers, Chairman, President and CEO, Duke Energy Corp 
 Duke Energy 


91 6 Frances Beinecke, President, Natural Resources Defense Council 
 Natural Resources Defense Council 


91 7 Meg McDonald, Director, Global Issues, Alcoa Inc 
 Alcoa Inc 


91 8 David Crane, President and CEO, NRG Energy, Inc 
 NRG Energy 


91 10 Paul N. Cicio, President, Industrial Energy Consumers of America 
 Industrial Energy Consumers of America 


91 11 Kevin Knobloch, President, Union of Concerned Scientists 
 Union of Concerned Scientists 


91 13 David Kreutzer, Senior Policy Analyst in Energy Economics and Climate Change, The Heritage Foundation 
 Heritage Foundation 


91 14 Nathaniel Keohane, Director of Economic Policy And Analysis, Environmental Defense Fund 
 Environmental Defense Fund 


91 16 Frank Ackerman, Seni

102 1 Dr. James McCarthy, Professor of Biological Oceanography, Harvard University 
 Harvard University 


102 4 Dr. Lisa Graumlich, Director, School of Natural Resources and the Environment, University of Arizona 
 University of Arizona 


103 1 Stallman, Bob, President, American Farm Bureau Federation; Rice and Cattle Producer, Columbus, TX 
 American Farm Bureau 


103 5 Johnson, Roger, President, National Farmers Union, Washington, D.C 
 National Farmers Union 


103 7 English, Hon. Glenn, CEO, National Rural Electric Cooperative Association, Arlington, VA 
 National Rural Electric Cooperative Association 


103 8 West, Ford B., President, The Fertilizer Institute, Washington, D.C 
 Fertilizer Institute 


104 2 Chad Stone, Ph. D., Chief Economist, Center on Budget and Policy Priorities 
 Center on Budget and Policy Priorities 


106 0 Nat Keohane, Economist, Environmental Defense Fund 
 Environmental Defense Fund 


106 1 Reverend Dr. Mari Castellanos, Minister for Policy Advocacy

In [9]:
# Remove wrongly matched affiliations
# Witnesses whose matched group is on this list are reset to "no match"
wrong_affiliations = ['Capital Partners', 'ELLIS', 'General Electric', 'NETWORK', 'Oceanic', 'State of Tennessee', 'State of Wisconsin']
for hearing in df:
    for position in range(len(hearing['witnesses'])):
        if hearing['witness_affiliation'][position] not in wrong_affiliations:
            continue
        # Clear all three parallel lists at this witness position
        for field in ('witness_affiliation', 'witness_sector', 'witness_industry'):
            hearing[field][position] = None

In [10]:
# Summary: count all witnesses and those with a non-empty affiliation
witnesses = 0
matched = 0

for hearing in df:
    witness_total = len(hearing['witnesses'])
    witnesses += witness_total
    matched += sum(
        1 for position in range(witness_total)
        if hearing['witness_affiliation'][position] is not None
    )

print(f'The affiliations of {matched} out of {witnesses} witnesses were successfully matched.\n')

The affiliations of 358 out of 855 witnesses were successfully matched.



In [11]:
# Save the data: write `df`, including the affiliation/sector/industry lists, back to JSON
output_path = 'CommitteeHearings/hearings_witnesses_contrarians_classified_partial.json'
with open(output_path, 'w') as output_file:
    json.dump(df, output_file)