# Manually match the remaining witnesses into *Open Secrets* lobbying categories

#### This script classifies all witnesses according to the *Open Secrets* lobbying categories and subcategories

In [1]:
from TextCollection import *
import keyboard  # using module keyboard

In [2]:
# # Reinstantiate class after changing the TextCollection.py script
# # Do not run this in the last run! Leads to a conflict with pickle.

# from importlib import reload

# os.chdir('/home/mirjam/OneDrive/congress_committees/ArticleOne/Article_Scripts/Hearings')
# import TextCollection; reload(TextCollection)

# # Reinstantiate class
# t.__class__ = HearingsCollection

In [3]:
# Change the working directory to the data folder so that the relative paths
# used by the loading cell below ('Hearings/...', 'OpenSecrets/...') resolve.
# NOTE(review): the target is relative to the current directory, so running this
# cell a second time in the same kernel session moves somewhere else (or fails).
# NOTE(review): `os` is not imported explicitly in this notebook; presumably it
# arrives through `from TextCollection import *` — confirm.
os.chdir('../../Data/')

In [4]:
# Raise the stack size and the interpreter recursion limit; run this cell when
# saving fails with a maximum-recursion error.

import resource
import sys

# Show the limits in effect before the change: (soft, hard) stack size, recursion depth
print(resource.getrlimit(resource.RLIMIT_STACK))
print(sys.getrecursionlimit())

# The stack has to grow together with the recursion limit, otherwise the process
# may segfault. 0x100 bytes per stack frame is only a guess.
max_rec = 0x100000
stack_bytes = max_rec * 0x100
resource.setrlimit(resource.RLIMIT_STACK, (stack_bytes, resource.RLIM_INFINITY))
sys.setrecursionlimit(max_rec)

# Confirm the new recursion limit
print(sys.getrecursionlimit())

(8388608, -1)
3000
1048576


<br>

## 1) Preparation: Loading, correcting and inspecting the data

In [5]:
# Load the hearings collection and the OpenSecrets sector/industry data
t = load('Hearings/04_opensecrets_industries_contributors.pkl')
with open('OpenSecrets/sectors_industries_contributors.json', 'r') as jfile:
    sectors = json.load(jfile)

# The year range is read from the first industry of the first sector
covered_years = sectors[0]['industries'][0]['year']
summary = 'We imported {} hearings and {} sectors of lobbying organisations for the years {} to {}.\n'
print(summary.format(len(t), len(sectors), covered_years[0], covered_years[-1]))

# Append sectors to hearings collection
t.sectors = sectors

We imported 263 hearings and 13 sectors of lobbying organisations for the years 2003 to 2010.



In [6]:
# Add a new category 'International/Intergovernmental' to sector 11 'Other'.
# The entry carries no OpenSecrets data (all fields None); it only provides a
# slot that witnesses can be assigned to.
# The guard keeps the cell idempotent: without it, every re-run would append
# another copy of the entry to the in-memory sector list.

new_industry = {'industry': 'International/Intergovernmental',
                'id': None,
                'page_url': None,
                'year': None,
                'lobbying_groups_by_year': None,
                'lobbying_groups': None,
                'contributors_by_year': None,
                'contributors': None}

other_industries = sectors[11]['industries']
if all(entry['industry'] != new_industry['industry'] for entry in other_industries):
    other_industries.append(new_industry)

In [7]:
# List every sector (index, upper-cased name, id) followed by its numbered industries
print('Each of these sectors is grouped into multiple industries. These are the sectors with their respective industries:\n')
for sector_no, sector in enumerate(sectors):
    sector_id = '({})'.format(sector['id'])
    print(sector_no, sector['name'].upper(), sector_id)
    for industry_no, industry in enumerate(sector['industries']):
        print('\t', industry_no, industry['industry'])
    # Blank lines between two sectors
    print('\n')

Each of these sectors is grouped into multiple industries. These are the sectors with their respective industries:

0 AGRIBUSINESS (A)
	 0 Agricultural Services/Products
	 1 Crop Production & Basic Processing
	 2 Dairy
	 3 Farm bureaus
	 4 Food and kindred products manufacturing
	 5 Food Processing & Sales
	 6 Food stores
	 7 Forestry & Forest Products
	 8 Livestock
	 9 Meat processing & products
	 10 Poultry & Eggs
	 11 Sugar cane & sugar beets
	 12 Tobacco
	 13 Vegetables, fruits and tree nut


1 COMMUNICATIONS/ELECTRONICS (B)
	 0 Book, newspaper & periodical publishing
	 1 Cable & satellite TV production
	 2 Commercial TV & radio stations
	 3 Computer software
	 4 Electronics Mfg & Equip
	 5 Internet
	 6 Motion Picture production & distribution
	 7 Printing & Publishing
	 8 Recorded Music & music production
	 9 Telecom Services
	 10 Telephone Utilities
	 11 TV production
	 12 TV/Movies/Music


2 CONSTRUCTION (C)
	 0 Architectural services
	 1 Building Materials & Equipment
	 2 Const

In [8]:
# Add witness information missed during parsing.
# Each row: (index of the hearing in t.texts, index of the witness, corrected witness string)
witness_fixes = [
    (39, 9, 'Rudolph G. Penner, Ph.D., Senior Fellow, The Urban Institute'),
    (66, 15, 'Brownstein, Mark S., director, Enterprise Strategy, PSEG Service Corporation'),
    (83, 2, 'Allen Fawcett, Environmental Protection Agency'),
    (113, 8, 'Kammen, Daniel M., Transportation Sustainability Research Center'),
    (123, 50, 'Sam Stone, Dairy Industry'),
]
last_fix = len(witness_fixes) - 1

for fix_no, (text_idx, witness_idx, corrected) in enumerate(witness_fixes):
    witness_list = t.texts[text_idx]['witnesses']
    # Show the entry as parsed, overwrite it, then show the corrected entry
    print(witness_list[witness_idx])
    witness_list[witness_idx] = corrected
    # Every correction except the last one is followed by an extra blank line
    spacer = () if fix_no == last_fix else ('\n',)
    print('>>', witness_list[witness_idx], *spacer)

rudolph g. penner, phd,
>> Rudolph G. Penner, Ph.D., Senior Fellow, The Urban Institute 

Brownstein, Mark S., director, Enterprise Strategy
>> Brownstein, Mark S., director, Enterprise Strategy, PSEG Service Corporation 

Allen Fawcett
>> Allen Fawcett, Environmental Protection Agency 

Kammen, Daniel M
>> Kammen, Daniel M., Transportation Sustainability Research Center 

sam stone
>> Sam Stone, Dairy Industry


In [9]:
# Correct witnesses that have been wrongly classified as Desmog denialists (same name, different person).
# Each pair: (index of the hearing in t.texts, index of the witness)
false_desmog_matches = [(32, 6), (151, 5)]

for pair_no, (text_idx, witness_idx) in enumerate(false_desmog_matches):
    hearing = t.texts[text_idx]
    # Show the witness and the Desmog name it was matched to, then drop the match
    print(hearing['witnesses'][witness_idx])
    print(hearing['desmog_witness'][witness_idx])
    hearing['desmog_witness'][witness_idx] = None
    # Only the first correction is followed by an extra blank line
    spacer = ('\n',) if pair_no + 1 < len(false_desmog_matches) else ()
    print('>>', hearing['desmog_witness'][witness_idx], *spacer)


Robert Bradley, Director Of International Climate Policy, World Resources Institute
Robert Bradley
>> None 

Mathis, Mark, President, Confluence Energy, Kremmling, Colorado
Mark Mathis
>> None


In [10]:
# Check whether Jeffrey Holmstead is correctly classified as a denialist.
# This cell only inspects the data (witness string, matched Desmog name, year of
# the hearing); it does not change anything.

print(t.texts[40]['witnesses'][0])
print(t.texts[40]['desmog_witness'][0])
print(t.texts[40]['year'])

# Background, taken from https://www.desmogblog.com/jeffrey-r-holmstead :
# Holmstead's appointment at the EPA was controversial and protested by US Senators due to his previous lobbying work for coal companies.
# Jeffrey Holmstead has been described as a prime example of a "revolving door lobbyist" - a term that OpenSecrets defines as "a revolving
# door that shuffles former federal employees into jobs as lobbyists, consultants and strategists just as the door pulls former hired guns
# into government careers."

# >> Decision: yes, he is correctly classified; no need to remove him from the denialist witnesses.

Hon. Jeffery Holmstead, Assistant Administrator For Air And Radiation, Environmental Protection Agency
Jeffrey Holmstead
2003


In [11]:
# Correct witnesses that should be classified as Desmog witnesses

# The 'International Council for Capital Formation' is an affiliate of the 'American Council for Capital Formation',
# see: https://www.desmogblog.com/american-council-for-capital-formation and http://www.iccfglobal.org/pdf/ICCFbrochure1.pdf
# The organisation is therefore entered by hand for this witness; the value before
# and after the change is printed for inspection.
print(t.texts[257]['witnesses'][3])
print(t.texts[257]['desmog_organisation'][3])
t.texts[257]['desmog_organisation'][3] = 'International Council for Capital Formation'
print('>>', t.texts[257]['desmog_organisation'][3])

Thorning, Dr. Margo, Managing Director, International Council for Capital Formation
None
>> International Council for Capital Formation


In [12]:
# Correct wrongly classified witnesses.
#
# The affiliation, sector and industry of every witness with one of the
# affiliations below are reset to None (presumably so that the manual matching
# further down can assign the intended industry — confirm):
#   Capital Partners >> 'Finance' instead of 'Lobbyists' (organisation name overlap)
#   Green for All    >> 'Environmental' instead of 'Non-Profit' (belongs in multiple industries, depending on context)
#   General Electric >> 'Misc Manufacturing' instead of 'TV production' (belongs in multiple industries, depending on context)
#
# One pass over all hearings replaces the three copy-pasted loops; the result is
# the same because a reset affiliation (None) can no longer match any of the names.
misclassified_affiliations = ('Capital Partners', 'Green for All', 'General Electric')

for text in t.texts:
    for j, _witness in enumerate(text['witnesses']):
        if text['witness_affiliation'][j] in misclassified_affiliations:
            text['witness_affiliation'][j] = None
            text['witness_sector'][j] = None
            text['witness_industry'][j] = None

In [13]:
# Delete wrong witness entries (charts, articles, position papers...)
#
# Each pair is (index of the hearing in t.texts, index of the false entry).
# The entries are removed one after another, so an index always refers to the
# list as it stands after the preceding removals (hearing 48 is listed three times).
# NOTE(review): this cell is not safe to re-run — a second execution would remove
# further entries at the same positions.
false_entries = [(16, 4), (46, 4), (48, 1), (48, 2), (48, 3),
                 (66, 8), (170, 26), (199, 6), (206, 6)]

# Per-witness lists that are kept parallel to 'witnesses' and must shrink with it
parallel_keys = ('witnesses', 'desmog_witness', 'desmog_organisation',
                 'witness_affiliation', 'witness_sector', 'witness_industry')

for text_idx, false_entry in false_entries:
    hearing = t.texts[text_idx]
    print(text_idx, false_entry, hearing['witnesses'][false_entry])
    for key in parallel_keys:
        # Rebuild each list without the element at position `false_entry`
        hearing[key] = [entry for pos, entry in enumerate(hearing[key]) if pos != false_entry]
print(f'\n\n{len(false_entries)} false entries have been removed.')

# Remove witnesses_MODS and witnesses_transcript variables
# (these are not accurate anymore, after witnesses have been
# removed without adapting these variables at an earlier stage)

t.remove_key('witnesses_MODS')
t.remove_key('witnesses_transcript')

16 4 ``Important Transitions in Emitting Countries Over the Coming Century'', of 2007 from Global Energy Technology Strategy, Addressing Climate Change: Phase 2 Findings from an International Public-Private Sponsored Research Program, Battell Memorial Institute
46 4 Chart, Integrated ECO -ECO&lt;INF&gt;2&lt;/INF&gt; Installation
48 1 Position paper, The Economics of Greenhouse Gas Mitigation
48 2 Position paper, Piece on Climate Change for Prospect
48 3 Analysis, The Kyoto Protocol: Impact on EU Emissions and Competitiveness
66 8 Clean Air Act, several organizations
170 26 Page QUESTIONS AND ANSWERS Questions and Answers.......................... 230, 292, 349, 432,
199 6 Nuclear Waste, Safety and Training
206 6 Chart entitled ``Total Electricity Use, per capita, 1960-2001


9 false entries have been removed.


<br>

## 2) Matching: match the witnesses according to the OpenSecrets lobbying groups

### 11 OTHER

In [14]:
# 11 OTHER (W)
# 	 4 Non-Profit Institutions
# 	 0 Civil Servants/Public Officials
# 	 2 Education
#    	 3 For-profit Education
# 	 1 Clergy & Religious Organizations
# 	 5 Retired

In [15]:
# 11.1 OTHER - Clergy & Religious Organizations: 
t.select_industry(11, 1)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# The pattern with regex escapes is a raw string: '\w' and '\s' are not valid
# Python string escapes and trigger a warning in a normal string literal.
keywords = [r'\w+\s?\w+ church',
            'southern baptist theological seminary',
            'religious action center of reform judaism',
            'institute on religion and democracy',
            'interfaith stewardship alliance',
            'virginia interfaith center for public policy',
            'author and historian'] # Barton, David, author and historian: representing evangelicals
t.match_witnesses(keywords, print_witnesses = False)

11.1 OTHER - Clergy & Religious Organizations:

There are 3 Clergy & Religious Organizations witnesses.

8 more Clergy & Religious Organizations witnesses were matched, resulting in a total of 11 witnesses.


In [16]:
# 11.2 OTHER: Education
t.select_industry(11, 2)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# United States research and development agencies
# https://en.wikipedia.org/wiki/List_of_United_States_research_and_development_agencies
# https://en.wikipedia.org/wiki/Federally_funded_research_and_development_centers

# Patterns with regex escapes (\w, \s) are raw strings, because those are not
# valid Python string escapes and trigger a warning in normal string literals.
# Fixed: a comma was missing after ' nifa', which silently merged it with the next
# entry into ' nifaeconomic research service', so neither keyword could match.
keywords = [# Independent Agencies
            'national science foundation', ' nsf',
            'national aeronautics and space administration', ' nasa', 
            'environmental protection agency office of research and development', 
            'intelligence advanced research projects activity', ' iarpa',
            'smithsonian',
             # Department of Agriculture
            'agricultural research service', ' ars',
            'national institute of food and agriculture', ' nifa',
            'economic research service', ' ers',
            'united states forest service research and development', 'united states forest service r&d',
            'rocky mountain research station, forest service',
             # Department of Commerce
            'national institute of standards and technology', ' nist',
            'national oceanic and atmospheric administration', ' noaa',
            # Department of Education
            'institute of education sciences', ' ies',
            'national institute on disability and rehabilitation research', ' nidrr',
            # Department of Energy
            'department of energy office of science', 'doe office of science', ' doe sc',
            'advanced research projects agency-energy', ' arpa-e',
                # National laboratories
            r'national \w*\s*\w*\s*\w*\s*laboratory', r'national \w*\s*\w*\s*\w*\s*lab',
            # Department of health and human services
            'national institutes of health', ' nih',
            'national institute for occupational safety and health', ' niosh',
            'food and drug administration science and research programs',
            'agency for healthcare research and quality', ' ahrq',
            'biomedical advanced research and development authority', ' barda',
            # Department of homeland security
            'directorate for science and technology', ' s&t',
            'coast guard research & development center', ' cg rdc',
            # Department of the interior
            'united states geological survey', ' usgs', 'geological survey',
            # Department of justice
            'national institute of justice', ' nij',
            # Department of transportation
            'research and innovative technology administration',
            'federal aviation administration research, engineering, and development',
            'federal highway administration research and technology',
            'scientist(?:.+?)environmental protection agency',
            'researcher(?:.+?)environmental protection agency',
            'research and development, united states environmental protection agency',
            # Veterans affairs
            'veterans health administration office of research and development', ' ord',
            # Multi-agency initiatives
            'office of science and technology', ' ostp',
            'u.s. global change research program', ' usgcrp', 
            'networking and information technology research and development program', ' nitrd',
            'national nanotechnology initiative', ' nni',
            # Judicial branch
            'federal judicial center',
            # Legislative branch
            'house committee on science, space and technology',
            'senate committee on commerce, science, and transportation',
            'office of technology assessment', ' ota',
            # Joint programs
            'carbon cycle scientific steering group',
            # Federally funded research and development centers
            'institute for defense analyses',
            'center for naval (analyses|analysis){1}',
            'national center for atmospheric research',
    
            # Other
            'congressional research service',
            'lawrence berkeley laboratory']

t.match_witnesses(keywords, print_witnesses = False)

# Flag state research witnesses: every hearing gets a 'state_research' list that is
# parallel to its 'witnesses' list, holding 1 where the lower-cased witness string
# matches one of the keywords above and 0 otherwise. All witnesses are checked
# here, whatever industry they are assigned to.
count = 0
for i, text in enumerate(t.texts):
    text['state_research'] = []
    for j, witness in enumerate(text['witnesses']):
        text['state_research'].append(0)
        if len(find_string.first_match(keywords, witness.lower())) > 0:
            count += 1
            text['state_research'][j] = 1

print('A total of {} state research witnesses were matched.\n\n'.format(count))

# Other research 
keywords = ['national academy of sciences', 'national research council',
            ', (mit)', ' (mit)$',
            r'\w+\s?\w+\s?\w+\suniversity',  r'university\s\w+\s?\w+',
            r'\w+\s?\w+\s?\w+\scollege', r'college\s\w+\s?\w+\s?\w+', 
            r'\w*\s?school of\s\w+\s?\w+\s?\w+\s?\w+\s?\w+', r'\w+\s?\w+\s?\w+\sschool',
            r'\w+ institution of \w+', 
            'graduate institute of international and development studies', 
            'transportation sustainability research center',
            'yale', 'johns hopkins', 'institut pasteur', 
            'kansas coalition for carbon management',
            'american meteorological society']
antikeywords = ['in-q-tel', 'copenhagen consensus center']

t.match_witnesses(keywords, antikeywords, print_witnesses = False)

11.2 OTHER - Education:

There are 155 Education witnesses.

82 more Education witnesses were matched, resulting in a total of 237 witnesses.
A total of 86 state research witnesses were matched.


42 more Education witnesses were matched, resulting in a total of 279 witnesses.


In [17]:
# 11.0 OTHER: Civil Servants/Public Officials
t.select_industry(11,0)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Patterns with regex escapes (\w, \s, \., \,) are raw strings, because those are
# not valid Python string escapes and trigger a warning in normal string literals.
keywords = [r'department of the \w+', r'department of \w+', r'\w+ \w+ agency', r'\w+ \w+ administration',
            'office of management and budget', r'administrator, office of \w+', 'city administrator',
            r'\w+ and \w+ commission',  r'\w+ \w+ commission', r'commission on \w+', 'commissioner',
            r'committee on \w+', r'committee for \w+ \w+', r'\w+ and \w+ \w+ committee',
            'under secretary for democracy and global affairs', 'new jersey dep',
            'governmental affairs department', 'environment department',
            'government accountability office', 'general accounting office',
            'governors of the federal reserve system',
            r'mayor\,?\s?\w+ \w+ \w+', r'mayor of \w+', r'mayor, \w+',
            'office of governor', r'governor\,?\s?\w+', 'city manager', 
            'forest service', 'usda', 'house of representatives',
            'congressional budget office', 'united states congress', 'director, congr', 
            r'representative from \w+', r'\w+ \w+ senate', 
            'tourism policy council','office of the consumer advocate',
            'calpers', 'assistant director for macroeconomics analysis',
            '((white house)? council on environmental quality)', 'national intelligence council',
            'comptroller general of the united states', 'social security advisory board',
            'california air resources board', 'board of public utilities',
            'northeast states for coordinated air use management',r'\w+ state \w+',
            r'(council o(n|f) \w+ \w+)','adirondack council', 'hoopa valley tribe',
            '((u.s.|united states) (army|navy))', r'\w+ \w+ chamber of commerce',
            'metropolitan water district of southern california',
            'northern colorado water conservancy district']

# Witnesses matching one of these patterns are passed as exclusions
antikeywords = ['national association of regulatory utility commissioners', r'inc\.|inc$', 'incorporated', 'international energy agency', 
                'former', 'retired', r'(ret\.)']

t.match_witnesses(keywords, antikeywords, print_witnesses = False)

11.0 OTHER - Civil Servants/Public Officials:

There are 42 Civil Servants/Public Officials witnesses.

332 more Civil Servants/Public Officials witnesses were matched, resulting in a total of 374 witnesses.


In [18]:
# 11.4 OTHER - Non-Profit Institutions
t.select_industry(11, 4)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# ('institute for multi-track diplomacy' was listed twice; the redundant copy was dropped.
#  The three spellings of the "center on/for budget ..." are kept on purpose.)
keywords = ['realizing the dream', 'association of snow travelers',
            'national ski areas association', 'institute for multi-track diplomacy',
            'league of american bicyclists', 'appliance standards awareness project',
            'empower consumers', 'freedom to roam', 'center for global development',
            'regulatory assistance project',
            'carnegie endowment for international peace', 'the concord coalition', 
            'peterson institute for international economics', 'urban institute',
            'actionaid', 'american recreation coalition', 'adaptation network',
            'natural resources stewardship project', 'rural economic development',
            'center on budget and policy priorities', 'federation of american scientists',
            'social security task force', 'government accountability project',
            'brookings institution', 'the brookings institute', 
            'center on budget policies and priorities', 'energy program, public citizen',
            'center for budget and policy priorities', 'chesapeake bay maritime museum',
            
            'citizens for tax justice', 'institute of the north', 
            # Denialist
            'center for climate/ocean resources study', 'copenhagen consensus center',
            ]

t.match_witnesses(keywords, print_witnesses = False)

11.4 OTHER - Non-Profit Institutions:

There are 8 Non-Profit Institutions witnesses.

44 more Non-Profit Institutions witnesses were matched, resulting in a total of 52 witnesses.


In [19]:
# 11.5 OTHER - Retired: 
t.select_industry(11, 5)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Patterns with regex escapes (\(, \), \.) are raw strings, because those are not
# valid Python string escapes and trigger a warning in normal string literals.
keywords = ['retired', r'\(ret.\)', 'former', 
            'mahoney, james r., ph.d., (environmental consultant)']
# Witnesses matching one of these patterns are passed as exclusions
antikeywords = [r'\(retired\);', r'\(ret.\), (president|chairman)', r'inc\.', 
                'fellow', 'member', 'director', 'president']

t.match_witnesses(keywords, antikeywords, print_witnesses = False)

11.5 OTHER - Retired:

There are 0 Retired witnesses.

16 more Retired witnesses were matched, resulting in a total of 16 witnesses.


In [20]:
# 11.6 OTHER - International/Intergovernmental
# (industry 6 of sector 11 is the category appended manually near the top of the notebook)
t.select_industry(11, 6)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# The pattern with regex escapes is a raw string: '\w' and '\s' are not valid
# Python string escapes and trigger a warning in a normal string literal.
keywords = ['ipcc', 'intergovernmental panel on climate change', 
            'international energy agency', 'united arab emirates',
            r'\w+\s?\w+\s?\w+\s?\w+\s?\w+\s?\w+, united kingdom',
            'government of the republic of maldives', 'european commission',
            'u.k. department for environment, food, and rural affairs',
            'official negotiator, u.n. framework convention on climate change and the kyoto protocol',
            'minister for environment, nature conservation and nuclear safety, federal republic of germany']
antikeywords = ['london school of economics']

# Fixed: the antikeywords were defined but never handed to match_witnesses, so the
# exclusion had no effect.
t.match_witnesses(keywords, antikeywords, print_witnesses = False)
# The recorded run (without the exclusion) matched 14 witnesses — re-check after re-running.

11.6 OTHER - International/Intergovernmental:

There are 0 International/Intergovernmental witnesses.

14 more International/Intergovernmental witnesses were matched, resulting in a total of 14 witnesses.


In [21]:
# Search for further matches: scratch cell for trying out candidate patterns.
# It is left with a single empty keyword and no exclusions.
keywords, antikeywords = [''], []

t.search_witnesses(keywords, antikeywords)

0 witnesses were found.


### 3 DEFENSE

In [22]:
# 3.0 DEFENSE: Defense Aerospace

# Keywords for matching further witnesses of this industry
keywords = [
    'aerospace and defense industry',
    'honeywell',
]

# Select the industry, report the witnesses already assigned to it, then match more
t.select_industry(3, 0)
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)
t.match_witnesses(keywords, print_witnesses = False)

3.0 DEFENSE - Defense Aerospace:

There are 8 Defense Aerospace witnesses.

2 more Defense Aerospace witnesses were matched, resulting in a total of 10 witnesses.


### 7 IDEOLOGICAL/SINGLE-ISSUE

In [23]:
# 7 IDEOLOGICAL/SINGLE-ISSUE (Q)
# 	 16 Republican/Conservative
# 	 5 Democratic/Liberal
# 	 12 Leadership PACs
#    	 4 Democratic leadership PAC
#    	 15 Republican leadership PAC
# 	 7 Foreign & Defense Policy
# 	 13 Pro-Israel
# 	 17 Women's Issues
# 	 11 Human Rights
#    	 8 Gay & lesbian rights & issues
# 	 6 Environment
# 	 9 Gun Control
# 	 10 Gun Rights
# 	 0 Abortion Policy/Anti-Abortion
# 	 1 Abortion Policy/Pro-Abortion Rights
# 	 2 Candidate Committees
#    	 14 Republican Candidate Committees
#    	 3 Democratic Candidate Committees

In [24]:
# 7.6 IDEOLOGICAL/SINGLE-ISSUE: Environment
t.select_industry(7,6)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Patterns with regex escapes (\w, \s, \.) are raw strings, because those are not
# valid Python string escapes and trigger a warning in normal string literals.
# ('energy future coalition' was listed twice; the redundant copy was dropped.)
keywords = ['resources for the future', 'world resources institute', 
            'conservation international', 'pew environmental',
            'pew center on global climate change', 'pew center', 
            'western organization of resource councils', 'ceres', 
            r'american council for an energy[-\s]{1}efficient economy', 'aceee',
            'environmental resources trust', r'heinz \w*\s*center',
            'environmental defense', 'energy future coalition',
            'oxfam', 'sonoran institute',
            'climate central', 'renewable energy alaska project',
            'alaska conservation solutions', 'wetlands watch',
            'wilderness workshop', 'wildlife management institute',
            'new hampshire wildlife federation', 'pacific environment',
            'pacific institute', 'winter wildlands alliance',
            'society for conservation biology', 'the climate registry',
            'forest climate working group', 'institute for applied ecology',
            'climate action reserve', 'coalition for green capital',
            'national religious partnership for the environment',
            'evangelical climate initiative', ' venice charter fishing',
            'waterkeepers alliance', 'watershed research and training center',
            'international fund for animal welfare', 'green for all',
            'national mitigation banking association', 'apollo alliance',
            'tomales bay institute', 'polar oceans research group',
            'institute for energy and environmental research',
            # individuals
            r'\w+ a?\.?\s?gore', r'gore, \w+\. \w+', 'marshall herskovitz']

t.match_witnesses(keywords, print_witnesses = False)

7.6 IDEOLOGICAL/SINGLE-ISSUE - Environment:

There are 110 Environment witnesses.

106 more Environment witnesses were matched, resulting in a total of 216 witnesses.


In [25]:
# 7.7 IDEOLOGICAL/SINGLE-ISSUE - Foreign & Defense Policy

# Keywords for matching further witnesses of this industry
keywords = [
    'center for strategic and budgetary assessment',
    'military advisory board',
    'association of the united states army',
    'truman national security project',
    'the atlantic council',
    'center for a new american security',
    'american security project',
    'woodrow wilson international',
    'center for strategic and international studies',
    'nonproliferation policy education center',
]

# Select the industry, report the witnesses already assigned to it, then match more
t.select_industry(7,7)
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)
t.match_witnesses(keywords, print_witnesses = False)

7.7 IDEOLOGICAL/SINGLE-ISSUE - Foreign & Defense Policy:

There are 2 Foreign & Defense Policy witnesses.

13 more Foreign & Defense Policy witnesses were matched, resulting in a total of 15 witnesses.


In [26]:
# 7.16 IDEOLOGICAL/SINGLE-ISSUE - Republican/Conservative

# Keywords for matching further witnesses of this industry
keywords = [
    'american enterprise institute',
    'competitive enterprise institute',
    'cato institute',
    'americans for tax reform',
    'the heartland institute',
    'business & media institute',
]

# Select the industry, report the witnesses already assigned to it, then match more
t.select_industry(7,16)
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)
t.match_witnesses(keywords, print_witnesses = False)

7.16 IDEOLOGICAL/SINGLE-ISSUE - Republican/Conservative:

There are 9 Republican/Conservative witnesses.

26 more Republican/Conservative witnesses were matched, resulting in a total of 35 witnesses.


In [27]:
# Search for further matches: scratch cell for trying out candidate patterns.
# It is left with a single empty keyword and no exclusions.
keywords, antikeywords = [''], []

t.search_witnesses(keywords, antikeywords)

0 witnesses were found.


### 8 LABOR

In [28]:
# 8 LABOR (P)
# 	 0 Air transport unions
# 	 1 Building Trade Unions
# 	 2 Industrial Unions
# 	 3 Misc Unions
# 	 4 Public Sector Unions
# 	 5 Teachers unions
# 	 6 Transportation Unions
# 	 7 US Postal Service unions & associations

In [29]:
# 8.0 LABOR: Air transport unions

# Keyword for matching further witnesses of this industry
keywords = [
    'professional aviation safety specialists',
]

# Select the industry, report the witnesses already assigned to it, then match more
t.select_industry(8, 0)
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)
t.match_witnesses(keywords, print_witnesses = False)

8.0 LABOR - Air transport unions:

There are 2 Air transport unions witnesses.

1 more Air transport unions witnesses were matched, resulting in a total of 3 witnesses.


In [30]:
# 8.2 LABOR: Industrial Unions

# Keywords for matching further witnesses of this industry
keywords = [
    '(united automobile, aerospace (and|&) agricultural implement workers of america)',
    'united steel workers',
    'international brotherhood of boilermakers',
    ' laborers\' international union',
]

# Select the industry, report the witnesses already assigned to it, then match more
t.select_industry(8, 2)
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)
t.match_witnesses(keywords, print_witnesses = False)

8.2 LABOR - Industrial Unions:

There are 8 Industrial Unions witnesses.

8 more Industrial Unions witnesses were matched, resulting in a total of 16 witnesses.


In [31]:
# 8.3 LABOR: Misc Unions

# Keyword for matching further witnesses of this industry
keywords = [
    'farmers union',
]

# Select the industry, report the witnesses already assigned to it, then match more
t.select_industry(8, 3)
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)
t.match_witnesses(keywords, print_witnesses = False)

8.3 LABOR - Misc Unions:

There are 0 Misc Unions witnesses.

1 more Misc Unions witnesses were matched, resulting in a total of 1 witnesses.


In [32]:
# 8.4 LABOR: Public Sector Unions
t.select_industry(8, 4)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string: '\w' and '\s' are not valid Python string escapes and trigger a
# warning in a normal string literal.
keywords = [r'national association of \w+ \w+\s?\w+']

t.match_witnesses(keywords, print_witnesses = False)

8.4 LABOR - Public Sector Unions:

There are 2 Public Sector Unions witnesses.

6 more Public Sector Unions witnesses were matched, resulting in a total of 8 witnesses.


In [33]:
# 8.6 LABOR: Transportation Unions

# Keyword for matching further witnesses of this industry
# (the blank after the hyphen is part of the keyword)
keywords = [
    'transportation trades department, afl- cio',
]

# Select the industry, report the witnesses already assigned to it, then match more
t.select_industry(8, 6)
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)
t.match_witnesses(keywords, print_witnesses = False)

8.6 LABOR - Transportation Unions:

There are 0 Transportation Unions witnesses.

1 more Transportation Unions witnesses were matched, resulting in a total of 1 witnesses.


In [34]:
# Search for further matches: scratch cell for trying out candidate patterns.
# It is left with a single empty keyword and no exclusions.
keywords, antikeywords = [''], []

t.search_witnesses(keywords, antikeywords)

0 witnesses were found.


### 4 ENERGY/NAT RESOURCE

In [35]:
# 4.0 ENERGY/NAT RESOURCE: Alternate energy production & services
t.select_industry(4, 0)

t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Patterns with regex escapes (\w, \s) are raw strings, because those are not
# valid Python string escapes and trigger a warning in normal string literals.
keywords = ['confluence energy', r'\w+ windpower', 'utc power', r'biofuels \w+',
            r'\w*\s?\w*solar', r'gasification \w+\s?\w+', '1366 technologies',
            'iberdrola renewables', 'renewable ventures', 'energy ventures',
            'sunedison', 'gore and associates', 'sunworks, llc', 'orpc alaska',
            'columbia energy partners', 'saic--energy solutions group',
            'colorado river energy distributors association', 'platinum ethanol',
            'nrg systems',
            # Nuclear
            'babcock and wilcox', 'enrichment corporation']

t.match_witnesses(keywords, print_witnesses = False)

4.0 ENERGY & NATURAL RESOURCES - Alternate energy production & services:

There are 34 Alternate energy production & services witnesses.

29 more Alternate energy production & services witnesses were matched, resulting in a total of 63 witnesses.


In [36]:
# 4.1 ENERGY/NAT RESOURCE: Coal Mining
t.select_industry(4, 1)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w` / `\s`: these are not valid Python
# string escapes, and the pattern text is unchanged.
keywords = [r'\w*\s?\w*coal \w*', 'buckeye industrial mining co']

t.match_witnesses(keywords, print_witnesses = False)

4.1 ENERGY & NATURAL RESOURCES - Coal mining:

There are 10 Coal mining witnesses.

6 more Coal mining witnesses were matched, resulting in a total of 16 witnesses.


In [37]:
# 4.2 ENERGY/NAT RESOURCE: Electric Utilities
t.select_industry(4, 2)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Entries containing `\w` / `\s` are raw strings: these are not valid Python
# string escapes, so plain literals raise an invalid-escape warning. The
# pattern text itself is unchanged.
keywords = [r'((\w+\s?\w+\s?\w+ )?authority( \w+\s?\w+)?)',
            r'((\w+ )?\w*power (\w+\s?\w+)?)', r'\w+ public service \w+',
            r'((mid\s?american|cps|og&e|txu|cal|shell|pepco|fpl)\s?energy)',
            'exelon', 'memphis light, gas, and water', 'powerspan',
            'electric cooperative', r'\w+\s?\w+\s?\w+ fuels \w+', 'national grid',
            'electricite de france', r'pacific gas and electric \w+',
            r', (\w+\s?&?\s?\w+ nuclear)', 'old dominion cooperative',
            'independent transmission system operator', 'tenaska', 'eon energie',
            'midamerican corporation', ' jea$', 'generators for clean air',
            'futuregen alliance','purgen one', 'pseg service corporation']

# Exclusions passed alongside the keywords; presumably these are names the
# broad 'power' pattern would otherwise catch (confirm in match_witnesses).
antikeywords = ['small power consultants', 'empower', 'air power systems']

t.match_witnesses(keywords, antikeywords, print_witnesses = False)

4.2 ENERGY & NATURAL RESOURCES - Electric Utilities:

There are 89 Electric Utilities witnesses.

47 more Electric Utilities witnesses were matched, resulting in a total of 136 witnesses.


In [38]:
# 4.3 ENERGY/NAT RESOURCE: Mining
t.select_industry(4, 3)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w` / `\s`: these are not valid Python
# string escapes, and the pattern text is unchanged.
keywords = ['rio tinto', 'kennecott energy', r'alcoa \w+\s?\w+', 'quaterra corporation']

t.match_witnesses(keywords, print_witnesses = False)

4.3 ENERGY & NATURAL RESOURCES - Mining:

There are 5 Mining witnesses.

6 more Mining witnesses were matched, resulting in a total of 11 witnesses.


In [39]:
# 4.4 ENERGY/NAT RESOURCE: Natural Gas transmission & distribution
# Select the industry, print the current tally, then match by keyword.
t.select_industry(4, 4)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'piedmont natural gas company',
    'transcanada pipeline',
    'pipeline and industrial group',
    'kinder morgan',
]
t.match_witnesses(keywords, print_witnesses=False)
# Note: this industry could be folded into the main category Oil & Gas,
# as is done in the MoC financial data.

4.4 ENERGY & NATURAL RESOURCES - Natural Gas transmission & distribution:

There are 5 Natural Gas transmission & distribution witnesses.

4 more Natural Gas transmission & distribution witnesses were matched, resulting in a total of 9 witnesses.


In [40]:
# 4.5 ENERGY/NAT RESOURCE: Oil & Gas
t.select_industry(4, 5)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Entries containing `\w` / `\s` are raw strings: these are not valid Python
# string escapes, so plain literals raise an invalid-escape warning. The
# pattern text itself is unchanged.
keywords = [r'(\w+ oil (\w+\s?\w+\s?\w+)?)', r', (shell(\s|$)\w*\s?\w*)',
           r'independent petroleum association \w+\s?\w+\s?\w+', 'chevron',
           'jireh resources', 'continental resources', 'conoco- phillips',
           'advanced resources international', 'xto energy', 'cota & cota',
           'suez lng', 'unimark', 'countrymark']

t.match_witnesses(keywords, print_witnesses = False)

4.5 ENERGY & NATURAL RESOURCES - Oil & Gas:

There are 22 Oil & Gas witnesses.

19 more Oil & Gas witnesses were matched, resulting in a total of 41 witnesses.


In [41]:
# 4.6 ENERGY/NAT RESOURCE: Waste Management
# Select the industry, print the current tally, then match by keyword.
t.select_industry(4, 6)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'advanced waste management systems',
    'ses, inc',
    'cbl industrial services',
]
t.match_witnesses(keywords, print_witnesses=False)

4.6 ENERGY & NATURAL RESOURCES - Waste Management:

There are 3 Waste Management witnesses.

4 more Waste Management witnesses were matched, resulting in a total of 7 witnesses.


In [42]:
# Check whether any further matches remain
keywords, antikeywords = [], []
t.search_witnesses(keywords, antikeywords)
# t.print_key('content', )

0 witnesses were found.


### 0 AGRIBUSINESS

In [43]:
# 0 AGRIBUSINESS (A)
# 	 1 Crop Production & Basic Processing
#    	 13 Vegetables, fruits and tree nut
#    	 11 Sugar cane & sugar beets
# 	 12 Tobacco
# 	 2 Dairy
# 	 10 Poultry & Eggs
# 	 8 Livestock
# 	 0 Agricultural Services/Products
#    	 3 Farm bureaus
# 	 5 Food Processing & Sales
#    	 4 Food and kindred products manufacturing
#    	 6 Food stores
#    	 9 Meat processing & products
# 	 7 Forestry & Forest Products

# >> No witnesses in the subcategories >> these can be deleted.

In [44]:
# 0.1 AGRIBUSINESS - Crop Production & Basic Processing
t.select_industry(0, 1)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\S` / `\s`: these are not valid Python
# string escapes, and the pattern text is unchanged.
keywords = [r'\S*\s*producer', 'pacific northwest direct seed association',
           'cargill', 'vineyard', 'grain and input cooperative',
           'grain growers association', 'soybean association']


t.match_witnesses(keywords,  print_witnesses = False)

0.1 AGRIBUSINESS - Crop Production & Basic Processing:

There are 8 Crop Production & Basic Processing witnesses.

81 more Crop Production & Basic Processing witnesses were matched, resulting in a total of 89 witnesses.


In [45]:
# 0.2 AGRIBUSINESS - Dairy
t.select_industry(0, 2)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w`: it is not a valid Python string
# escape, and the pattern text is unchanged.
keywords = ['brubaker farms', '(dairy(.+?)producer)', r'dairy \w+']

t.match_witnesses(keywords, print_witnesses = False)

0.2 AGRIBUSINESS - Dairy:

There are 0 Dairy witnesses.

3 more Dairy witnesses were matched, resulting in a total of 3 witnesses.


In [46]:
# 0.5 AGRIBUSINESS - Food Processing & Sales
# Select the industry, print the current tally, then match by keyword.
t.select_industry(0, 5)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'northwest food processors association',
]
t.match_witnesses(keywords, print_witnesses=False)

0.5 AGRIBUSINESS - Food Processing & Sales:

There are 0 Food Processing & Sales witnesses.

1 more Food Processing & Sales witnesses were matched, resulting in a total of 1 witnesses.


In [47]:
# 0.7 AGRIBUSINESS - Forestry & Forest Products
t.select_industry(0, 7)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w` / `\s`: these are not valid Python
# string escapes, and the pattern text is unchanged.
keywords = [r'(?:on behalf of )?(\w*\s*\w*\s*\w*\s*forest(?!,)\s*\w*\s*\w*\s*\w*\s*\w*\s*\w*)',
           'lumber inc', 'neiman enterprises']

t.match_witnesses(keywords, print_witnesses = False)

0.7 AGRIBUSINESS - Forestry & Forest Products:

There are 3 Forestry & Forest Products witnesses.

8 more Forestry & Forest Products witnesses were matched, resulting in a total of 11 witnesses.


In [48]:
# 0.8 AGRIBUSINESS - Livestock
t.select_industry(0, 8)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w`: it is not a valid Python string
# escape, and the pattern text is unchanged.
keywords = [r'livestock \w+', '(cattle(.+?)producer)', '(pork(.+?)producer)',
           '(beef(.+?)producer)', '(lamb(.+?)producer)', 'shellfish farmer']

t.match_witnesses(keywords, print_witnesses = False)

0.8 AGRIBUSINESS - Livestock:

There are 0 Livestock witnesses.

2 more Livestock witnesses were matched, resulting in a total of 2 witnesses.


In [49]:
# Check whether any further matches remain
keywords, antikeywords = [''], []
t.search_witnesses(keywords, antikeywords)

0 witnesses were found.


### 12 TRANSPORTATION 

In [50]:
# 12 TRANSPORTATION (M)
# 	 0 Air Transport (11)
#    	 1 Airlines (0)
# 	 5 Automotive (0)
#    	 4 Auto manufacturers (9)
#    	 3 Auto dealers, new & used (0)
#    	 2 Auto dealers, foreign imports (0)
# 	 9 Trucking (1)
# 	 7 Railroads (5)
# 	 8 Sea Transport (2)
#    	 6 Cruise ships & lines (0)

In [51]:
# 12.0 TRANSPORTATION - Air Transport
t.select_industry(12,0)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w`: it is not a valid Python string
# escape, and the pattern text is unchanged.
keywords = [r'\w+ \w+ \w+ airport', 'air transport association', 'united airlines']

t.match_witnesses(keywords, print_witnesses = False)

12.0 TRANSPORTATION - Air Transport:

There are 11 Air Transport witnesses.

6 more Air Transport witnesses were matched, resulting in a total of 17 witnesses.


In [52]:
# 12.5 TRANSPORTATION - Automotive
# Select the industry, print the current tally, then match by keyword.
t.select_industry(12, 5)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'air power systems co',
    'pridgeon & clay',
    'bus lines',
    'the community bus service',
    'american highway users alliance',
    'toyota',
    'honda',
    'better place',
    'suv owners of america',
]
t.match_witnesses(keywords, print_witnesses=False)

12.5 TRANSPORTATION - Automotive:

There are 0 Automotive witnesses.

11 more Automotive witnesses were matched, resulting in a total of 11 witnesses.


In [53]:
# 12.7 TRANSPORTATION - Railroads
# Select the industry, print the current tally, then match by keyword.
t.select_industry(12, 7)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'american public transportation association',
    'colorado railcar',
]
t.match_witnesses(keywords, print_witnesses=False)

12.7 TRANSPORTATION - Railroads:

There are 5 Railroads witnesses.

4 more Railroads witnesses were matched, resulting in a total of 9 witnesses.


In [54]:
# 12.9 TRANSPORTATION - Trucking
# Select the industry, print the current tally, then match by keyword.
t.select_industry(12, 9)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'hahn transportation',
    'watkins and shepard trucking',
    'con-way',
]
t.match_witnesses(keywords, print_witnesses=False)

12.9 TRANSPORTATION - Trucking:

There are 1 Trucking witnesses.

4 more Trucking witnesses were matched, resulting in a total of 5 witnesses.


In [55]:
# 12.8 TRANSPORTATION - Sea Transport
# Select the industry, print the current tally, then match by keyword.
t.select_industry(12, 8)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'port fourchon',
    'maersk',
]
t.match_witnesses(keywords, print_witnesses=False)

12.8 TRANSPORTATION - Sea Transport:

There are 2 Sea Transport witnesses.

2 more Sea Transport witnesses were matched, resulting in a total of 4 witnesses.


In [56]:
# Check whether any further matches remain
keywords, antikeywords = [''], []
t.search_witnesses(keywords, antikeywords)

0 witnesses were found.


### 2 CONSTRUCTION

In [57]:
# 2 CONSTRUCTION (C)
# 	 0 Architectural services
# 	 1 Building Materials & Equipment
# 	 2 Construction Services
# 	 3 General Contractors
# 	 4 Home Builders
# 	 5 Special Trade Contractors

In [58]:
# 2.1 CONSTRUCTION - Building Materials & Equipment
# Select the industry, print the current tally, then match by keyword.
t.select_industry(2, 1)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'headwaters incorporated',
    'holcim cement',
]
t.match_witnesses(keywords, print_witnesses=False)

2.1 CONSTRUCTION - Building Materials & Equipment:

There are 0 Building Materials & Equipment witnesses.

3 more Building Materials & Equipment witnesses were matched, resulting in a total of 3 witnesses.


### 5 FINANCE, INSURANCE & REAL ESTATE

In [59]:
# 5 FINANCE, INSURANCE & REAL ESTATE (F)
# 	 1 Commercial Banks
# 	 11 Savings & Loans
# 	 2 Credit Unions
# 	 3 Finance/Credit Companies
#    	 13 Student loan companies
#    	 8 Payday lenders
# 	 12 Securities & Investment
#    	 14 Venture capital
#    	 4 Hedge Funds
#    	 9 Private Equity & Investment Firms
# 	 5 Insurance
# 	 10 Real Estate
#    	 7 Mortgage bankers and brokers
# 	 0 Accountants
# 	 6 Misc Finance

In [60]:
# 5.0 FINANCE, INSURANCE & REAL ESTATE - Accountants
# Select the industry, print the current tally, then run the matcher.
t.select_industry(5, 0)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Only an empty keyword is supplied for this industry
keywords = [
    '',
]
t.match_witnesses(keywords, print_witnesses=False)

5.0 FINANCE, INSURANCE & REAL ESTATE - Accountants:

There are 0 Accountants witnesses.

0 more Accountants witnesses were matched, resulting in a total of 0 witnesses.


In [61]:
# 5.1 FINANCE, INSURANCE & REAL ESTATE - Commercial Banks
# Select the industry, print the current tally, then match by keyword.
t.select_industry(5, 1)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'td bank',
]
t.match_witnesses(keywords, print_witnesses=False)

5.1 FINANCE, INSURANCE & REAL ESTATE - Commercial Banks:

There are 0 Commercial Banks witnesses.

1 more Commercial Banks witnesses were matched, resulting in a total of 1 witnesses.


In [62]:
# 5.5 FINANCE, INSURANCE & REAL ESTATE - Insurance
# Select the industry, print the current tally, then match by keyword.
t.select_industry(5, 5)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'insurance agent',
    'willis of texas',
    'swiss re',
]
t.match_witnesses(keywords, print_witnesses=False)

5.5 FINANCE, INSURANCE & REAL ESTATE - Insurance:

There are 2 Insurance witnesses.

4 more Insurance witnesses were matched, resulting in a total of 6 witnesses.


In [63]:
# 5.6 FINANCE, INSURANCE & REAL ESTATE - Misc Finance
# Select the industry, print the current tally, then match by keyword.
t.select_industry(5, 6)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'eurex clearing',
    'nasdaq',
    'minneapolis grain exchange',
    'investor responsibility research center',
    'clearwater',
    'securities industry and financial markets association',
    'natsource',
    'new energy finance',
]
t.match_witnesses(keywords, print_witnesses=False)

5.6 FINANCE, INSURANCE & REAL ESTATE - Misc Finance:

There are 2 Misc Finance witnesses.

10 more Misc Finance witnesses were matched, resulting in a total of 12 witnesses.


In [64]:
# 5.10 FINANCE, INSURANCE & REAL ESTATE - Real Estate
# Select the industry, print the current tally, then match by keyword.
t.select_industry(5, 10)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'malkin holdings',
]
t.match_witnesses(keywords, print_witnesses=False)

5.10 FINANCE, INSURANCE & REAL ESTATE - Real Estate:

There are 0 Real Estate witnesses.

1 more Real Estate witnesses were matched, resulting in a total of 1 witnesses.


In [65]:
# 5.12 FINANCE, INSURANCE & REAL ESTATE - Securities & Investment
# Select the industry, print the current tally, then match by keyword.
t.select_industry(5, 12)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'masters capital management',
    'jp morgan chase',
    'icap energy',
    'friedman billings ramsey and company',
    'miller buckfire & co',
    'missionpoint capital partners',
]
t.match_witnesses(keywords, print_witnesses=False)

5.12 FINANCE, INSURANCE & REAL ESTATE - Securities & Investment:

There are 34 Securities & Investment witnesses.

7 more Securities & Investment witnesses were matched, resulting in a total of 41 witnesses.


In [66]:
# 5.14 FINANCE, INSURANCE & REAL ESTATE - Venture capital
t.select_industry(5,14)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w`: it is not a valid Python string
# escape, and the pattern text is unchanged.
keywords = ['arch venture partners', 'vantagepoint venture partners',
            'in-q-tel', 'new energy associates', r'\w+ capital partners',
            'kleiner perkins caufield & byers']

t.match_witnesses(keywords, print_witnesses = False)

5.14 FINANCE, INSURANCE & REAL ESTATE - Venture capital:

There are 0 Venture capital witnesses.

9 more Venture capital witnesses were matched, resulting in a total of 9 witnesses.


In [67]:
# Check whether any further matches remain
keywords, antikeywords = [''], []
t.search_witnesses(keywords, antikeywords)

0 witnesses were found.


### 10 MISC BUSINESS

In [68]:
# 10 MISC BUSINESS (N)

# 	 2 Business Associations
# 	 8 Food & Beverage
#    	 18 Restaurants & drinking establishments
# 	 1 Beer, Wine & Liquor
# 	 19 Retail Sales
# 	 15 Misc Services
#    	 9 Funeral services
# 	 3 Business Services
#    	 0 Advertising & public relations services
# 	 17 Recreation/Live Entertainment
#    	 16 Professional sports, arenas & related equip & svcs
# 	 4 Casinos/Gambling
#    	 10 Indian Gaming
# 	 11 Lodging/Tourism
# 	 12 Marijuana
# 	 13 Marijuana
# 	 7 Correctional facilities constr & mgmt/for-profit
# 	 5 Chemical & Related Manufacturing
# 	 20 Steel Production
# 	 14 Misc Manufacturing & Distributing
#    	 6 Clothing & accessories
# 	 21 Textiles

In [69]:
# 10.2 MISC BUSINESS - Business Associations
# Select the industry, print the current tally, then match by keyword.
t.select_industry(10, 2)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'the chlorine institute',
    'the center for small business and the environment',
    'institute of clean air companies',
    'chamber of commerce',
    'small business tax compliance & fairness coalition',
    'international council for capital formation',
]
t.match_witnesses(keywords, print_witnesses=False)

10.2 MISC BUSINESS - Business Associations:

There are 22 Business Associations witnesses.

7 more Business Associations witnesses were matched, resulting in a total of 29 witnesses.


In [70]:
# 10.5 MISC BUSINESS - Chemical & Related Manufacturing
# Select the industry, print the current tally, then match by keyword.
t.select_industry(10, 5)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'dow corning',
    'zaclon chemical',
    'dupont',
]
t.match_witnesses(keywords, print_witnesses=False)

10.5 MISC BUSINESS - Chemical & Related Manufacturing:

There are 1 Chemical & Related Manufacturing witnesses.

3 more Chemical & Related Manufacturing witnesses were matched, resulting in a total of 4 witnesses.


In [71]:
# 10.11 MISC BUSINESS - Lodging/Tourism
# Select the industry, print the current tally, then match by keyword.
t.select_industry(10, 11)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'aspen skiing company',
]
t.match_witnesses(keywords, print_witnesses=False)

10.11 MISC BUSINESS - Lodging/Tourism:

There are 2 Lodging/Tourism witnesses.

1 more Lodging/Tourism witnesses were matched, resulting in a total of 3 witnesses.


In [72]:
# 10.14 MISC BUSINESS - Misc Manufacturing & Distributing
t.select_industry(10, 14)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\s`: it is not a valid Python string
# escape, and the pattern text is unchanged.
keywords = [r'energy[\s-]{1}intensive manufacturers', 'manufacturers association',
            'general electric', 'ge global research', 'factory service agency', 
            'warwick mills', 'siemens', 'ess llc', 'lighting science group corporation',
            'timberland', 'target', 'aircuity', 'zumiez']

t.match_witnesses(keywords, print_witnesses = False)

10.14 MISC BUSINESS - Misc Manufacturing & Distributing:

There are 10 Misc Manufacturing & Distributing witnesses.

26 more Misc Manufacturing & Distributing witnesses were matched, resulting in a total of 36 witnesses.


In [73]:
# 10.15 MISC BUSINESS - Misc Services
# Select the industry, print the current tally, then match by keyword.
t.select_industry(10, 15)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'terrapass',
    'planktos',
    'cook inlet region inc.',
    'google',
    'fairbanks economic development corporation',
    'schlumberger carbon services',
    # Consultancies
    'energy and environmental analysis',
    'f.l. fernandez',
    'ocean associates',
    'industrial economics',
    'meagher & flom',
    'cambridge energy research associates',
    'orbis energy advisors',
    'dhe consulting',
    'nativeenergy',
    'standard credit group',
    'arduin, laffer and moore econometrics',
    'point carbon',
    ', (pace)$',
    'ada environmental solutions',
    'the lindsey group',
    'climate advisers',
    'cra international',
    'charles river associates',
    'small power consultants',
]
t.match_witnesses(keywords, print_witnesses=False)

10.15 MISC BUSINESS - Misc Services:

There are 0 Misc Services witnesses.

36 more Misc Services witnesses were matched, resulting in a total of 36 witnesses.


In [74]:
# 10.20 MISC BUSINESS - Steel Production
t.select_industry(10, 20)

# Report the witnesses already assigned to this industry
t.count_witnesses('witness_industry', t.industry, print_witnesses = False)

# Match witnesses
# Raw string for the pattern with `\w` / `\s`: these are not valid Python
# string escapes, and the pattern text is unchanged.
keywords = [r'\w+-?\w+\s?\w+\s?steel \w+', 'dowding']

t.match_witnesses(keywords, print_witnesses = False)

10.20 MISC BUSINESS - Steel Production:

There are 3 Steel Production witnesses.

3 more Steel Production witnesses were matched, resulting in a total of 6 witnesses.


In [75]:
# Check whether any further matches remain
keywords, antikeywords = [''], []
t.search_witnesses(keywords, antikeywords)
# t.print_key('content', 250)

0 witnesses were found.


### 9 LAWYERS & LOBBYISTS

In [76]:
# 9 LAWYERS & LOBBYISTS (K)
# 	 0 Lawyers/Law Firms
# 	 1 Lobbyists

In [77]:
# 9.0 LAWYERS & LOBBYISTS - Lawyers/Law Firms
# Select the industry, print the current tally, then match by keyword.
t.select_industry(9, 0)
t.count_witnesses('witness_industry', t.industry, print_witnesses=False)

# Keywords used to match additional witnesses
keywords = [
    'latham & watkins',
    'baker and hostetler',
    'crowell and moring',
    'cravath, swaine, and moore',
]
t.match_witnesses(keywords, print_witnesses=False)

9.0 LAWYERS & LOBBYISTS - Lawyers/Law Firms:

There are 7 Lawyers/Law Firms witnesses.

4 more Lawyers/Law Firms witnesses were matched, resulting in a total of 11 witnesses.


In [78]:
# 9.1 LAWYERS & LOBBYISTS - Lobbyists
# Select the industry and list the witnesses already assigned to it.
t.select_industry(9, 1)
t.count_witnesses('witness_industry', t.industry, print_witnesses=True)

# Keywords used to match additional witnesses
keywords = [
    'bonner & associates',
]
t.match_witnesses(keywords, print_witnesses=False)

9.1 LAWYERS & LOBBYISTS - Lobbyists:

There are 3 Lobbyists witnesses.

9 2 The Honorable Carol Browner, Principal, The Albright Group LLC
35 2 Browner, Carol M., Principal, The Albright Group, LLC
231 41 James Kerr, Partner, McGuire Woods LLP, Former Commissioner, North Carolina Public Utilities Commission


1 more Lobbyists witnesses were matched, resulting in a total of 4 witnesses.


### 1 COMMUNICATIONS/ELECTRONICS

In [79]:
# 1 COMMUNICATIONS/ELECTRONICS (B)
# 	 0 Book, newspaper & periodical publishing
# 	 1 Cable & satellite TV production
# 	 2 Commercial TV & radio stations
# 	 3 Computer software
# 	 4 Electronics Mfg & Equip
# 	 5 Internet
# 	 6 Motion Picture production & distribution
# 	 7 Printing & Publishing
# 	 8 Recorded Music & music production
# 	 9 Telecom Services
# 	 10 Telephone Utilities
# 	 11 TV production
# 	 12 TV/Movies/Music

In [80]:
# 1.12 COMMUNICATIONS/ELECTRONICS - TV/Movies/Music
# Select the industry and list the witnesses already assigned to it.
t.select_industry(1, 12)
t.count_witnesses('witness_industry', t.industry, print_witnesses=True)

# Keywords used to match additional witnesses; matched witnesses are printed too
keywords = [
    'kusi',
]
t.match_witnesses(keywords, print_witnesses=True)

1.12 COMMUNICATIONS/ELECTRONICS - TV/Movies/Music:

There are 0 TV/Movies/Music witnesses.



136 1 Coleman, John, Senior Meteorologist, KUSI, San Diego, California 
 Kusi 


1 more TV/Movies/Music witnesses were matched, resulting in a total of 1 witnesses.


In [81]:
# Check whether any further matches remain
keywords, antikeywords = [' '], []
t.search_witnesses(keywords, antikeywords)
# t.print_key('content', index)

0 witnesses were found.


In [82]:
# save_as(t, 'Hearings/05_all_witnesses_temp.pkl') # Last completed on Dec 14, 2020
# # t = load('05_all_witnesses_temp.pkl')

In [83]:
# Summary: count all witnesses and how many of them have an affiliation set.
matched = 0
witnesses = 0

for text in t.texts:
    # The affiliation list is indexed by witness position within the hearing.
    affiliations = text['witness_affiliation']
    for i, _ in enumerate(text['witnesses']):
        witnesses += 1
        # Identity test: `!= None` can be hijacked by a custom __ne__.
        if affiliations[i] is not None:
            matched += 1

print('The affiliations of {} out of {} witnesses were successfully matched.\n\n'.format(matched, witnesses))

# Progress log from earlier runs:
# Dec 01:  1338 out of 1789 
# Dec 02:  1339 out of 1789 
# Dec 03:  1375 out of 1789 
# Dec 03:  1500 out of 1789 
# Dec 14:  1601 out of 1789
# Dec 15:  1767 out of 1789
# Dec 16:  1780 out of 1780

The affiliations of 1780 out of 1780 witnesses were successfully matched.




In [84]:
# for i, text in enumerate(t.texts):
#     for j, desmog_witness in enumerate(text['desmog_witness']):
#         if desmog_witness != None:
#             print(i, j, desmog_witness, ': ', text['witness_industry'][j],'- ',  text['witness_affiliation'][j], '\n\n')

In [85]:
# Print the witness tally for each sector by name
for sector in sectors:
    t.count_witnesses(
        'witness_sector',
        sector['name'],
        print_witnesses=False,
    )

There are 128 Agribusiness witnesses.

There are 9 Communications/Electronics witnesses.

There are 16 Construction witnesses.

There are 12 Defense witnesses.

There are 283 Energy & Natural Resources witnesses.

There are 74 Finance, Insurance & Real Estate witnesses.

There are 6 Health witnesses.

There are 278 Ideological/Single-Issue witnesses.

There are 31 Labor witnesses.

There are 15 Lawyers & Lobbyists witnesses.

There are 127 Misc Business witnesses.

There are 746 Other witnesses.

There are 55 Transportation witnesses.



In [86]:
# for sector in sectors:
#     for industry in sector['industries']:
#         t.count_witnesses('witness_industry',  industry['industry'], print_witnesses = False)

In [87]:
# Should these be combined? (as seen here https://www.opensecrets.org/industries/slist.php)
# Mining and coal mining could be combined into the main category mining
# Natural Gas transmission & distribution could be included into the main category oil & gas
# Transportation and air transportation >> Transportation unions

In [88]:
# Save the collection to disk (last completed on Jan 13, 2021)
save_as(
    t,
    'Hearings/05_witnesses.pkl',
)