# Match the *DeSmog* Denialists

#### This script matches all hearing witnesses against the known denialists and denialist organisations listed in the *DeSmog* database

In [1]:
from TextCollection import *

In [2]:
# # Reinstantiate class after changing the Textcollection.py script
# # Do not run this in the last run! Leads to a conflict with pickle.

# from importlib import reload

# os.chdir('/home/mirjam/OneDrive/congress_committees/ArticleOne/Article_Scripts/Hearings')
# import TextCollection; reload(TextCollection)

# # Reinstantiate class
# t.__class__ = HearingsCollection

In [3]:
# Change directory
# NOTE: the target is relative to the current working directory, so this cell is
# not idempotent: running it a second time in the same kernel moves two more
# levels up before looking for 'Data/'. Run it exactly once per kernel session.
# NOTE(review): `os` is not imported explicitly in the visible cells; presumably
# it is provided by the star import of TextCollection - confirm.
os.chdir('../../Data/')

<br>

## 1) Preparation: Loading, correcting and tidying the data

In [4]:
# Load the hearings collection produced by the previous step.
# NOTE(review): `load` comes from the star import of TextCollection and, given
# the .pkl extension, presumably unpickles the file - only load trusted files.
t = load('Hearings/02_witnesses.pkl')

# Load the DeSmog exports. JSON is UTF-8 by specification, so the encoding is
# passed explicitly; otherwise decoding of non-ASCII names would depend on the
# locale of the machine running the notebook.
with open('DeSmog/contrarian_actors.json', 'r', encoding='utf-8') as jfile:
    contrarians = json.load(jfile)
with open('DeSmog/contrarian_organisations.json', 'r', encoding='utf-8') as jfile:
    organisations = json.load(jfile)

# Report the size of each input as a quick sanity check.
print('We imported {} hearings, {} climate change contrarians and {} contrarianal organisations.'.format(len(t),len(contrarians),len(organisations)))

We imported 263 hearings, 480 climate change contrarians and 234 contrarianal organisations.


In [5]:
# Show which fields are stored for a hearing (using the first one as an example)
t.texts[0].keys()

dict_keys(['filename', 'identifier', 'content_raw', 'content', 'content_stripped', 'congress', 'climate_related', 'keywords_climate', 'keywords_carbonpricing', 'keywords_climatechange', 'title', 'date', 'year', 'committee', 'MODS', 'witnesses_MODS', 'witnesses_transcript', 'witnesses'])

In [6]:
# Correct wrongly scraped names.
# Each pair is (position in `contrarians`, corrected name); the value before
# and after the correction is printed so the change can be checked by eye.
name_fixes = [
    (3, 'Arun Ahluwalia'),
    (244, 'Steven Koonin'),
    (276, 'Bjorn Lomborg'),
]

for position, corrected_name in name_fixes:
    entry = contrarians[position]
    print(entry['name'])
    entry['name'] = corrected_name
    print(entry['name'], '\n')

Arun
Arun Ahluwalia 

Steve Koonin
Steven Koonin 

Bj
Bjorn Lomborg 



In [7]:
# Create base name for better matching (remove initials, nicknames and suffixes)

def _first_match(pattern, text):
    """Return the first ``re.findall`` result for ``pattern`` in ``text``, or None.

    Replaces the former ``try: ...[0] / except: pass`` idiom, which also
    silenced every unrelated error.
    """
    found = re.findall(pattern, text)
    return found[0] if found else None


def _base_name(name):
    """Reduce a full name to a plain "first last" form for fuzzy matching.

    Steps, applied in order: drop a leading initial, drop middle initials,
    drop a parenthesised nickname, drop a Jr/Sr suffix (kept for Pielke, as
    both Pielke Sr and Jr are contrarians), transliterate special characters
    to ASCII and strip surrounding whitespace.
    """
    base = name

    # Remove leading initials, e.g. "H. Sterling Burnett" -> "Sterling Burnett"
    remainder = _first_match(r'^(?:\s?[A-Z]{1}\.{1}\s)(.+)', name)
    if remainder is not None:
        base = remainder

    # Remove middle initials, e.g. "Robert L. Bradley" or "William JR Alexander"
    parts = _first_match(r'(.+?)(?:\s?[A-Z]{1}\.{1}\s?|\s[A-Z]{2}\s){1,2}(.*)', base)
    if parts is not None:
        base = ' '.join(parts)

    # Remove nicknames, e.g. "Robert Shoup (Bob Shoup)"
    parts = _first_match(r'(.+?)\s(?:\(.+?\))\s?(.+)?', base)
    if parts is not None:
        base = ' '.join(parts)

    # Remove suffixes (except for Pielke Sr and Jr as both are contrarians)
    parts = _first_match(r'(.+?)(?<!Pielke)(?:,?\s(Jr|Sr))', base)
    if parts is not None:
        base = parts[0]

    # Remove special characters; the joins above can leave a trailing blank
    # when the part after an initial/nickname is empty, so strip it here.
    return unidecode.unidecode(base).strip()


for contrarian in contrarians:
    contrarian['name_base'] = _base_name(contrarian['name'])

print('Here are some examples:\n')
print(contrarians[6]['name'], '-->', contrarians[6]['name_base'])
print(contrarians[56]['name'], '-->', contrarians[56]['name_base'])
print(contrarians[62]['name'], '-->', contrarians[62]['name_base'])
print(contrarians[408]['name'], '-->', contrarians[408]['name_base'])

Here are some examples:

William JR Alexander --> William Alexander
Robert L. Bradley Jr. --> Robert Bradley
H. Sterling Burnett --> Sterling Burnett
Robert C. Shoup (Bob Shoup) --> Robert Shoup 


In [8]:
# List every derived base name, one per line, for a visual check
for entry in contrarians:
    print(entry['name_base'])

Tony Abbott
John Stuart Agnew
Alexandre Aguiar
Arun Ahluwalia
Syun-Ichi Akasofu
Ralph Alexander
William Alexander
Claude Allegre
Harry Alford
Helmut Alt
Gabriel Calzada Alvarez
Klaus Angerer
Bob Armstrong
Martin Armstrong
Scott Armstrong
Jerome Arnett
Ron Arnold
Edward Atkin
August Auer
Dennis Avery
Lawson Bader
Ronald Bailey
Steve Baker
Joe Balash
Sallie Baliunas
Timothy Ball 
Whitney Ball
Robert Balling
Arron Banks
George David Banks
Steve Bannon
Jack Barrett
Joseph Bast
Michael Bastasch
Joe Bastardi
Bryan Bateman
Cody Battershill
Charles Battig
Bernard Beauzamy
Calvin Beisner
Larry Bell
David Bellamy
Tim Benson
Richard Berman
David Bernhardt
James Bethell
Roger Bezdek
Paul Blair
Godfrey Bloom
Sonja Boehmer-Christiansen
Andrew Bolt
John Bolton
Sir Nicholas Bonsor
Christopher Booker
Donald Boudreaux
Alexandra Liddy Bourne
Robert Bradley
Jan Breslow
Barry Brill
Arthur Brooks
Robert Bryce
Reid Bryson
Sterling Burnett
Nigel Calder
Alan Carlin
Bob Carter
Oren Cass
Mike Catanzaro
Lord Rich

<br>

## 2) Matching: match denialist witnesses giving testimony

### Match names

In [9]:
# For every witness of every hearing, look for the first contrarian whose base
# name fuzzy-matches the witness string. The result is stored per hearing in
# 'desmog_witness', a list parallel to 'witnesses' holding the matched base
# name or None.
count = 0
for hearing_idx, hearing in enumerate(t.texts):
    matched_names = hearing['desmog_witness'] = []
    for witness in hearing['witnesses']:
        hit = None
        for contrarian in contrarians:
            candidate = contrarian['name_base']
            # First try the token-set score; only if it is below 90 fall back
            # to the partial score, which must exceed 92.
            score = fuzz.token_set_ratio(candidate, witness)
            accepted = score >= 90
            if not accepted:
                score = fuzz.partial_ratio(candidate, witness)
                accepted = score > 92
            if accepted:
                print(hearing_idx, score, ': ',
                      witness, '\n\t',
                      candidate, '\n')
                count += 1
                hit = candidate
                # Stop at the first contrarian that matches this witness
                break
        matched_names.append(hit)
print('{} denialist witnesses were matched.'.format(count))

1 100 :  Marlo Lewis, Senior Fellow, Competitive Enterprise Institute 
	 Marlo Lewis 

9 100 :  David W. Kreutzer, Ph.D., Senior Policy Analyst, The Heritage Foundation 
	 David Kreutzer 

19 100 :  Fred Smith, Chairman, President and Chief Executive Officer, FedEx Corporation 
	 Fred Smith 

19 100 :  Charles Drevna, President of the National Petrochemical and Refiners Association 
	 Charles Drevna 

27 100 :  Drevna, Charles T., President, National Petrochemical & Refiners Association 
	 Charles Drevna 

28 100 :  Beisner, E. Calvin, associate professor of Historical Theological and Social Ethics, Knox Theological Seminary; Spokesman for the Interfaith Stewardship Alliance 
	 Calvin Beisner 

28 100 :  Lomborg, Bjorn, adjunct professor, Copenhagen Consensus Center 
	 Bjorn Lomborg 

32 100 :  Robert Bradley, Director Of International Climate Policy, World Resources Institute 
	 Robert Bradley 

37 100 :  Jack Gerard, National Mining Association 
	 Jack Gerard 

40 94 :  Hon. Jeffery 

In [10]:
# # Search for further matches
# count = 0
# for i, text in enumerate(t.texts):
#     for j, witness in enumerate(text['witnesses']):
#         if t.texts[i]['desmog_witness'][j] == None:
#             for contrarian in contrarians:
#                 ratio = fuzz.partial_ratio(contrarian['name_base'],unidecode.unidecode(witness))
#                 if ratio >= 90:
#                     print(i, ratio, ': ',
#                           witness,'\n\t',
#                           contrarian['name_base'], '\n')
#                     count+=1
#                     break
# print('{} further denialist witnesses were matched.'.format(count))

### Match organisations

In [11]:
# For every witness of every hearing, look for the first DeSmog organisation
# whose name appears (case-insensitively, fuzzy) in the witness string. The
# result is stored per hearing in 'desmog_organisation', a list parallel to
# 'witnesses' holding the matched organisation name or None.
count = 0
for hearing_idx, hearing in enumerate(t.texts):
    matched_orgs = hearing['desmog_organisation'] = []
    for witness in hearing['witnesses']:
        found = None
        for organisation in organisations:
            org_name = organisation['name']
            score = fuzz.partial_ratio(org_name.lower(), witness.lower())
            if score < 90:
                continue
            print(hearing_idx, score, ': ',
                  witness, '\n\t',
                  org_name, '\n')
            count += 1
            found = org_name
            # Stop at the first organisation that matches this witness
            break
        matched_orgs.append(found)
print('{} denialist witnesses were matched.'.format(count))

0 100 :  Margo Thorning, Ph.D., Senior Vice President and Chief Economist, American Council for Capital Formation 
	 American Council for Capital Formation 

1 100 :  Lee Lane, Resident Fellow, American Enterprise Institute 
	 American Enterprise Institute 

1 100 :  Marlo Lewis, Senior Fellow, Competitive Enterprise Institute 
	 Competitive Enterprise Institute 

6 100 :  Margo Thorning, Ph.D., Senior Vice President and Chief Economist, American Council for Capital Formation 
	 American Council for Capital Formation 

9 100 :  David W. Kreutzer, Ph.D., Senior Policy Analyst, The Heritage Foundation 
	 Heritage Foundation 

15 100 :  Thomas F. Farrell, Ii, Chairman, President And Ceo, Dominion (On Behalf Of Edison Electric Institute) 
	 Edison Electric Institute 

16 100 :  Mr. Lee Lane, Resident Fellow, American Enterprise Institute 
	 American Enterprise Institute 

28 100 :  Lomborg, Bjorn, adjunct professor, Copenhagen Consensus Center 
	 Copenhagen Consensus Center 

37 100 :  Jac

223 100 :  Michael Morris, Chairman Of The Board Of Directors, Edison Electric Institute 
	 Edison Electric Institute 

223 100 :  Craig Montesano, Director Of Governmental Affairs, National Mining Association 
	 National Mining Association 

231 100 :  Dr. Steven Hayward, F.K. Weyerhaeuser Fellow, American Enterprise Institute 
	 American Enterprise Institute 

231 100 :  David Kreutzer, Senior Policy Analyst in Energy Economics and Climate Change, The Heritage Foundation 
	 Heritage Foundation 

231 100 :  Myron Ebell, Director, Energy and Global Warming Policy, Competitive Enterprise Institute 
	 Competitive Enterprise Institute 

231 100 :  Jeffry E. Sterba, Chairman and CEO, PNM Resources Inc., On Behalf of the Edison Electric Institute 
	 Edison Electric Institute 

231 100 :  Lee Lane, Resident Fellow, American Enterprise Institute 
	 American Enterprise Institute 

231 95 :  William L. Kovacs, Vice President, Environment, Technology and Regulatory Affairs, U.S. Chamber of Comme

In [12]:
# count = 0
# for i, text in enumerate(t.texts):
#     for j, witness in enumerate(text['witnesses']):
#         if t.texts[i]['desmog_organisation'][j] == None:
#             for organisation in organisations:
#                 ratio = fuzz.token_sort_ratio(re.sub(r'\s*\((.+)\)\s*', ' ',organisation['name'].lower()).strip(),witness.lower())
#                 if ratio >= 90:
#                     print(i, ratio, ': ',
#                           witness,'\n\t',
#                           organisation['name'], '\n')
#                     count+=1
#                     break

# print('{} denialist witnesses were matched.'.format(count))

<br>

### Summary

In [13]:
# Summary: count the witnesses flagged by name or by organisation, and the
# number of hearings with at least one flagged witness.
denialists = 0
denialists_hearings = 0

for hearing in t.texts:
    name_hits = hearing['desmog_witness']
    org_hits = hearing['desmog_organisation']
    # Positions of witnesses that matched a contrarian name and/or organisation
    flagged = [
        pos for pos in range(len(hearing['witnesses']))
        if not (name_hits[pos] is None and org_hits[pos] is None)
    ]
    denialists += len(flagged)
    if flagged:
        denialists_hearings += 1

print('{} denialists were found at {} of the {} hearings.\n\n'.format(denialists, denialists_hearings, len(t)))

110 denialists were found at 75 of the 263 hearings.




<br>

### Save data


In [14]:
# Run for maximum recursion error during saving.
# Raises the stack size limit and the interpreter recursion limit together.

import resource
import sys

stack_limits = resource.getrlimit(resource.RLIMIT_STACK)
print(stack_limits)
print(sys.getrecursionlimit())

# May segfault without raising the stack limit. 0x100 is a guess at the size of each stack frame.
frame_size_guess = 0x100
max_rec = 0x100000
stack_bytes = frame_size_guess * max_rec

# Keep the existing hard limit: an unprivileged process cannot raise it, so
# forcing RLIM_INFINITY raises ValueError whenever the hard limit is finite.
# If the hard limit is smaller than the requested stack, use what is allowed
# and scale the recursion limit down accordingly (never below its current value).
hard_limit = stack_limits[1]
if hard_limit != resource.RLIM_INFINITY and stack_bytes > hard_limit:
    stack_bytes = hard_limit
    max_rec = max(stack_bytes // frame_size_guess, sys.getrecursionlimit())

resource.setrlimit(resource.RLIMIT_STACK, (stack_bytes, hard_limit))
sys.setrecursionlimit(max_rec)

print(sys.getrecursionlimit())

(8388608, -1)
3000
1048576


In [15]:
# Persist the collection, which now carries the 'desmog_witness' and
# 'desmog_organisation' fields for every hearing.
# NOTE(review): `save_as` comes from the star import of TextCollection and is
# presumably the pickle-based counterpart of `load` - confirm.
save_as(t, 'Hearings/03_desmog_witnesses.pkl') # Last completed on Oct 30, 2020