## Section 1
### First we use word lists from psychometric tools and obtain synonyms and hyponyms using WordNet

In [1]:
## Lines 3 and 5 (pip install and nltk.download) are only needed the first time you run this script; comment them out afterwards

!pip install nltk
import nltk
nltk.download('all')
from nltk.corpus import wordnet as wn

# Here we can change the language of analysis
languages = ['eng']






[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\azeez\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\azeez\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\azeez\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\azeez\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     C:\Users\azeez\AppData\Roaming\nltk_data...
[nltk_data]    | 

In [3]:
### Function to Generate synonyms and hyponyms using Wordnet

def generate_wordnet_list(word_base,languages,exclude_list):
  """Expand seed words into a sorted list of WordNet synonyms and hyponyms.

  For every language in ``languages`` and every seed word in ``word_base``,
  each synset returned by ``wn.synsets`` whose name is not in
  ``exclude_list`` is printed together with its definition, so that
  irrelevant meanings can be spotted and added to ``exclude_list``. The lemma
  names of that synset and of its direct hyponyms are then collected, without
  duplicates, into the word list of that language.

  Args:
    word_base: iterable of seed words, each looked up with ``wn.synsets``.
    languages: iterable of language codes passed to ``Synset.lemmas``,
      e.g. ``['eng']``.
    exclude_list: iterable of synset names (e.g. ``'dry.n.01'``) to skip.

  Returns:
    The alphabetically sorted word list of the FIRST language in
    ``languages``; lists built for any further languages are discarded.
    An empty list is returned when ``languages`` is empty.
  """
  ## sets give constant-time membership tests; the result is sorted at the end,
  ## so the order in which words are discovered does not matter
  excluded_names = set(exclude_list)
  word_lists = []

  #iterate over languages
  for language in languages:
    words = set()

    # iterate over words in the word_base list
    for word in word_base:

      ## iterate over different meanings of the word synsets
      ## NOTE(review): the POS constants are concatenated into a single string;
      ## presumably wn.synsets iterates over its characters - confirm against the NLTK docs
      for meaning in wn.synsets(word, pos=wn.NOUN+wn.VERB+wn.ADJ):
        if meaning.name() in excluded_names:
          continue

        print(meaning)
        print(meaning.definition())
        print('\n')

        ## collect the lemma names of the meaning itself, then of each of its hyponyms
        for synset in [meaning, *meaning.hyponyms()]:
          words.update(lemma.name() for lemma in synset.lemmas(language))

    ## add the language-specific word list to the global word list, sorted in alphabetic order
    word_lists.append(sorted(words))

  ## callers expect one flat list of words: hand back the list of the first language
  return word_lists[0] if word_lists else []

In [5]:
## Example of seed words related to a target concept - here, adaptation.
## The base seed word list was obtained from:
# Sterett, S. M. (2018). Climate change adaptation: Existential threat, welfare states and legal management. Oñati Socio-Legal Series, Forthcoming.
## NOTE(review): 'tranformation' and 'compesation' look like misspellings of 'transformation'
## and 'compensation'; a misspelled seed presumably yields no synsets. Correcting them changes
## adaptation_list and therefore shifts the hand-picked indices used further down, so re-check
## those indices when fixing the spelling.
word_base_adaptation = [
    'adaptation',
    'management',
    'intervention',
    'shifting',
    'tranformation',
    'governance',
    'compesation',
    'insurance',
    'litigation',
    'support',
    'responsibility',
]

## Using the function above, we obtain the list of synonyms and hyponyms of the seed words.
## Word meanings (synsets) that are irrelevant for the concept we want to measure are excluded
## by adding their names to the exclusion list below.
## We re-run the cell until all the synsets displayed in its output have relevant meanings.
## ('weather.v.01' is listed twice; the repetition has no effect on the membership test.)
exclude_list_adaptation = [
    'landslide.n.01', 'flood.n.03',
    'weather.v.01', 'weather.v.01', 'weather.v.03',
    'dry.n.01', 'dry.s.02', 'dry.a.04', 'dry.a.05', 'dry.a.06', 'dry.a.07',
    'dry.s.08', 'dry.s.09', 'dry.s.10', 'dry.s.12', 'dry.s.15', 'dry.s.16',
    'adaptation.n.01', 'adaptation.n.03',
    'season.n.03', 'season.v.01', 'season.v.02', 'temper.v.04',
    'natural.n.01', 'natural.n.02', 'natural.n.03', 'natural.a.05',
    'natural.s.06', 'natural.s.07', 'natural.s.08', 'natural.s.09',
    'lifelike.s.02',
    'dry.s.13', 'dry.s.14',
    'management.n.02', 'intervention.n.04',
    'shift.v.10', 'shift.v.12',
    'support.n.04', 'support.n.06', 'accompaniment.n.02', 'support.n.10', 'support.v.09',
]

adaptation_list = generate_wordnet_list(
    word_base=word_base_adaptation,
    languages=languages,
    exclude_list=exclude_list_adaptation,
)


Synset('adaptation.n.02')
the process of adapting to something (such as environmental conditions)


Synset('management.n.01')
the act of managing something


Synset('intervention.n.01')
the act of intervening (as to mediate a dispute, etc.)


Synset('intervention.n.02')
a policy of intervening in the affairs of other countries


Synset('interposition.n.02')
the act or fact of interposing one thing between or among others


Synset('treatment.n.01')
care provided to improve a situation (especially medical procedures or applications that are intended to relieve illness or injury)


Synset('shift.n.05')
the act of moving from one place to another


Synset('switch.v.04')
make a shift in or exchange of


Synset('shift.v.02')
change place or direction


Synset('transfer.v.04')
move around


Synset('stir.v.02')
move very slightly


Synset('shift.v.05')
move from one setting or context to another


Synset('shift.v.06')
change in quality


Synset('shift.v.07')
move and exchange for another


Syn

In [6]:
## This is a very broad list which includes all the synonyms and hyponyms
## obtained for the adaptation seed words chosen above

## As one can verify, this list needs pruning:
## some of it by excluding irrelevant synsets a priori (cell above),
## but some of it can also be done manually a posteriori

print(adaptation_list)

['Curia', 'G-Jo', 'abide', 'abutment', 'accept', 'acclimation', 'acclimatisation', 'acclimatization', 'accountability', 'acupressure', 'acupuncture', 'adaptation', 'adaption', 'adherence', 'adhesion', 'adjustment', 'administration', 'advance', 'advocacy', 'affirm', 'agitate', 'aid', 'anchor', 'answerability', 'answerableness', 'apologise', 'apologize', 'approval', 'approving', 'architrave', 'assist', 'assurance', 'attachment', 'authorisation', 'authorization', 'autogenic_therapy', 'autogenic_training', 'autogenics', 'automobile_insurance', 'back', 'back_up', 'backbone', 'backing', 'base', 'basement', 'bear', 'bear_out', 'bear_up', 'beat_down', 'bench', 'blessing', 'block', 'boost', 'brace', 'bracket', 'brass', 'break', 'bring_home_the_bacon', 'brook', 'budge', 'bunker', 'buoy', 'buoy_up', 'burden_of_proof', 'business_interruption_insurance', 'buttress', 'buttressing', 'buy_at', 'canalisation', 'canalization', 'car_insurance', 'careen', 'carry', 'carry_forward', 'carry_over', 'champion'

In [9]:
# ## THREAT

#Sterett, S. M. (2018). Climate change adaptation: Existential threat, welfare states and legal management. Oñati Socio-Legal Series, Forthcoming.
## Seed words for the threat concept.
## NOTE(review): 'threathen' looks like a misspelling of 'threaten'; a misspelled seed presumably
## yields no synsets. Correcting it changes threat_list and therefore shifts the hand-picked
## indices used further down, so re-check those indices when fixing the spelling.
word_base_threat = [
    'warning',
    'threathen',
    'risk',
    'problem',
    'terrify',
    'danger',
    'attack',
    'problems',
    'trials',
    'causation',
    'harm',
    'damage',
    'blame',
]

## Further candidate seed words, kept for reference (currently not used):
# 'disaster','unpredictable', 'risk', 'hindrances', 'Lack','vulnerable','decreasing','Poor','incidence','devastating','changing','challenges','repercussions' 'scarcity', 'diminishing', 'waste', 'critical','fight','disaster','exacerbated','severity','adverse','pest','depletion','endanger','displacement','loss','decline','impeded','fragile','inundation','constraints','negative','problem']

## Synsets whose meaning is irrelevant for the threat concept
exclude_list_threat = [
    'risk.n.02',
    'attack.n.01', 'attack.n.02', 'fire.n.09',
    'attack.v.02', 'attack.v.03', 'assail.v.01', 'attack.v.05',
    'test.n.05', 'trial.n.02', 'test.n.04', 'trial.n.04', 'trial.n.05',
    'damage.n.02', 'price.n.02', 'blasted.s.01',
]

## Further candidate exclusions, kept for reference (currently not used):
#'vulnerable.s.02','decreasing.a.02','poor_people.n.01','hapless.s.01','poor.a.02','poor.a.03','incidence.n.02','annihilating.s.02','switch.v.03','change.v.05','exchange.v.01','transfer.v.06','deepen.v.04','change.v.10','challenge.n.01','challenge.n.02','challenge.n.03','challenge.n.04','challenge.n.05','challenge.v.01','challenge.v.02','challenge.v.03','challenge.v.04','diminish.v.02','waste.n.01','waste.n.02','waste.n.05','waste.v.01','waste.v.02','neutralize.v.04','consume.v.03','godforsaken.s.01','critical.a.01',
#                        'critical.a.03','critical.s.04','battle.n.01','gamble.v.01','competitiveness.n.01','fight.n.04','fight.n.05','contend.v.06','pest.n.03','supplanting.n.01','translation.n.07','displacement.n.04','displacement.n.07','loss.n.05','loss.n.06','personnel_casualty.n.01','refuse.v.02','refuse.v.01','decline.v.07','obstruct.v.02','flimsy.s.03''constraint.n.01','constraint.n.03','negative.n.01',
#                         'negative.n.02','veto.v.01','negative.a.04','negative.s.06','damaging.s.02','negative.s.08','vulnerable.s.02','decreasing.a.02','poor.a.02','vulnerable.s.02','poor_people.n.01','poor.a.02','poor.a.03','incidence.n.02','annihilating.s.02','change.v.05','change.v.06','exchange.v.01','transfer.v.06']

threat_list = generate_wordnet_list(
    word_base=word_base_threat,
    languages=languages,
    exclude_list=exclude_list_threat,
)


a message informing of danger


Synset('admonition.n.01')
cautionary advice about something imminent (especially imminent danger or other unpleasantness)


notification of something, usually in advance


Synset('warn.v.01')
notify of danger, potential harm, or risk


Synset('warn.v.02')
admonish or counsel in terms of someone's behavior


Synset('warn.v.03')
ask to go away


Synset('warn.v.04')
notify, usually in advance


Synset('admonitory.s.01')
serving to warn


Synset('hazard.n.01')
a source of danger; a possibility of incurring loss or misfortune


Synset('risk.n.03')
the probability of becoming infected given that exposure to an infectious agent has occurred


Synset('risk.n.04')
the probability of being exposed to an infectious agent


Synset('risk.v.01')
expose to a chance of loss or damage


Synset('gamble.v.01')
take a risk in the hope of a favorable outcome


Synset('problem.n.01')
a state of difficulty that needs to be resolved


Synset('problem.n.02')
a question raised fo

In [10]:
# ## DAMAGE

# #Stewart A. E. (2021). Psychometric Properties of the Climate Change Worry Scale. International journal of environmental research and public health, 18(2), 494. https://doi.org/10.3390/ijerph18020494

# exclude_list_damage = ['price.n.02','personnel_casualty.n.01','worry.v.05','worry.v.06','effect.n.04','effect.n.05','affect.n.01','involve.v.01','feign.v.01','severe.s.04','reverence.v.01','stress.n.01','stress.n.03','stress.n.05','stress.v.01','stress.v.02','try.v.07','depression.n.05','depression.n.08','depression.n.09','extreme.n.01','extreme_point.n.01','extreme.s.04','damage.n.02','risk.n.02','gamble.v.01','sensitivity.n.03','fear.n.03']
# #
# word_base_damage = ['damage','loss','worry','effect','affect','outbreak','severe','paralyzed','difficult','fear','stress','anxiety','depression','extreme','threat','risks','sensitivity']
# damage_list = generate_wordnet_list(word_base_damage,languages,exclude_list_damage)


In [12]:
## Inspect the word list obtained for the threat seed words
print(threat_list)



## Section 2: Generate a semantic vector map with word2vec

In [13]:
from gensim.models.word2vec import Word2Vec
import os
from os import path

## This cell organizes the corpus as a list of sentences, and each sentence as a list of words,
## which is the input passed to Word2Vec below

## Collect preprocessed texts in txt format, starting from the current working directory
root_folder = os.getcwd()
print(root_folder)

## This will be a list of tokenized lines (one list of words per line of text)
word2vec_input = []

## This iterates over your path, folders and subfolders looking for txt files.
## The loop variable is named dirpath so that it does not shadow `path` imported from os.
for dirpath, subdirs, files in os.walk(root_folder):
    for file in files:
        ## keep only files whose name ends in .txt; names containing 'model' are skipped so that
        ## the word2vec model saved later in this notebook ('..._model.txt') is not read as corpus text
        ## NOTE(review): this also skips any corpus file whose name contains the lowercase substring 'model'
        if file.endswith('.txt') and 'model' not in file:
            print(file)
            name = os.path.join(dirpath, file)

            ## the with-block closes the file handle as soon as the text has been read
            with open(name, encoding = 'utf-8',errors='ignore') as corpus_file:
                file_text = corpus_file.read()

            ## this creates the list of paragraphs - lines
            text_list_paragraphs = file_text.split('\n')

            for paragraph in text_list_paragraphs:
                ## clean the paragraph further -- getting rid of \r at the end of the line
                paragraph = paragraph.replace('\r', '')

                ## split the paragraph on single spaces and add it to the word2vec input list
                word2vec_input.append(paragraph.split(' '))
            
                #print(word2vec_input)

C:\Users\azeez\Documents\EM3 SUBMISSION FOLDER
aaditya-2012-Climate responsive.txt
abdel-2013-Ski.txt
abdul-2012-Mod.txt
abo-2012-Microaerobic.txt
Abu-2009-compression.txt
afonso-2013-Tracer gas.txt
afrasiabian-2019-analysis.txt
agrawal-2011-Flow characteristics.txt
agrawal-2012-Study.txt
agyeni-2012-Experimenta.txt
ahmad-2016-Investigation of the ef.txt
ahmadi-2016-Applying.txt
ahmadi-2016-Thermodynamic anal.txt
ahmadi-2020-Portfolio optimization.txt
ahmed-2018-Design methodology.txt
Ajay-2009-simultaneous.txt
akyuz-2010-Energetic.txt
alamier-2020- Synthesi.txt
alayietal-2020-Energy.txt
alayietal-2020-Technical and environmental.txt
aldali-2011-energetic.txt
aldali-2011-Solar absorber tube analysis.txt
aleknaviciute-2012-Plasma assis.txt
aleknaviviute-2016-Towards clean.txt
alexander-2013-solid sorption.txt
ali ahmadi-2016-Modeling solubility of carb.txt
alkhalidi etal-2019-Energy efficient cooling.txt
alkhalidi etal-2020-Using Wood.txt
alkhalidi-2020- Do green buildings.txt
Allison-2

In [14]:
## Number of tokenized lines collected in word2vec_input
len(word2vec_input)

525882

In [15]:
## Here we build the vector space with Word2Vec
## NOTE(review): min_count=1 presumably keeps every token, including tokens that occur only once,
## and no seed is passed, so the trained vectors may differ from run to run - confirm against the
## gensim documentation before relying on fixed indices into the similarity lists further down.

SentenceCorpus = word2vec_input
word2vec_output = Word2Vec(SentenceCorpus, min_count=1)

In [16]:
## Save vector space
## NOTE(review): the file gets a .txt name although it is written by Word2Vec.save (presumably
## not plain text); the corpus-loading cell relies on 'model' in the file name to skip it.

word2vec_output.save('adap_threat_w2v_model.txt')

## Section 3. Use the vector semantic map to evaluate if the bags of words created in section 1 are ecologically valid

In [17]:
###  function that uses word2vec to query the 10 most semantically similar words to each seed word in word_list

def get_word2vec_list(word_list,model,topn=10):
  """Collect, for each word, the word itself followed by its most similar words.

  Args:
    word_list: iterable of words to look up.
    model: object exposing ``model.wv.most_similar`` (e.g. the Word2Vec
      model trained or loaded in this notebook).
    topn: number of most similar words requested per word (default 10).

  Returns:
    A list with one entry per word for which the lookup succeeded, in input
    order. Each entry is a list ``[word, similar_1, ..., similar_n]``.
    Words for which ``most_similar`` raises ``KeyError`` are skipped.
  """
  list_of_word2vec_lists = []
  for word in word_list:
    try:
      ## here is the crucial line - we are using the model that we trained to get the most similar words within our corpus
      list_vects = model.wv.most_similar([word],topn=topn)
    except KeyError:
      ## presumably the word is not in the model's vocabulary: leave it out of the result
      continue

    ## keep only element [0] of each returned item (the similar word itself)
    list_of_word2vec_lists.append([word] + [item[0] for item in list_vects])

  return list_of_word2vec_lists

In [20]:
## Load the saved vector space (same file name as written by the save cell above)
model = Word2Vec.load('adap_threat_w2v_model.txt')


In [23]:
## get the word2vec list of the 10 most similar words for each word of the adaptation word list

list_of_adaptation_w2v = get_word2vec_list(adaptation_list,model)

## print each list with its position, so that the relevant lists can be selected by index
for index, w2v_list in enumerate(list_of_adaptation_w2v):
  print(index, w2v_list)

0 ['abide', 'flare-out', 'Bhaduri', 'deadlines—and', 'scholarship', '2031–2032', '5400', 'conservatives', 'managed', 'facility-level', 'displaced']
1 ['abutment', 'Evaporating', 'lift', '(Pa)', '(Pa),', '(8C)', 'frictional', '(kJ/kg)', 'allowable', '24.6', '[Pa]']
2 ['accept', 'encourage', 'consequences,', 'attract', 'express', 'consumers’', 'Poor', 'inbound', 'minister', 'trigger', 'ultimately']
3 ['acclimation', 'Payback', 'Apr.', 'CM', '(node', '(days,', '\x0cA.K.', '+2.0', 'inactivity', 'Mesh', '31.9']
4 ['acclimatisation', 'pouring', 'sleeve', 'upkeep,', '0.995', 'axi-symmetry', 'A/L,', 'created,', '‘classify’', '86.02,', '900;']
5 ['accountability', 'single-', 'HGHEs', 'shoreline', 'timing', 'geotechnics', 'emergencies', 'typology', 'background', 'nanopower', '4.22%']
6 ['adaptation', 'planning', 'sustainability', 'regional', 'practices', 'policies', 'risks', 'mitigation', 'SLR', 'health', 'long-term']
7 ['adaption', 'Less-progressive', 'zoning', 'Coronavirus', 'ethanol;', 'opera

In [29]:
## Choose, from the word2vec outputs, the lists whose clouds of meaning seem coherent with adaptation.
## The numbers are hand-picked positions in list_of_adaptation_w2v (as printed above), so they
## depend on the order of that list.
relevant_adaptation_w2v_words = [
  list_of_adaptation_w2v[position]
  for position in (
    0, 3, 4, 5, 6, 8, 10, 15, 19, 28, 31, 32, 43, 46, 47, 55,
    58, 70, 71, 76, 78, 82, 92, 93, 103, 105, 116, 133, 134, 137, 140, 142,
  )
]

## Add all the words of the selected lists into one final bag of words
adaptation_BoW = []
for selected_list in relevant_adaptation_w2v_words:
  adaptation_BoW.extend(selected_list)

print(adaptation_BoW)

['abide', 'flare-out', 'Bhaduri', 'deadlines—and', 'scholarship', '2031–2032', '5400', 'conservatives', 'managed', 'facility-level', 'displaced', 'acclimation', 'Payback', 'Apr.', 'CM', '(node', '(days,', '\x0cA.K.', '+2.0', 'inactivity', 'Mesh', '31.9', 'acclimatisation', 'pouring', 'sleeve', 'upkeep,', '0.995', 'axi-symmetry', 'A/L,', 'created,', '‘classify’', '86.02,', '900;', 'accountability', 'single-', 'HGHEs', 'shoreline', 'timing', 'geotechnics', 'emergencies', 'typology', 'background', 'nanopower', '4.22%', 'adaptation', 'planning', 'sustainability', 'regional', 'practices', 'policies', 'risks', 'mitigation', 'SLR', 'health', 'long-term', 'adherence', 'biodiesel,', 'actors', 'similarly', 'Just', 'bioboards', 'first,', 'SNG', 'pretreatment', 'people,', 'engineers', 'adjustment', 'complexity', 'limiting', 'failure', 'reliability', 'identification', 'computing', 'privacy', 'limits', 'institutional', 'action', 'aid', 'extend', 'assist', 'replace', 'examine', 'facilitate', 'establi

In [30]:
## get the word2vec list of the 10 most similar words for each word of the threat word list

list_of_threat_w2v = get_word2vec_list(threat_list,model)

## print each list with its position, so that the relevant lists can be selected by index
for index, w2v_list in enumerate(list_of_threat_w2v):
  print(index, w2v_list)

0 ['alarm', 'EEV,', 'inactive', 'nanoreactor', '‘hydrogen', 'cellulose-bounded', 'innocent', 'illusion', 'five-day', 'underestimation', 'Summits']
1 ['approach', 'technique', 'method', 'algorithm', 'framework', 'methodology', 'tool', 'strategy', 'concept', 'problem', 'procedure']
2 ['assault', 'Essay', 'invoke', 'organism.3', '“Reflections', '(tH1)', 'all-silica', 'moiety', '(TR)', 'Volpe', '‘Research']
3 ['attack', '(TA,', '\x0cM.H.', 'beliefs', 'sexual', 'Rigby.', 'vastly', 'southeastern', 'age,', 'famines,', 'Zirconia']
4 ['attempt', 'effort', 'Analogous', 'opportunity', 'organism,', 'unexpected', 'answer', 'intuitive', 'deciding', 'H-pot', 'in-house']
5 ['avenue', 'party', 'Partnerships.', 'route', 'definite', 'versatile', '300L', 'metaphor', 'stepped', 'philosophical', 'HCFCs.']
6 ['bite', 'furnace', 'extractor', 'RFID', 'ideal', 'infinite', 'enclosure', 'massing,', 'ejector,', 'indirect', 'tenant']
7 ['blame', 'Discussion', 'Essays', '\x0cResearch', '“Next', 'Potential),', 'Rican

In [31]:
## Choose, from the word2vec outputs, the lists whose clouds of meaning seem coherent with threat.
## The numbers are hand-picked positions in list_of_threat_w2v (as printed above), so they
## depend on the order of that list.
relevant_threat_w2v_words = [
  list_of_threat_w2v[position]
  for position in (
    0, 2, 3, 7, 14, 15, 19, 20, 21, 22, 23, 24, 25, 26, 27, 31, 32, 39, 40, 42,
    43, 48, 49, 50, 51, 55, 57, 60, 63, 65, 71, 70, 76, 77, 82, 83, 86, 87, 88,
  )
]

## Add all the words of the selected lists into one final bag of words
threat_BoW = []
for selected_list in relevant_threat_w2v_words:
  threat_BoW.extend(selected_list)

print(threat_BoW)



In [32]:
## A hypothetical bag of words was thus obtained for each concept, which can be used for frequency analyses
## See next script

print('adaptation',adaptation_BoW)
print('\n')
print('threat',threat_BoW)

adaptation ['abide', 'flare-out', 'Bhaduri', 'deadlines—and', 'scholarship', '2031–2032', '5400', 'conservatives', 'managed', 'facility-level', 'displaced', 'acclimation', 'Payback', 'Apr.', 'CM', '(node', '(days,', '\x0cA.K.', '+2.0', 'inactivity', 'Mesh', '31.9', 'acclimatisation', 'pouring', 'sleeve', 'upkeep,', '0.995', 'axi-symmetry', 'A/L,', 'created,', '‘classify’', '86.02,', '900;', 'accountability', 'single-', 'HGHEs', 'shoreline', 'timing', 'geotechnics', 'emergencies', 'typology', 'background', 'nanopower', '4.22%', 'adaptation', 'planning', 'sustainability', 'regional', 'practices', 'policies', 'risks', 'mitigation', 'SLR', 'health', 'long-term', 'adherence', 'biodiesel,', 'actors', 'similarly', 'Just', 'bioboards', 'first,', 'SNG', 'pretreatment', 'people,', 'engineers', 'adjustment', 'complexity', 'limiting', 'failure', 'reliability', 'identification', 'computing', 'privacy', 'limits', 'institutional', 'action', 'aid', 'extend', 'assist', 'replace', 'examine', 'facilitate