## Section 1
### First we use word lists from psychometric tools and obtain synonyms and hyponyms using WordNet

In [None]:
## Lines 3 and 5 of this cell (the pip install and nltk.download) are only needed the first time you use this script; comment them out on later runs

!pip install nltk
import nltk
nltk.download('all')
from nltk.corpus import wordnet as wn

# Here we can change the language of analysis
languages = ['eng']


[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping grammars/basque_grammars.zip.
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping corpora/biocreative_ppi.zip.
[nltk_data]    | Downloadin

In [None]:
### Function to Generate synonyms and hyponyms using Wordnet

def generate_wordnet_list(word_base, languages, exclude_list, verbose=True):
  """Expand seed words into a sorted list of WordNet synonyms and hyponyms.

  For every seed word, each noun/verb/adjective synset that is not excluded
  contributes its own lemma names plus the lemma names of its direct
  hyponyms, in every requested language.

  Args:
    word_base: iterable of seed words. They are looked up with
      ``wn.synsets`` without a language argument.
    languages: iterable of language codes passed to ``Synset.lemmas``
      (e.g. ``['eng']``).
    exclude_list: iterable of synset names (e.g. ``'flood.n.03'``) whose
      meanings are skipped.
    verbose: when True (default), print each retained synset and its
      definition so that irrelevant senses can be spotted and added to
      ``exclude_list``.

  Returns:
    Alphabetically sorted list of the unique lemma names collected across
    all ``languages``. An empty list is returned when ``languages`` is empty.
  """
  languages = list(languages)
  if not languages:
    # Nothing can be collected without a language; return an empty list
    # rather than None so callers can always iterate over the result.
    return []

  # Sets give constant-time membership tests and de-duplicate for free.
  excluded = set(exclude_list)
  collected = set()

  for word in word_base:
    ## iterate over the different meanings (synsets) of the seed word
    for meaning in wn.synsets(word, pos=wn.NOUN + wn.VERB + wn.ADJ):
      if meaning.name() in excluded:
        continue

      if verbose:
        print(meaning)
        print(meaning.definition())
        print('\n')

      ## the synset itself provides the synonyms, its hyponyms the narrower terms
      for synset in [meaning] + list(meaning.hyponyms()):
        for language in languages:
          for lemma in synset.lemmas(language):
            collected.add(lemma.name())

  ## every requested language contributes to the result, not only the first one
  return sorted(collected)

In [None]:
## Seed words for the target concept of this cell (climate).
## The seed word list is based on:
# Ishaya, S., & Abaje, I. B. (2008). Indigenous people's perception on climate change and adaptation strategies in Jema'a local government area of Kaduna State, Nigeria. Journal of geography and regional planning, 1(8), 138.
# Ayalew, M. S., Demissie, G. D., Muchie, K. F., Tadesse, S., & Alemu, K. PERCEPTION OF CLIMATE CHANGE AND ASSOCIATED FACTORS AMONG RURAL DWELLERS OF GONDAR ZURIA DISTRICT, NORTHWEST ETHIOPIA.
word_base_climate = [
    'Drought', 'typhoon', 'landslide', 'flood', 'temperature', 'weather', 'environment',
    'storm', 'rainfall', 'dry', 'adaptation', 'season', 'natural', 'ecosystems',
]

## Synset names listed here are skipped by generate_wordnet_list, which lets us drop
## word meanings that are irrelevant for the concept we want to measure
## (for instance, several senses of 'dry' and 'natural' are excluded below).
## Re-run this cell and extend the list until every synset printed below has a relevant meaning.
## NOTE(review): 'weather.v.01' appears twice; this is harmless for the exclusion test,
## but possibly another sense was meant - TODO confirm.
exclude_list_climate = [
    'landslide.n.01', 'flood.n.03',
    'weather.v.01', 'weather.v.01', 'weather.v.03',
    'dry.n.01', 'dry.s.02', 'dry.a.04', 'dry.a.05', 'dry.a.06', 'dry.a.07',
    'dry.s.08', 'dry.s.09', 'dry.s.10', 'dry.s.12', 'dry.s.15', 'dry.s.16',
    'adaptation.n.01', 'adaptation.n.03',
    'season.n.03', 'season.v.01', 'season.v.02',
    'temper.v.04',
    'natural.n.01', 'natural.n.02', 'natural.n.03', 'natural.a.05',
    'natural.s.06', 'natural.s.07', 'natural.s.08', 'natural.s.09',
    'lifelike.s.02',
    'dry.s.13', 'dry.s.14',
]

## Expand the seed words into their synonyms and hyponyms
climate_list = generate_wordnet_list(word_base_climate, languages, exclude_list_climate)


Synset('drought.n.01')
a shortage of rainfall


Synset('drought.n.02')
a prolonged shortage


Synset('typhoon.n.01')
a tropical cyclone occurring in the western Pacific or Indian oceans


Synset('landslide.n.02')
a slide of a large mass of dirt and rock down a mountain or cliff


Synset('flood.n.01')
the rising of a body of water and its overflowing onto normally dry land


Synset('flood.n.02')
an overwhelming number or amount


Synset('flood.n.04')
a large flow


Synset('flood.n.05')
the act of flooding; filling to overflowing


Synset('flood_tide.n.02')
the occurrence of incoming water (between a low tide and the following high tide)


Synset('deluge.v.01')
fill quickly beyond capacity; as with a liquid


Synset('flood.v.02')
cover with liquid, usually water


Synset('flood.v.03')
supply with an excess of


Synset('flood.v.04')
become filled to overflowing


Synset('temperature.n.01')
the degree of hotness or coldness of a body or environment (corresponding to its molecular activity)

In [None]:
## This is a very broad list which includes all synonyms and hyponyms
## returned for the seed words we chose for climate

## As one can verify, this list needs pruning:
## part of it by excluding irrelevant synsets a priori (exclusion list in the cell above),
## and part of it can also be done manually a posteriori

print(climate_list)

['Curie_point', 'Curie_temperature', 'Noachian_deluge', "Noah's_flood", 'Noah_and_the_Flood', 'Whitsun', 'Whitsuntide', 'Whitweek', 'absolute_temperature', 'absolute_zero', 'acclimation', 'acclimatisation', 'acclimatization', 'adaptation', 'adaption', 'adjustment', 'air', 'air_current', 'alluvion', 'ambiance', 'ambience', 'area', 'arena', 'atmosphere', 'atmospheric_condition', 'atmospheric_state', 'autumn', 'background', 'bad_weather', 'baseball_season', 'basketball_season', 'blizzard', 'blood_heat', 'blow-dry', 'body_temperature', 'boil', 'boiling_point', 'circumstance', 'cloudburst', 'cold', 'cold_weather', 'coldness', 'comfort_zone', 'conditions', 'context', 'current_of_air', 'debacle', 'dedifferentiation', 'dehumidify', 'dehydrate', 'deluge', 'desiccate', 'dew_point', 'differentiation', 'domain', 'domestication', 'downfall', 'downpour', 'drench', 'drip-dry', 'drizzle', 'drought', 'drouth', 'dry', 'dry_out', 'dry_season', 'dry_up', 'ecology', 'ecosystem', 'effusion', 'electric_storm

In [None]:
# ## THREAT

# #Ishaya, S., & Abaje, I. B. (2008). Indigenous people's perception on climate change and adaptation strategies in Jema'a local government area of Kaduna State, Nigeria. Journal of geography and regional planning, 1(8), 138.
# #Pickson, R. B., & He, G. (2021). Smallholder Farmers’ Perceptions, Adaptation Constraints, and Determinants of Adaptive Capacity to Climate Change in Chengdu. SAGE Open. https://doi.org/10.1177/21582440211032638

# exclude_list_threat = ['vulnerable.s.02','decreasing.a.02','poor_people.n.01','hapless.s.01','poor.a.02','poor.a.03','incidence.n.02','annihilating.s.02','switch.v.03','change.v.05','exchange.v.01','transfer.v.06','deepen.v.04','change.v.10','challenge.n.01','challenge.n.02','challenge.n.03','challenge.n.04','challenge.n.05','challenge.v.01','challenge.v.02','challenge.v.03','challenge.v.04','diminish.v.02','waste.n.01','waste.n.02','waste.n.05','waste.v.01','waste.v.02','neutralize.v.04','consume.v.03','godforsaken.s.01','critical.a.01',
#                       'critical.a.03','critical.s.04','battle.n.01','gamble.v.01','competitiveness.n.01','fight.n.04','fight.n.05','contend.v.06','pest.n.03','supplanting.n.01','translation.n.07','displacement.n.04','displacement.n.07','loss.n.05','loss.n.06','personnel_casualty.n.01','refuse.v.02','refuse.v.01','decline.v.07','obstruct.v.02','flimsy.s.03','constraint.n.01','constraint.n.03','negative.n.01',
#                        'negative.n.02','veto.v.01','negative.a.04','negative.s.06','damaging.s.02','negative.s.08','vulnerable.s.02','decreasing.a.02','poor.a.02','vulnerable.s.02','poor_people.n.01','poor.a.02','poor.a.03','incidence.n.02','annihilating.s.02','change.v.05','change.v.06','exchange.v.01','transfer.v.06']

# word_base_threat = ['unpredictable', 'risk', 'hindrances', 'Lack','vulnerable','decreasing','Poor','incidence','devastating','changing','challenges','repercussions', 'scarcity', 'diminishing', 'waste', 'critical','fight','disaster','exacerbated','severity','adverse','pest','depletion','endanger','displacement','loss','decline','impeded','fragile','inundation','constraints','negative','problem']
# threat_list = generate_wordnet_list(word_base_threat,languages,exclude_list_threat)


In [None]:
## DAMAGE

# Seed words are based on:
# Stewart A. E. (2021). Psychometric Properties of the Climate Change Worry Scale. International journal of environmental research and public health, 18(2), 494. https://doi.org/10.3390/ijerph18020494
word_base_damage = [
    'damage', 'loss', 'worry', 'effect', 'affect', 'outbreak',
    'severe', 'paralyzed', 'difficult', 'fear', 'stress', 'anxiety',
    'depression', 'extreme', 'threat', 'risks', 'sensitivity',
]

# Synset names that generate_wordnet_list must skip for this concept
exclude_list_damage = [
    'price.n.02', 'personnel_casualty.n.01',
    'worry.v.05', 'worry.v.06',
    'effect.n.04', 'effect.n.05',
    'affect.n.01', 'involve.v.01', 'feign.v.01',
    'severe.s.04', 'reverence.v.01',
    'stress.n.01', 'stress.n.03', 'stress.n.05', 'stress.v.01', 'stress.v.02',
    'try.v.07',
    'depression.n.05', 'depression.n.08', 'depression.n.09',
    'extreme.n.01', 'extreme_point.n.01', 'extreme.s.04',
    'damage.n.02', 'risk.n.02', 'gamble.v.01',
    'sensitivity.n.03', 'fear.n.03',
]

# Expand the seed words into their synonyms and hyponyms
damage_list = generate_wordnet_list(word_base_damage, languages, exclude_list_damage)


Synset('damage.n.01')
the occurrence of a change for the worse


Synset('damage.n.03')
the act of damaging something or someone


Synset('wrong.n.02')
any harm or injury resulting from a violation of a legal right


Synset('damage.v.01')
inflict damage upon


Synset('damage.v.02')
suffer or be susceptible to damage


Synset('loss.n.01')
something that is lost


Synset('loss.n.02')
gradual decline in amount or activity


Synset('loss.n.03')
the act of losing someone or something


Synset('loss.n.04')
the disadvantage that results from losing something


Synset('loss.n.05')
the experience of losing a loved one


Synset('loss.n.06')
the amount by which the cost of a business exceeds its revenue


Synset('passing.n.02')
euphemistic expressions for death


Synset('concern.n.04')
something or someone that causes anxiety; a source of unhappiness


Synset('worry.n.02')
a strong feeling of anxiety


Synset('worry.v.01')
be worried, concerned, anxious, troubled, or uneasy


Synset('worry.v.02')


In [None]:
## Inspect the expanded damage word list returned by generate_wordnet_list
print(damage_list)

['Coriolis_effect', 'accomplish', 'act_upon', 'action', 'acuteness', 'adventure', 'affect', 'afflict', 'affright', 'aftereffect', 'aftermath', 'agitated_depression', 'alarm', 'alienate', 'anaclitic_depression', 'angst', 'antenna', 'anxiety', 'anxiousness', 'apprehension', 'apprehensiveness', 'arouse', 'attack', 'austere', 'awaken', 'backdate', 'backwash', 'bandwagon_effect', 'bang_up', 'basin', 'bear_on', 'bear_upon', 'bed', 'bell_the_cat', 'bilge', 'blemish', 'blue_devils', 'blue_funk', 'blues', 'bother', 'bottom', 'brace', 'branch', 'break', 'breaking_point', 'bring_to_bear', 'brisance', 'brood', 'bruise', 'bugaboo', 'bummer', 'burden', 'burn', 'business', 'butterfly_effect', 'by-product', 'byproduct', 'calm', 'capitulation', 'care', 'carry', 'carry_out', 'carry_through', 'castration_anxiety', 'change', 'chill', 'click', 'clinical_depression', 'cloud', 'coattails_effect', 'color', 'colour', 'commination', 'concern', 'consequence', 'consternation', 'corrode', 'cramp', 'crater', 'creep

## Section 2: Generate a semantic vector map with word2vec

In [None]:
from gensim.models.word2vec import Word2Vec
import os
from os import path

def load_corpus_sentences(root_folder):
    """Read the corpus ``.txt`` files under ``root_folder`` as tokenised lines.

    The folder is walked recursively. Every file whose name ends with
    ``.txt`` and does not contain ``'model'`` is read as UTF-8 (undecodable
    bytes are ignored), split into lines, and each line is split on single
    spaces. Empty tokens (produced by consecutive spaces) and lines without
    any token are dropped, so that no empty string ends up in the Word2Vec
    vocabulary.

    Args:
        root_folder: directory to search for preprocessed text files.

    Returns:
        List of sentences, each sentence being a list of word strings, in
        the shape expected as input by ``Word2Vec``.
    """
    sentences = []

    ## This iterates over your path, folders and subfolders looking for txt files
    for dirpath, _subdirs, filenames in os.walk(root_folder):
        for filename in filenames:
            # endswith avoids picking up names such as 'notes.txt.bak';
            # files with 'model' in their name are not corpus texts.
            if not filename.endswith('.txt') or 'model' in filename:
                continue
            print(filename)

            # the context manager closes the file even if reading fails
            with open(os.path.join(dirpath, filename), encoding='utf-8', errors='ignore') as handle:
                file_text = handle.read()

            ## each line of the file is treated as one paragraph / sentence
            for paragraph in file_text.split('\n'):
                ## get rid of \r at the end of the line, then tokenise on spaces
                tokens = [token for token in paragraph.replace('\r', '').split(' ') if token]
                if tokens:
                    sentences.append(tokens)

    return sentences


## Collect preprocessed texts in txt format
root_folder = os.getcwd()
print(root_folder)

## This will be a list of clean sentences, each one a list of words,
## used as input to Word2Vec
word2vec_input = load_corpus_sentences(root_folder)

/content
Santana-2020-wind glazing.txt
chen etal-2010-Development of photovoltaic thermal.txt
Dhahri etal-2021-effect of inlet outlet on thermal performa.txt
pike and slater-2011-examination of their.txt
law etal-2013-heat pump and an organic Rankine.txt
Riffat etal-2013-experimental investigation.txt
sahlot and riffat-2016-Desiccant cooling systems.txt
alamier-2020- Synthesi.txt
Embaye etal-2014-effect of flow pulsation.txt
Jiang etal-2009-utilizing bent heat pipes.txt
aleknaviviute-2016-Towards clean.txt
hormazabal etal-2016-The performance.txt
onyelowe-2019-Strength.txt
herdt etal-2016-Newly invented biobased.txt
Wang and Zhang-2021-Decision and coordination.txt
sadeghzadeh-2021-novelexergy.txt
Budania etal-2013-solar absorption.txt
ahmed-2018-Design methodology.txt
auid etal-2013-Organic Rankine cycles.txt
Shilliday etal-2009-Comparative energy.txt
man etal-2010-Operation.txt
Zhang etal-2018-revolutio.txt
kim-2013-the investigation of an openi.txt
utlu-2019-Thermophotovoltaic.txt
C

In [None]:
## Number of sentences collected in word2vec_input
len(word2vec_input)

964809

In [None]:
## Here we build the vector space with Word2Vec
## min_count=1 keeps every token in the vocabulary, even those that occur only once.
## NOTE(review): no seed is passed, so the vectors presumably differ between runs -
## confirm whether reproducibility of the neighbour lists matters here.

SentenceCorpus = word2vec_input
word2vec_output = Word2Vec(SentenceCorpus, min_count=1)

In [None]:
## Save vector space
## The file name contains 'model', which is what the corpus-collection loop in Section 2
## checks in order to skip this file when it walks the folder for .txt texts.

word2vec_output.save('climate_w2v_model.txt')

## Section 3. Use the vector semantic map to evaluate if the bags of words created in section 1 are ecologically valid

In [None]:
###  function that uses word2vec to query the 10 words most semantically similar to each seed word in word_list

def get_word2vec_list(word_list, model, topn=10):
  """Collect, for each seed word, the words closest to it in the vector space.

  Args:
    word_list: iterable of seed words.
    model: trained model exposing ``model.wv.most_similar``.
    topn: number of nearest neighbours requested per seed word (default 10).

  Returns:
    A list with one entry per seed word that the model could look up. Each
    entry is a list starting with the seed word, followed by its neighbours
    in the order returned by ``most_similar``. Seed words for which the
    lookup raises ``KeyError`` are skipped.
  """
  list_of_word2vec_lists = []
  for word in word_list:
    try:
      ## here is the crucial line - we are using the model that we trained
      ## to get the most similar words within our corpus
      neighbours = model.wv.most_similar([word], topn=topn)
    except KeyError:
      # presumably the word is not in the model's vocabulary; skip it
      continue

    # most_similar yields (word, score) pairs; only the word is kept
    list_of_word2vec_lists.append([word] + [neighbour for neighbour, _score in neighbours])

  return list_of_word2vec_lists

In [None]:
## load the vector space that was saved in Section 2 as 'climate_w2v_model.txt'
model = Word2Vec.load('climate_w2v_model.txt')


In [None]:
## get the word2vec list of the 10 most similar words for each word of the climate bag of words

list_of_climate_w2v = get_word2vec_list(climate_list, model)

## print each list together with its position, which is used in the next cell
## to select the relevant lists
for index, w2v_list in enumerate(list_of_climate_w2v):
  print(index, w2v_list)

0 ['acclimation', '60-day', 'INPUT', '–5', 'Tgo', '2−y', 'Column', 'Layout', 'isopentane)', '403', 'MATHEMATICAL']
1 ['acclimatisation', 'preparation', 'accountability', 'community-based', '(OAC)', 'Yjt—flows', 'contingency', 'arrangements', 'PERN.', 'design—professional’s', 'Statutes']
2 ['adaptation', 'mitigation', 'policies', 'strategies', 'investments', 'policy', 'implementation', 'planning', 'measures', 'policies,', 'implementing']
3 ['adaption', 'adaptation', 'foster', 'materialize', 'impetus', '‘information', 'community’s', 'adaptation,', 'intentional', 'obstacle', 'responding']
4 ['adjustment', 'encroachment', 'inertia', 'incentive', 'determining', 'estimation', 'equilibrium', 'consistency', 'transformation', 'expansion', 'dynamic']
5 ['air', 'water', 'air.', 'airflow', 'desiccant', 'indoor', 'air,', 'groundwater', 'evaporation', 'outdoor', 'liquid']
6 ['area', 'area,', 'floor', 'region', 'roof', 'length', 'zone', 'area.', 'wall', 'south', 'size']
7 ['arena', 'Ebinat', 'forests

In [None]:
## Choose, from the word2vec outputs, the lists whose clouds of meaning seem coherent with climate.
## The numbers are positions in list_of_climate_w2v as printed in the previous cell.
## NOTE(review): these positions are only valid for the lists printed above; re-check them
## whenever the model or the word list changes.
climate_keep_indices = [
    1, 2, 3, 5, 8, 9, 13, 14, 16, 17, 19, 22, 24, 25, 26, 27, 28,
    31, 32, 35, 37, 38, 40, 41, 43, 47, 51, 52, 53, 59, 61, 62, 63,
    65, 68, 71, 73, 74, 77, 78, 79, 84, 85, 86, 87, 89, 90, 92, 93,
    94, 95,
]
relevant_climate_w2v_words = [list_of_climate_w2v[position] for position in climate_keep_indices]

## Flatten the selected lists into one final bag of words
## (a word that occurs in several selected lists is kept once per occurrence)
climate_BoW = []
for neighbour_list in relevant_climate_w2v_words:
  climate_BoW.extend(neighbour_list)

print(climate_BoW)

['acclimatisation', 'preparation', 'accountability', 'community-based', '(OAC)', 'Yjt—flows', 'contingency', 'arrangements', 'PERN.', 'design—professional’s', 'Statutes', 'adaptation', 'mitigation', 'policies', 'strategies', 'investments', 'policy', 'implementation', 'planning', 'measures', 'policies,', 'implementing', 'adaption', 'adaptation', 'foster', 'materialize', 'impetus', '‘information', 'community’s', 'adaptation,', 'intentional', 'obstacle', 'responding', 'air', 'water', 'air.', 'airflow', 'desiccant', 'indoor', 'air,', 'groundwater', 'evaporation', 'outdoor', 'liquid', 'atmosphere', 'atmosphere,', 'gases', 'oceans', 'ice', 'dioxide', 'evaporation', 'sunlight', 'ammonia', 'snow', 'exhaust', 'autumn', 'isochoric', 'overcast', '8-h', 'semiconductor,', 'anti-Semitic', 'interminable', 'erroneous', 'spotty', 'harvest—are', 'highway', 'cold', 'hot', 'humid', 'warm', 'dry', 'ground', 'wet', 'winter', 'summer', 'tank', 'condensation', 'conditions', 'conditions,', 'condition', 'condit

In [None]:
## get the word2vec list of the 10 most similar words for each word of the damage bag of words

list_of_damage_w2v = get_word2vec_list(damage_list, model)

## print each list together with its position, which is used in the next cell
## to select the relevant lists
for index, w2v_list in enumerate(list_of_damage_w2v):
  print(index, w2v_list)

0 ['accomplish', 'validate', 'analyse', 'refine', 'formulate', 'simplify', 'famers.', 'derail', 'resolve', 'listen', 'verify']
1 ['action', 'action,', 'efforts', 'commitments', 'actions', 'measures', 'collective', 'policy', 'policies', 'leadership', 'governments']
2 ['affect', 'alter', 'exacerbate', 'threaten', 'trigger', 'cause', 'disrupt', 'contribute', 'aggravate', 'worsen', 'modify']
3 ['aftermath', 'midst', 'outbreak', 'Horn', 'Ida', 'highlands', 'Delta', 'Bay', 'context', 'ITCZ,', 'northwest']
4 ['alarm', 'ringing', 'bell', 'Ethernet.', 'Case1-Rig', 'Brexit),', 'UDP/IP', 'Case3-Rig', '110,000', 'dominion', 'Rioboo']
5 ['alienate', 'rancor', 'express', 'trait:', 'adsorb', 'less’.', 'devote', 'follow,', '[38];', 'denialism', 'threat"']
6 ['antenna', '2014;4:553–73.', 'silica)', 'Stick', 'Herve', 'autoencoder', 'Spanou', 'Decision-Making', 'Bedriñana,', '100–2000', 'Wooden']
7 ['anxiety', 'conflict,', 'events,', 'violence,', 'attitudes,', 'shocks', 'exacerbation', 'mental', 'culture

In [None]:
## Choose, from the word2vec outputs, the lists whose clouds of meaning seem coherent with damage.
## The numbers are positions in list_of_damage_w2v as printed in the previous cell.
## NOTE(review): these positions are only valid for the lists printed above; re-check them
## whenever the model or the word list changes.
damage_keep_indices = [
    2, 3, 4, 5, 7, 10, 17, 22, 23, 38, 42, 43, 46, 49, 50, 51, 52, 53,
    54, 55, 56, 58, 61, 62, 63, 64, 66, 67, 68, 74, 75, 76, 77, 86, 87,
    88, 89, 92, 95, 99, 102, 103, 104, 107, 108, 109, 110, 116, 119, 120,
    122, 129, 134, 137, 138, 140, 148, 152, 157, 162, 170, 183, 184, 191,
    192, 193, 200, 210, 211, 213, 214,
]
relevant_damage_w2v_words = [list_of_damage_w2v[position] for position in damage_keep_indices]

## Flatten the selected lists into one final bag of words
## (a word that occurs in several selected lists is kept once per occurrence)
damage_BoW = []
for neighbour_list in relevant_damage_w2v_words:
  damage_BoW.extend(neighbour_list)

print(damage_BoW)

['affect', 'alter', 'exacerbate', 'threaten', 'trigger', 'cause', 'disrupt', 'contribute', 'aggravate', 'worsen', 'modify', 'aftermath', 'midst', 'outbreak', 'Horn', 'Ida', 'highlands', 'Delta', 'Bay', 'context', 'ITCZ,', 'northwest', 'alarm', 'ringing', 'bell', 'Ethernet.', 'Case1-Rig', 'Brexit),', 'UDP/IP', 'Case3-Rig', '110,000', 'dominion', 'Rioboo', 'alienate', 'rancor', 'express', 'trait:', 'adsorb', 'less’.', 'devote', 'follow,', '[38];', 'denialism', 'threat"', 'anxiety', 'conflict,', 'events,', 'violence,', 'attitudes,', 'shocks', 'exacerbation', 'mental', 'culture,', 'psychosocial', 'chronic', 'attack', 'window,', 'birthday', 'backlash', 'comedian', 'sister', 'hanged', 'blockade', 'mattress', 'hillside', '1920-22', 'bother', 'ndia', 'Liberaltarianism', 'GUATEMALA.', 'EC-funded', 'overt', '<http://web.', 'necessitating', 'authority-order', 'ARM', 'acknowledgement', 'burden', 'likelihood', 'prevalence', 'population,', 'proportion', 'consequences', 'brunt', 'burdens', 'impacts',

In [None]:
## A hypothetical bag of words was thus obtained for each concept (climate and damage),
## which can be used for frequency analyses
## See next script

print('climate',climate_BoW)
print('\n')
print('damage',damage_BoW)

climate ['acclimatisation', 'preparation', 'accountability', 'community-based', '(OAC)', 'Yjt—flows', 'contingency', 'arrangements', 'PERN.', 'design—professional’s', 'Statutes', 'adaptation', 'mitigation', 'policies', 'strategies', 'investments', 'policy', 'implementation', 'planning', 'measures', 'policies,', 'implementing', 'adaption', 'adaptation', 'foster', 'materialize', 'impetus', '‘information', 'community’s', 'adaptation,', 'intentional', 'obstacle', 'responding', 'air', 'water', 'air.', 'airflow', 'desiccant', 'indoor', 'air,', 'groundwater', 'evaporation', 'outdoor', 'liquid', 'atmosphere', 'atmosphere,', 'gases', 'oceans', 'ice', 'dioxide', 'evaporation', 'sunlight', 'ammonia', 'snow', 'exhaust', 'autumn', 'isochoric', 'overcast', '8-h', 'semiconductor,', 'anti-Semitic', 'interminable', 'erroneous', 'spotty', 'harvest—are', 'highway', 'cold', 'hot', 'humid', 'warm', 'dry', 'ground', 'wet', 'winter', 'summer', 'tank', 'condensation', 'conditions', 'conditions,', 'condition',