## Section 1
### First we use word lists from psychometric tools and obtain synonyms and hyponyms using WordNet

In [None]:
## The first time you use this script, run lines 3 and 5 of this cell (the nltk install and the NLTK data download); on later runs they can be commented out

!pip install nltk
import nltk
nltk.download('all')
from nltk.corpus import wordnet as wn

# Here we can change the language(s) of analysis.
# Each entry of this list is passed to `lemmas(language)` inside generate_wordnet_list.
# NOTE(review): presumably these are the language codes WordNet expects (e.g. 'eng') - confirm before adding others.
languages = ['eng']


In [None]:
### Function to Generate synonyms and hyponyms using Wordnet

def generate_wordnet_list(word_base,languages,exclude_list):
  """Expand seed words into their WordNet synonyms and hyponyms.

  For every language in `languages` and every seed word in `word_base`, the
  noun/verb/adjective synsets of the seed are looked up with `wn.synsets`.
  Synsets whose name appears in `exclude_list` are skipped. Every other
  synset is printed together with its definition (so that irrelevant
  meanings can be spotted and added to `exclude_list`), and the lemma names
  of the synset itself and of its direct hyponyms are collected.

  Parameters
  ----------
  word_base : list of str
      Seed words to expand.
  languages : list of str
      Language codes; each one is passed to `Synset.lemmas`.
  exclude_list : list of str
      Synset names (as returned by `Synset.name()`, e.g. 'policy.n.03')
      whose lemmas and hyponyms must not be collected.

  Returns
  -------
  list of str
      A flat list of unique lemma names. The words of each language are
      sorted alphabetically and the languages follow the order of
      `languages`; a word already contributed by an earlier language is not
      repeated. With a single language this is simply the sorted word list.
      An empty `languages` gives an empty list.
  """
  all_words = []
  already_returned = set()

  # iterate over languages
  for language in languages:
    # a set gives constant-time "have we seen this word?" checks
    language_words = set()

    # iterate over words in the word_base list
    for word in word_base:

      # iterate over the different meanings (synsets) of the word
      # NOTE(review): the seed is looked up with wn.synsets' default language;
      # only the lemma names below are language-specific - confirm intended.
      for meaning in wn.synsets(word, pos=wn.NOUN+wn.VERB+wn.ADJ):
        if meaning.name() in exclude_list:
          continue

        # shown so the user can decide which meanings to exclude on the next run
        print(meaning)
        print(meaning.definition())
        print('\n')

        # synonyms come from the synset itself, the rest from its direct hyponyms
        for synset in [meaning, *meaning.hyponyms()]:
          for lemma in synset.lemmas(language):
            language_words.add(lemma.name())

    # add this language's words in alphabetic order, skipping words that an
    # earlier language already contributed
    for name in sorted(language_words):
      if name not in already_returned:
        already_returned.add(name)
        all_words.append(name)

  # return after ALL languages were processed (returning inside a loop over the
  # per-language lists would drop every language but the first)
  return all_words

In [None]:
## Example of seed words related to a target concept - for instance, conservation.
## In this case, the base seed word list was obtained from:
# Peng, L., Tan, J., Lin, L., & Xu, D. (2019). Understanding sustainable disaster mitigation of stakeholder engagement: Risk perception, trust in public institutions, and disaster insurance. Sustainable Development, 27(5), 885-897.
word_base_conservation = [
    'controllability',
    'mitigation',
    'insurance',
    'conservation',
    'development',
    'recover',
    'alleviate',
    'sustainable',
    'resilience',
    'avoidance',
]

## With the function defined above we obtain the synonyms and hyponyms of the seed words.
## Word meanings (synsets) that are irrelevant for the concept we want to measure can be excluded:
## for instance, 'quilt' is not related to conservation, so it can be added to the exclusion list.
## Re-run this cell until every synset displayed below has a relevant meaning.
exclude_list_conservation = [
    'extenuation.n.01',
    'policy.n.03',
    'development.n.06',
    'development.n.08',
    'development.n.09',
]

conservation_list = generate_wordnet_list(
    word_base=word_base_conservation,
    languages=languages,
    exclude_list=exclude_list_conservation,
)


Synset('extenuation.n.02')
to act in such a way as to cause an offense to seem less serious


Synset('moderation.n.04')
the action of lessening in severity or intensity


Synset('insurance.n.01')
promise of reimbursement in the case of loss; paid to people or companies so concerned about hazards that they have made prepayments to an insurance company


Synset('indemnity.n.01')
protection against future loss


Synset('conservation.n.01')
an occurrence of improvement by virtue of preventing loss or injury or other change


Synset('conservation.n.02')
the preservation and careful management of the environment and of natural resources


Synset('conservation.n.03')
(physics) the maintenance of a certain quantities unchanged during chemical reactions or physical transformations


Synset('development.n.01')
act of improving by expanding or enlarging or refining


Synset('development.n.02')
a process in which something passes by degrees to a different stage (especially a more advanced or matur

In [None]:
## This is a very broad list: it holds every synonym and hyponym that
## generate_wordnet_list collected for the conservation seed words.

## As one can verify, this list needs pruning:
## part of it a priori, by excluding irrelevant synsets (cell above),
## and part of it manually, a posteriori.

print(conservation_list)

['abatement', 'access', 'advancement', 'alleviate', 'amelogenesis', 'angiogenesis', 'anthesis', 'apposition', 'assibilation', 'assuage', 'assurance', 'automobile_insurance', 'auxesis', 'aversion', 'averting', 'avoidance', 'blossoming', 'broadening', 'business_interruption_insurance', 'caenogenesis', 'cainogenesis', 'capitalisation', 'capitalization', 'car_insurance', 'catch_up_with', 'cenogenesis', 'cohesion', 'coinsurance', 'comfort', 'commercialisation', 'commercialization', 'complication', 'conservancy', 'conservation', 'conservation_of_charge', 'conservation_of_electricity', 'conservation_of_energy', 'conservation_of_mass', 'conservation_of_matter', 'conservation_of_momentum', 'conservation_of_parity', 'convalesce', 'cultivation', 'culture', 'cytogenesis', 'cytogeny', 'deepening', 'dentition', 'development', 'dodging', 'ease', 'efflorescence', 'elaboration', 'electrification', 'escape', 'evolution', 'exploitation', 'extenuation', 'facilitate', 'find', 'fire_insurance', 'first_law_o

In [None]:
# DISASTER

## Seed words for the disaster concept, taken from:
# Peng, L., Tan, J., Lin, L., & Xu, D. (2019). Understanding sustainable disaster mitigation of stakeholder engagement: Risk perception, trust in public institutions, and disaster insurance. Sustainable Development, 27(5), 885-897.
word_base_disaster = [
    'disaster',
    'risk',
    'injury',
    'loss',
    'threats',
    'difficulty',
    'destruction',
    'limitations',
    'lethality',
    'harm',
    'fear',
    'affect',
    'uncontrollable',
]

## Synsets that must not contribute words to the disaster list
exclude_list_disaster = [
    'wound.n.02',
    'personnel_casualty.n.01',
]

disaster_list = generate_wordnet_list(
    word_base=word_base_disaster,
    languages=languages,
    exclude_list=exclude_list_disaster,
)


Synset('catastrophe.n.02')
a state of extreme (usually irremediable) ruin and misfortune


Synset('calamity.n.01')
an event resulting in great loss and misfortune


Synset('disaster.n.03')
an act that has disastrous consequences


Synset('hazard.n.01')
a source of danger; a possibility of incurring loss or misfortune


Synset('risk.n.02')
a venture undertaken without regard to possible loss or injury


Synset('risk.n.03')
the probability of becoming infected given that exposure to an infectious agent has occurred


Synset('risk.n.04')
the probability of being exposed to an infectious agent


Synset('risk.v.01')
expose to a chance of loss or damage


Synset('gamble.v.01')
take a risk in the hope of a favorable outcome


Synset('injury.n.01')
any physical damage to the body caused by violence or accident or fracture etc.


Synset('injury.n.02')
an accident that results in physical damage or hurt


Synset('injury.n.04')
an act that causes someone or something to receive physical damage




In [None]:
## Show the full word list that generate_wordnet_list returned for the disaster seed words
print(disaster_list)

['accidental_injury', 'act_of_God', 'act_upon', 'adventure', 'affect', 'affright', 'alarm', 'alienate', 'annihilation', 'apocalypse', 'apprehension', 'apprehensiveness', 'arms_control', 'arouse', 'asperity', 'attack', 'awaken', 'awe', 'balk', 'baulk', 'bear_on', 'bear_upon', 'bell_the_cat', 'birth_trauma', 'bitch', 'bite', 'blast_trauma', 'bleeding', 'blunt_trauma', 'booby_trap', 'bother', 'brace', 'brain_damage', 'break', 'bruise', 'bull', 'bullshit', 'bump', 'burdensomeness', 'burn', 'calamity', 'calm', 'capitulation', 'care', 'cataclysm', 'catastrophe', 'chance', 'check', 'chill', 'clampdown', 'cloud', 'color', 'colour', 'commination', 'concern', 'concussion', 'consternation', 'contusion', 'cramp', 'crapshoot', 'creeps', 'cryopathy', 'cutoff', 'damage', 'danger', 'deadliness', 'death', 'decimation', 'defacement', 'default', 'defloration', 'deformation', 'demolishing', 'demolition', 'departure', 'depredation', 'deprivation', 'destruction', 'deterrent', 'detriment', 'devastation', 'di

## Section 2: Generate a semantic vector map with word2vec

In [None]:
from gensim.models.word2vec import Word2Vec
import os
from os import path

## This cell organizes the corpus as a list of sentences, and each sentence as a
## list of words, which is the input format used for Word2Vec below

def collect_sentences(root_folder):
    """Read every corpus .txt file under `root_folder` into tokenised sentences.

    The folder, its sub-folders and their files are visited in sorted order so
    that repeated runs build the corpus in the same order. A file is used when
    its name ends with '.txt' and does not contain 'model' (the saved vector
    space also has a .txt name). Each used file name is printed.

    Every line of a file is treated as one sentence: carriage returns are
    removed and the line is split on whitespace. Lines without any word are
    skipped, so no empty tokens or empty sentences reach Word2Vec.

    Parameters
    ----------
    root_folder : str
        Folder that is searched recursively.

    Returns
    -------
    list of list of str
        One list of words per non-empty line.
    """
    sentences = []

    ## This iterates over the root folder, its folders and subfolders looking for txt files
    for folder, subdirs, files in os.walk(root_folder):
        # sorting in place makes os.walk descend into sub-folders in a fixed order
        subdirs.sort()

        for file in sorted(files):
            if not file.endswith('.txt') or 'model' in file:
                continue
            print(file)
            name = os.path.join(folder, file)

            # the with-block closes the file even if reading fails
            with open(name, encoding='utf-8', errors='ignore') as handle:
                file_text = handle.read()

            ## each line is a paragraph
            for paragraph in file_text.split('\n'):
                ## get rid of \r, then split on any run of whitespace
                words = paragraph.replace('\r', '').split()

                ## only paragraphs that contain words are added to the word2vec input
                if words:
                    sentences.append(words)

    return sentences


## Collect preprocessed texts in txt format
root_folder = os.getcwd()
print(root_folder)

## This will be a list of clean sentences
word2vec_input = collect_sentences(root_folder)

/content
Vivekh etal-2015-Desalination.txt
Zisisetal-2006-heat.txt
esfahankalteh-2020-Achieving.txt
ismail-2012-energy eff.txt
Sun-2020-water shed water pollution.txt
Lazzarin etal-2013-annual air conditioning.txt
auid etal-2013-Organic Rankine cycles.txt
sampedro etal-2016-Spanish.txt
haghighi and maerefat-2014-Design.txt
fang etal-2017-Experimental.txt
zhang etal-2010-performanc.txt
Maheshwari etal-2009-performance analysis.txt
nygard-2012-Review.txt
hegazy etal-2017-The living building.txt
khoshbazan etal-2018-Thermo economic analy.txt
renato etal-2009-Energetic.txt
Newton etal-2011-nanoparticle.txt
amhadi and assaf-2019-Assessment.txt
Ashouri etal-2015-organic rankine cycle.txt
yu-2018-Theeconomic.txt
zishang-2012-Assessment.txt
salman-2020-Thermal.txt
jiang etal-2010-solar thermosyphon systems.txt
Riffat etal-2013-experimental investigation.txt
shuai-2018-Whatdo.txt
sawant-2011-Performance.txt
teixeira-2010-Temperature.txt
spataru-2010-Domestic energy and occupancy.txt
rao-2013-An

In [None]:
## Build the vector space: train Word2Vec on the tokenised sentences collected above

SentenceCorpus = word2vec_input
word2vec_output = Word2Vec(sentences=SentenceCorpus, min_count=1)

In [None]:
## Save vector space
## The file name contains 'model', so the corpus-loading loop (which skips every
## file with 'model' in its name) will not read it back as a corpus text.
## NOTE(review): the file has a .txt extension but is written by Word2Vec.save - presumably not plain text; confirm.

word2vec_output.save('conservation_disaster_w2v_model.txt')

## Section 3. Use the vector semantic map to evaluate if the bags of words created in section 1 are ecologically valid

In [None]:
### Function that uses word2vec to look up the 10 most semantically similar words for each seed word in word_list

def get_word2vec_list(word_list,model,topn=10):
  """Look up the nearest neighbours of each word in a trained vector space.

  Parameters
  ----------
  word_list : list of str
      Words to look up.
  model : object
      Trained model exposing `model.wv.most_similar(positive, topn=...)`
      (e.g. the Word2Vec model trained in this notebook).
  topn : int, optional
      Number of most similar words requested per word (default 10).

  Returns
  -------
  list of list of str
      One list per word that the model knows: the word itself followed by
      its most similar words, in the order returned by the model. Words for
      which the lookup raises KeyError are left out.
  """
  list_of_word2vec_lists = []
  for word in word_list:
    try:
      ## here is the crucial line - we are using the model that we trained to get the most similar words within our corpus
      neighbours = model.wv.most_similar([word],topn=topn)
    except KeyError:
      ## the lookup failed for this word (presumably it is not in the corpus vocabulary): skip it
      continue

    ## each result item starts with the neighbouring word; keep only that
    list_of_word2vec_lists.append([word] + [item[0] for item in neighbours])

  return list_of_word2vec_lists

In [None]:
## Open the vector space: reload the model from the file name it was saved under above
model = Word2Vec.load('conservation_disaster_w2v_model.txt')


In [None]:
## get word2vec list of 10 most similar words for the conservation bag of words

list_of_conservation_w2v = get_word2vec_list(conservation_list,model)

## print every neighbour list together with its position in list_of_conservation_w2v;
## these positions are the indices used below to pick the relevant lists
for index, w2v_list in enumerate(list_of_conservation_w2v):
  print(index, w2v_list)

0 ['abatement', '(NOx)', 'Geological', 'Ruhr', 'futures', 'excise', '–brine', 'ownership', 'Nanotechnology', 'Recycling', 'Durban,']
1 ['access', 'contributing', 'contributed', 'preserve', 'trying', 'sufficient', 'attention', 'users', 'implement', 'damage', 'risks']
2 ['advancement', 'cost-effectiveness', 'durability', 'consequences', 'instability', 'unemployment', 'disadvantages', 'interests', 'consistency', 'substitution', 'riots']
3 ['alleviate', 'prevent', 'minimize', 'enable', 'contribute', 'accommodate', 'handle', 'promote', 'manage', 'conserve', 'maximize']
4 ['assurance', 'digitalization', 'and/or', '2001;40:2934', 'informatization', '926.73', 'iÞn\x045', '2017;157:1047–55.', 'Nano-fuel', 'Vaulted', 'Euronoise.']
5 ['aversion', 'won', 'issue,', 'inert,', 'agenda', 'plan,', 'homogenization', 'requiring', 'polysaccharide', 'unmodified', 'accomplishing']
6 ['avoidance', 'certification', 'exploration', 'institutional', 'accommodation', 'anaerobic', 'CSH', 'alternate', 'object', 'pr

In [None]:
## Choose, from the word2vec outputs, the lists whose clouds of meaning seem coherent with conservation
## (the numbers are positions in list_of_conservation_w2v, as printed above)
relevant_conservation_w2v_words = [
    list_of_conservation_w2v[position]
    for position in [
        0, 2, 3, 4, 5, 13, 14, 15, 18, 23,
        24, 27, 28, 29, 30, 32, 34, 35, 40, 41,
        42, 45, 47, 48, 49, 50, 51, 52, 56, 59,
    ]
]

## Merge all the selected lists into one final bag of words
conservation_BoW = [
    word
    for neighbour_list in relevant_conservation_w2v_words
    for word in neighbour_list
]

print(conservation_BoW)

['abatement', '(NOx)', 'Geological', 'Ruhr', 'futures', 'excise', '–brine', 'ownership', 'Nanotechnology', 'Recycling', 'Durban,', 'advancement', 'cost-effectiveness', 'durability', 'consequences', 'instability', 'unemployment', 'disadvantages', 'interests', 'consistency', 'substitution', 'riots', 'alleviate', 'prevent', 'minimize', 'enable', 'contribute', 'accommodate', 'handle', 'promote', 'manage', 'conserve', 'maximize', 'assurance', 'digitalization', 'and/or', '2001;40:2934', 'informatization', '926.73', 'iÞn\x045', '2017;157:1047–55.', 'Nano-fuel', 'Vaulted', 'Euronoise.', 'aversion', 'won', 'issue,', 'inert,', 'agenda', 'plan,', 'homogenization', 'requiring', 'polysaccharide', 'unmodified', 'accomplishing', 'conservancy', 'electrolysis;', 'yrd', 'Rich', '(WSOME),', '[72,', 'k–ω,', 'ecological,', 'wastepaper,', 'Khedari’s', 'exergy).', 'conservation', 'kinetic', 'embodied', 'conservation.', 'balance', 'balances', 'balance.', 'momentum,', 'saving,', 'requests', 'budget', 'cultivat

In [None]:
## get word2vec words for disaster

list_of_disaster_w2v = get_word2vec_list(disaster_list,model)

## print every neighbour list together with its position in list_of_disaster_w2v;
## these positions are the indices used below to pick the relevant lists
for index, w2v_list in enumerate(list_of_disaster_w2v):
  print(index, w2v_list)

0 ['affect', 'cause', 'reflect', 'alter', 'involve', 'improve', 'promote', 'effectively', 'lead', 'bring', 'enhance']
1 ['alarm', '+0.28C', 'arbor', 'inverted', 'occupation)', '(coshared', 'CaCl2–LiNO3', 'fiber-reinforced-polymer', 'cabinets', 'visitor’s', 'R600a/R1234yf']
2 ['apprehension', 'Spearheaded', 'electronegativity', 'Sango,', '135,', 'Ghazali', 'M.H.', 'Marten', 'Gilot', 'Lara,', 'perils']
3 ['attack', '458', 'awnings', 'infections', 'malate', 'Pollen', 'eye', 'B2', 'mangroves', 'passengers', 'children']
4 ['bite', 'visible;', 'exemplum,', 'objective/cost', 'ocean,', 'inflammatory', '(Perterse', 'intermediate-temperature', '85-year', 'examination.', 'atmospheric-pressure']
5 ['bleeding', 'flowed', 'https://academic.oup.com/ijlct/article/13/2/140/4922818', 'Qsc', '3319', 'escape', 'planking', 'guides’', '6:00', 'Þt3', 'http://www.fogquest.org/latestnews/.']
6 ['bother', 'you’re', 'Responding.', '83–100.', 'Ateliê', 'Your', 'radical', 'Yasin;', 'Leandro', 'responses,', '(Fusar

In [None]:
## Choose, from the word2vec outputs, the lists whose clouds of meaning seem coherent with disaster
## (the numbers are positions in list_of_disaster_w2v, as printed above)
relevant_disaster_w2v_words = [
    list_of_disaster_w2v[position]
    for position in [
        0, 1, 2, 3, 4, 11, 14, 23, 24, 25,
        26, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 45, 52, 53,
        60, 61, 62, 63, 64, 67, 68, 69, 70, 71,
        73, 74, 76, 79, 81, 83, 85, 86, 87, 88,
        89, 90, 97, 99, 101, 113, 114, 117, 119, 121,
        129, 130, 134, 136, 137, 145, 147,
    ]
]


## Merge all the selected lists into one final bag of words
disaster_BoW = [
    word
    for neighbour_list in relevant_disaster_w2v_words
    for word in neighbour_list
]

print(disaster_BoW)

['affect', 'cause', 'reflect', 'alter', 'involve', 'improve', 'promote', 'effectively', 'lead', 'bring', 'enhance', 'alarm', '+0.28C', 'arbor', 'inverted', 'occupation)', '(coshared', 'CaCl2–LiNO3', 'fiber-reinforced-polymer', 'cabinets', 'visitor’s', 'R600a/R1234yf', 'apprehension', 'Spearheaded', 'electronegativity', 'Sango,', '135,', 'Ghazali', 'M.H.', 'Marten', 'Gilot', 'Lara,', 'perils', 'attack', '458', 'awnings', 'infections', 'malate', 'Pollen', 'eye', 'B2', 'mangroves', 'passengers', 'children', 'bite', 'visible;', 'exemplum,', 'objective/cost', 'ocean,', 'inflammatory', '(Perterse', 'intermediate-temperature', '85-year', 'examination.', 'atmospheric-pressure', 'burn', 'buy', 'eventually', 'combustion,', 'deforestation', 'corrosion,', 'adapting', 'afford', 'clothing', 'hold', 'algal', 'catastrophe', 'occupant)', 'recorded,', '2236.73', 'impossibility', '2133', '~66', '4309', 'non-linearly', '\x0215', '32.2%', 'cutoff', 'measure', 'draw', 'controller', 'perpendicularly', 'Greec

In [None]:
## A hypothetical bag of words was thus obtained for each concept, which can be used for frequency analyses
## See next script

## Print both final bags of words, each preceded by its concept label, separated by blank lines
print('conservation',conservation_BoW)
print('\n')
print('disaster',disaster_BoW)

conservation ['abatement', '(NOx)', 'Geological', 'Ruhr', 'futures', 'excise', '–brine', 'ownership', 'Nanotechnology', 'Recycling', 'Durban,', 'advancement', 'cost-effectiveness', 'durability', 'consequences', 'instability', 'unemployment', 'disadvantages', 'interests', 'consistency', 'substitution', 'riots', 'alleviate', 'prevent', 'minimize', 'enable', 'contribute', 'accommodate', 'handle', 'promote', 'manage', 'conserve', 'maximize', 'assurance', 'digitalization', 'and/or', '2001;40:2934', 'informatization', '926.73', 'iÞn\x045', '2017;157:1047–55.', 'Nano-fuel', 'Vaulted', 'Euronoise.', 'aversion', 'won', 'issue,', 'inert,', 'agenda', 'plan,', 'homogenization', 'requiring', 'polysaccharide', 'unmodified', 'accomplishing', 'conservancy', 'electrolysis;', 'yrd', 'Rich', '(WSOME),', '[72,', 'k–ω,', 'ecological,', 'wastepaper,', 'Khedari’s', 'exergy).', 'conservation', 'kinetic', 'embodied', 'conservation.', 'balance', 'balances', 'balance.', 'momentum,', 'saving,', 'requests', 'budge