## Climate change laws of the world
### Data augmentation

In [1]:
import pandas as pd

In [2]:
data = pd.read_csv('laws_and_policies_25022021.csv')

In [3]:
data.head(2)

Unnamed: 0,Title,Type,Geography,Geography ISO,Frameworks,Responses,Instruments,Document Types,Natural Hazards,Keywords,Sectors,Events,Documents,Parent Legislation,Description
0,Energy Sector Strategy 1387-1391 (2007/8-2012/3),executive,Afghanistan,AFG,,,,Strategy,,Energy Supply,Energy,25/12/2008|Law passed,Full text|https://climate-laws.org/rails/activ...,,This strategy sets the vision and goals for th...
1,Rural Renewable Energy Policy (RREP),executive,Afghanistan,AFG,,Adaptation,,Policy,,"Adaptation, Energy Supply",Energy,25/12/2013|Law passed||,Full text (PDF)|https://climate-laws.org/rails...,,<div>This policy aims at creating better socia...


In [4]:
data['Title'][1723]

'Clean Air Conservation Act (No. 10615)'

#### Type

In [5]:
data.Type.unique()

array(['executive', 'legislative'], dtype=object)

#### Aungmenting countries

In [6]:
len(data.Geography.unique())

198

In [7]:
countries = pd.read_csv('continents.csv')

In [8]:
data = data.merge(countries[['alpha-3','region','sub-region']], left_on='Geography ISO', right_on='alpha-3', how='left')

In [9]:
data.rename(columns = {'region':'Region', 
                       'Geography':'Country',
                       'Geography ISO':'Country ISO',
                       'sub-region':'SubRegion'}, inplace=True)

#### Homogenizing frameworks

In [10]:
data.groupby('Frameworks').count()['Title']

Frameworks
Adaptation                          44
Adaptation, DRM/DRR                 13
Adaptation, Mitigation             104
Adaptation, Mitigation, DRM/DRR     24
DRM/DRR                             89
Mitigation                          57
Name: Title, dtype: int64

In [11]:
#data[data['Frameworks'] == 'Adaptation, DRM/DRR']
column = []
for row in data.Frameworks:
    if row == 'Adaptation, DRM/DRR':
        column.append('Adaptation')
    elif row == 'Adaptation, Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation, Mitigation, DRM/DRR':
        column.append('Adaptation and Mitigation')
    elif row == 'DRM/DRR':
        column.append('Disaster Risk Management')
    else:
        column.append(row)
data.Frameworks = column

In [12]:
data.groupby('Responses').count()['Title']

Responses
Adaptation                                                           299
Adaptation, Disaster Risk Management                                 190
Adaptation, Disaster Risk Management, Loss and Damage                  5
Adaptation, Disaster Risk Management, Loss and Damage, Mitigation      4
Adaptation, Disaster Risk Management, Mitigation                      38
Adaptation, Loss and Damage, Mitigation                                2
Adaptation, Mitigation                                               260
Disaster Risk Management                                              39
Disaster Risk Management, Mitigation                                   5
Loss and Damage                                                        3
Mitigation                                                           454
Name: Title, dtype: int64

In [13]:
column = []
for row in data.Responses:
    if row == 'Adaptation, Disaster Risk Management':
        column.append('Adaptation and Disaster Risk Management')
    elif row == 'Adaptation, Disaster Risk Management, Loss and Damage':
        column.append('Adaptation and Disaster Risk Management')
    elif row == 'Adaptation, Disaster Risk Management, Loss and Damage, Mitigation':
        column.append('Adaptation and Disaster Risk Management')
    elif row == 'Adaptation, Loss and Damage, Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Disaster Risk Management, Mitigation':
        column.append('Mitigation')
    elif row == 'Loss and Damage':
        column.append('Disaster Risk Management')
    elif row == 'Adaptation, Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation, Disaster Risk Management, Mitigation':
        column.append('Adaptation and Mitigation')
    else:
        column.append(row)
data.Responses = column

In [14]:
data['Emergency Framework'] = [str(i) + '-' + str(j) for i, j in zip(data.Responses, data.Frameworks)]

In [15]:
data.groupby('Emergency Framework').count()['Title']

Emergency Framework
Adaptation and Disaster Risk Management-Adaptation                    19
Adaptation and Disaster Risk Management-Adaptation and Mitigation     16
Adaptation and Disaster Risk Management-Disaster Risk Management      77
Adaptation and Disaster Risk Management-nan                           87
Adaptation and Mitigation-Adaptation                                   6
Adaptation and Mitigation-Adaptation and Mitigation                   62
Adaptation and Mitigation-Mitigation                                  11
Adaptation and Mitigation-nan                                        221
Adaptation-Adaptation                                                 32
Adaptation-Adaptation and Mitigation                                  36
Adaptation-Disaster Risk Management                                    3
Adaptation-Mitigation                                                  4
Adaptation-nan                                                       224
Disaster Risk Management-Disast

In [16]:
column = []
for row in data['Emergency Framework']:
    if row == 'Adaptation and Disaster Risk Management-Adaptation':
        column.append('Adaptation and Disaster Risk Management')
    elif row == 'Adaptation and Disaster Risk Management-Adaptation and Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation and Disaster Risk Management-Disaster Risk Management':
        column.append('Adaptation and Disaster Risk Management')
    elif row == 'Adaptation and Disaster Risk Management-nan':
        column.append('Adaptation and Disaster Risk Management')
    elif row == 'Adaptation and Mitigation-Adaptation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation and Mitigation-Adaptation and Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation and Mitigation-Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation and Mitigation-nan':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation-Adaptation':
        column.append('Adaptation')
    elif row == 'Adaptation-Adaptation and Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation-Disaster Risk Management':
        column.append('Adaptation and Disaster Risk Management')
    elif row == 'Adaptation-Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Adaptation-nan':
        column.append('Adaptation')
    elif row == 'Disaster Risk Management-Disaster Risk Management':
        column.append('Disaster Risk Management')
    elif row == 'Disaster Risk Management-nan':
        column.append('Disaster Risk Management')
    elif row == 'Mitigation-Adaptation and Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'Mitigation-Mitigation':
        column.append('Mitigation')
    elif row == 'Mitigation-nan':
        column.append('Mitigation')
    elif row == 'nan-Adaptation and Mitigation':
        column.append('Adaptation and Mitigation')
    elif row == 'nan-Mitigation':
        column.append('Mitigation')
    elif row == 'nan-nan':
        column.append('No Framework Specified')
    else:
        column.append(row)
data['Emergency Framework'] = column

#### Instruments

In [17]:
data.Instruments#.unique()

0                                                     NaN
1                                                     NaN
2                                                     NaN
3       Designing processes|Governance and planning;De...
4                                                     NaN
                              ...                        
2100                                                  NaN
2101    Creating bodies and institutions|Governance an...
2102                                                  NaN
2103                                 Subsidies|Incentives
2104                                                  NaN
Name: Instruments, Length: 2105, dtype: object

#### Homogenizing document types

In [18]:
data.groupby('Document Types').count()['Title']

Document Types
Act                                          64
Action Plan                                   8
Action Plan, Strategy                         7
Constitution                                  2
Decree                                       72
Decree Law                                   30
Decree, Strategy                              1
Decree/Order/Ordinance                      189
Decree/Order/Ordinance, Strategy              3
Decree/Order/Ordinance, Strategy, Accord      5
Directive                                     2
Directive, Decree/Order/Ordinance             1
Eu Decision                                   5
Eu Directive                                 16
Eu Directive, Eu Decision                     1
Eu Regulation                                14
Eu Regulation, Eu Directive                   1
Framework                                    12
Framework, Policy                             1
Law                                         707
Law, Act                 

In [19]:
column = []
for row in data['Document Types']:
    if (row == 'Action Plan, Strategy') or (row == 'Action Plan') or (row == 'Radmap') or (row == 'Road Map/Vision') or (row == 'Road Map/Vision, Policy'):
        column.append('Road Map/Vision/Agenda')
    elif (row == 'Decree') or (row == 'Decree, Strategy') or (row == 'Decree, Strategy') or (row == 'Law, Decree') or (row == 'Law, Decree'):
        column.append('Decree Law')
    elif (row == 'Decree/Order/Ordinance, Strategy') or (row == 'Decree/Order/Ordinance, Strategy, Accord') or (row == 'Directive, Decree/Order/Ordinance') or (row == 'Plan, Decree/Order/Ordinance'):
        column.append('Decree/Order/Ordinance')
    elif (row == 'Eu Directive, Eu Decision') or (row == 'Eu Regulation, Eu Directive'):
        column.append('Eu Directive')
    elif (row == 'Directive, Decree/Order/Ordinance') or (row == 'Directive'):
        column.append('Act')
    elif (row == 'Framework, Policy'):
        column.append('Framework')
    elif (row == 'Law, Decree') or (row == 'Law, Strategy') or (row == 'Law, Strategy')  :
        column.append('Framework')
    elif (row == 'Policy, Resolution') or (row == 'Policy, Strategy') or (row == 'Law, Strategy') or (row == 'Plan, Policy') :
        column.append('Policy')
    elif (row == 'Programme, Plan') or (row == 'Programme, Resolution') or (row == 'Law, Strategy') :
        column.append('Programme')
    elif (row == 'Resolution, Regulation/Rules') or (row == 'Resolution, Strategy'):
        column.append('Resolution')
    elif (row == 'Royal Decree, Decree Law') or (row == 'Resolution, Strategy'):
        column.append('Royal Decree')
    elif (row == 'Law, Act'):
        column.append('Act')
    elif (row == 'Plan, Regulation/Rules') or (row == 'Plan, Resolution') or (row == 'Plan, Resolution') or (row == 'Plan, Strategy'):
        column.append('Plan')
    else:
        column.append(row)
data['Document Types'] = column

#### Hazards

In [20]:
len(data['Natural Hazards'].unique())

177

In [21]:
#This one goes away

#### Keywords

In [22]:
data['Keywords'].unique()[1:10]

array(['Adaptation, Energy Supply', 'Research And Development',
       'Adaptation', 'Adaptation, Carbon Pricing',
       'Carbon Pricing, Energy Supply, Energy Demand',
       'Disaster Risk Management', 'Renewables, Energy',
       'Health, Taxes, Energy, Forest', 'Forest'], dtype=object)

In [23]:
data['Keywords Splited'] = data['Keywords'].str.split(',')

In [24]:
keywords = list(data['Keywords Splited'].explode('Keywords Splited'))

In [25]:
keywords[2][1:]

'Energy Supply'

In [26]:
keywords_clean = []
for i in keywords:
    k = str(i)
    if k[0] ==' ':
        keywords_clean.append(k[1:])
    else:
        keywords_clean.append(k)

In [27]:
#set(keywords_clean)

In [28]:
#Selected keywords
terms = ['Agriculture','Biodiversity','Building','Coal','Deforestation','Education','Energy','Transport',
         'Circular Economy','Waste','Water']
terms_lower = [i.lower() for i in terms]

In [29]:
l1 = data['Frameworks']
l2 = data['Responses']
l3 = data['Instruments']
l4 = data['Natural Hazards']
l5 = data['Description']
l6 = data['Keywords']
data['Text'] = [str(a).lower()+' '+str(b).lower()+' '+str(c).lower()+' '+str(d).lower()+' '+str(e).lower()+' '+str(f).lower()
                for a,b,c,d,e,f in zip(l1,l2,l3,l4,l5,l6)]

In [30]:
from flashtext import KeywordProcessor

In [31]:
def extract(vec, dictionary, info=False):
    matrix = []
    for line in vec:
        matrix.append(dictionary.extract_keywords(str(line), span_info=info))
    return matrix

In [32]:
for t1, t2 in zip(terms, terms_lower):
    terms_dict = KeywordProcessor()
    terms_dict.add_keyword(t2)
    terms_extracted = extract(data['Text'], terms_dict)
    data['Keyword '+str(t1)+' Count'] = [len(i) for i in terms_extracted]

In [33]:
#data

#### Sectors

In [34]:
len(data.Sectors.unique())#[1:10] #to be dummies

470

In [35]:
data.groupby('Sectors').count()['Title'].sort_values()

Sectors
Adaptation, Economy-wide                                                1
Waste, Transportation, Industry, Health                                 1
Waste, Transportation, Industry, Environment, Energy, Economy-wide      1
Waste, Transportation, Industry, Energy, Economy-wide, Agriculture      1
Waste, Transportation, Health, Energy, Residential and Commercial       1
                                                                     ... 
Energy, Residential and Commercial                                     63
Economy-wide                                                           63
Transportation                                                        105
Transportation, Energy                                                115
Energy                                                                344
Name: Title, Length: 469, dtype: int64

In [36]:
#Many sectors per document, too difficult to melt 

#### Cleaning events (Manually)

In [37]:
data.Events.unique()

array(['25/12/2008|Law passed', '25/12/2013|Law passed||',
       '25/12/2007|Law passed', ..., '12/12/2019|Law passed',
       '19/03/2020|Policy released', '19/03/2020|Policy launched'],
      dtype=object)

In [38]:
len(data)

2105

In [39]:
events = pd.DataFrame([str(i).split('|') for i in data.Events])

In [40]:
events.groupby(1).count()

Unnamed: 0_level_0,0,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Adopted,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0
Approved,147,144,144,7,7,7,3,3,3,2,2,2,2,2,2
Approved by President,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
Approved by Senate,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
Approved by cabinet,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
entry into force,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
entry into force of enforcement decree,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
law passed,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0
passed,3,3,3,3,3,3,2,2,2,0,0,0,0,0,0


In [41]:
len(events)

2105

In [42]:
#events.to_excel('events_laws_raw.xls')

In [43]:
events_cleaned = pd.read_csv('events_laws.csv', sep=';')

In [44]:
events_cleaned

Unnamed: 0,Document First Event,Date Document Release,Date Last Amendment
0,Law passed,25/12/2008,
1,Law passed,25/12/2013,
2,Law passed,25/12/2007,
3,Law passed,25/12/2011,
4,Law passed,25/12/2009,
...,...,...,...
2100,Law passed,11/06/2018,
2101,Law passed,25/12/1989,25/12/2001
2102,Law passed,12/12/2019,
2103,Policy released,19/03/2020,


#### Adding Months Variables

In [45]:
import datetime as dt

In [46]:
events_cleaned['Date Document Release'] = [dt.datetime.strptime(str(i), '%d/%m/%Y') for i in events_cleaned['Date Document Release']]

In [47]:
list_ammendments = []
for i, j in zip(events_cleaned['Date Document Release'], events_cleaned['Date Last Amendment']):
    if str(j) == 'nan':
        list_ammendments.append(i)
    else:
        list_ammendments.append(dt.datetime.strptime(str(j), '%d/%m/%Y'))

events_cleaned['Date Last Amendment'] = list_ammendments

In [48]:
#check
print(events_cleaned['Date Last Amendment'][2101], events_cleaned['Date Last Amendment'][2101])

2001-12-25 00:00:00 2001-12-25 00:00:00


In [49]:
events_cleaned['Date Last Amendment'].sort_values()

979    1948-09-18
64     1957-05-21
920    1957-12-25
921    1960-12-25
1995   1963-12-17
          ...    
2005   2021-01-27
1508   2021-02-02
691    2021-02-12
692    2021-02-17
960    2022-04-01
Name: Date Last Amendment, Length: 2105, dtype: datetime64[ns]

In [50]:
events_cleaned['Year Document Release'] = [y.year for y in events_cleaned['Date Document Release']]

In [51]:
events_cleaned['Date Document Release'][0].year

2008

In [52]:
events_cleaned['Month Document Release'] = [row.strftime('%Y-%m') for row in events_cleaned['Date Document Release']]

In [53]:
events_cleaned['Now']  = dt.datetime.now()

In [54]:
from dateutil.relativedelta import relativedelta
def month_delta(start_date, end_date):
    delta = relativedelta(end_date, start_date)
    return 12 * delta.years + delta.months

In [55]:
d1 = events_cleaned['Date Last Amendment'][2101]
d2 = events_cleaned['Date Document Release'][2101]
total_months = month_delta(d2, d1)
total_months

144

In [56]:
events_cleaned['Months Between Release and Last Amendment'] = \
    [month_delta(i, j) for i, j in zip(events_cleaned['Date Document Release'], events_cleaned['Date Last Amendment'])]
events_cleaned['Months Since Release'] = \
    [month_delta(i, j) for i, j in zip(events_cleaned['Date Document Release'], events_cleaned['Now'])]
events_cleaned['Months Since Last Amendment'] = \
    [month_delta(i, j) for i, j in zip(events_cleaned['Date Last Amendment'], events_cleaned['Now'])]

In [57]:
events_cleaned['Year Last Amendment'] = [y.year for y in events_cleaned['Date Document Release']]

In [58]:
#events_cleaned = events_cleaned[['Document First Event','Date Document Release','Date Last Amendment','Month Document Release',
#               'Months Between Release and Last Amendment','Months Since Release','Months Since Last Amendment']]
events_cleaned = events_cleaned.drop(columns=['Now','Date Last Amendment','Date Document Release'])

In [59]:
data = pd.merge(data, events_cleaned, left_index=True, right_index=True)

#### Documents

In [60]:
data['Documents'].unique()

array(['Full text|https://climate-laws.org/rails/active_storage/blobs/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBcG9IIiwiZXhwIjpudWxsLCJwdXIiOiJibG9iX2lkIn19--be6991246abda10bef5edc0a4d196b73ce1b1a26/f|',
       'Full text (PDF)|https://climate-laws.org/rails/active_storage/blobs/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBbGNNIiwiZXhwIjpudWxsLCJwdXIiOiJibG9iX2lkIn19--507033a8de75d3830fc5f9549a4f647549aaef29/2013%20Afghanistan%20Rural%20Renewable%20Energy%20Policy.pdf|',
       nan, ...,
       'full text (PDF)|https://climate-laws.org/rails/active_storage/blobs/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBbFVGIiwiZXhwIjpudWxsLCJwdXIiOiJibG9iX2lkIn19--7128f1545dd66b0b9dcedf918ad634d3e6dbf125/f|',
       'full text (PDF)|https://climate-laws.org/rails/active_storage/blobs/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBbFlGIiwiZXhwIjpudWxsLCJwdXIiOiJibG9iX2lkIn19--bf9abf508f3559881fef05818cfdb4e77ed34a8b/f|',
       '2019 draft policy|https://t3n9sm.c2.acecdn.net/wp-content/uploads/2019/08/Zimbabwe-RE-Policy-2019.pdf|en']

In [61]:
data.rename(columns = {'Documents':'Document Link'}, inplace=True)

#### Parent Legislation

In [62]:
#data['Parent Legislation'].unique()

to remove

#### Description

In [63]:
data['Description'][5]

'This document notably examines the threats on biodiversity of climate change and desertification. The document establishes a number of sectoral targets and subtargets to organise the resilience of the components of biodiversity to adapt to climate change maintained and enhanced; pollution and its impacts on biodiversity reduced.'

In [64]:
from nltk.tokenize import word_tokenize

In [65]:
def read_dictionary(path):
    file = open(path,'r')
    return file.read().lower().split('\n')

In [66]:
# Loading positive words
positive_words = read_dictionary('dictionaries/positive_words.txt')
# Loading negative words
negative_words = read_dictionary('dictionaries/negative_words.txt')
# Adding uncertanty words
negative_words.extend(read_dictionary('dictionaries/uncertainty_dictionary.txt'))

In [67]:
# Function to calculate scores
def generate_score(text, list_to_compare):
    numWords = 0
    tokens = word_tokenize(str(text))
    for word in tokens:
        cap_word = word
        if cap_word in list_to_compare:
            numWords  += 1
    
    cumsum = numWords
    return cumsum

In [68]:
data['Positive Words'] = [generate_score(row, positive_words) for row in data['Description']]
data['Negative Words'] = [generate_score(row, negative_words) for row in data['Description']]

In [69]:
def emergency_rate(positive, negative):
    if (negative == 0) & (positive == 0):
        rate = 0
    else:
        rate = (negative - positive) / ((negative + positive))
    return round(rate, 4)

In [70]:
data['Emergency Sentiment Rate'] = \
    [emergency_rate(i, j) for i, j in zip(data['Positive Words'], data['Negative Words'])]

In [71]:
data['Emergency Sentiment Rate'].unique()

array([-1.    ,  0.    ,  1.    ,  0.4286, -0.3333,  0.3333, -0.6667,
        0.6667,  0.5   , -0.84  ,  0.6   ,  0.7143,  0.75  , -0.5   ,
       -0.6364,  0.25  , -0.1111,  0.2   ,  0.1429, -0.4286, -0.1429,
        0.8   ,  0.4667, -0.2   , -0.6   , -0.3846,  0.2727,  0.6923,
       -0.7778,  0.5556, -0.92  ,  0.4   ,  0.5238,  0.4545, -0.0588,
       -0.75  , -0.7143, -0.2727, -0.0667,  0.6364, -0.8182,  0.1667,
       -0.0769,  0.875 , -0.25  ,  0.2308,  0.0769, -0.4   ,  0.5652,
        0.7778,  0.2857,  0.7333,  0.0476, -0.6923, -0.5385, -0.5294,
       -0.5714,  0.3846,  0.1111, -0.1667, -0.0909, -0.2381,  0.1765,
        0.125 ,  0.5385,  0.5152,  0.619 ,  0.7037,  0.5714,  0.5833,
       -0.619 , -0.5556, -0.2857, -0.8261,  0.8333,  0.8462,  0.8182,
       -0.8571, -0.1765, -0.8889,  0.625 , -0.0526])

In [72]:
def total_word_count(text):
    tokens = word_tokenize(str(text))
    if len(tokens) == 1:
        count = 0
    else:
        count = len(tokens)
    return count

In [73]:
data['Summary Words Length'] = [total_word_count(row) for row in data['Description']]

In [74]:
data.rename(columns = {'Description':'Document Summary'}, inplace=True)

#### Final Filter

In [75]:
data.columns

Index(['Title', 'Type', 'Country', 'Country ISO', 'Frameworks', 'Responses',
       'Instruments', 'Document Types', 'Natural Hazards', 'Keywords',
       'Sectors', 'Events', 'Document Link', 'Parent Legislation',
       'Document Summary', 'alpha-3', 'Region', 'SubRegion',
       'Emergency Framework', 'Keywords Splited', 'Text',
       'Keyword Agriculture Count', 'Keyword Biodiversity Count',
       'Keyword Building Count', 'Keyword Coal Count',
       'Keyword Deforestation Count', 'Keyword Education Count',
       'Keyword Energy Count', 'Keyword Transport Count',
       'Keyword Circular Economy Count', 'Keyword Waste Count',
       'Keyword Water Count', 'Document First Event', 'Year Document Release',
       'Month Document Release', 'Months Between Release and Last Amendment',
       'Months Since Release', 'Months Since Last Amendment',
       'Year Last Amendment', 'Positive Words', 'Negative Words',
       'Emergency Sentiment Rate', 'Summary Words Length'],
      dtype='

In [76]:
data_augmented = data[['Title','Type','Country','Country ISO','Region', 'SubRegion','Emergency Framework',
      'Document Types','Document Link','Document Summary',
      'Month Document Release', 'Year Document Release','Year Last Amendment',
        'Months Between Release and Last Amendment','Months Since Release',
      'Positive Words','Negative Words', 'Emergency Sentiment Rate', 'Summary Words Length',
      'Keyword Agriculture Count',
       'Keyword Biodiversity Count', 'Keyword Building Count',
       'Keyword Coal Count', 'Keyword Deforestation Count',
       'Keyword Education Count', 'Keyword Energy Count',
       'Keyword Transport Count', 'Keyword Circular Economy Count',
       'Keyword Waste Count', 'Keyword Water Count'
     ]]

In [77]:
data_augmented.columns = data_augmented.columns.str.replace(' ', '_')

In [78]:
data_augmented.to_excel('BaGS data Period5 - Climate Change Laws of the World.xls',
                        sheet_name='climate_laws', index=False)

## Datasets by country

- CO2 emissions (metric tons per capita)  
https://data.worldbank.org/indicator/EN.ATM.CO2E.PC

- Renewable electricity output (% of total electricity output)  
https://data.worldbank.org/indicator/EG.GDP.PUSE.KO.PP
    

In [79]:
# countries = list(data_augmented['Country ISO'].unique())
# years = list(range(1990,2020))

# renewable_electricity = pd.read_excel('data/Renewable electricity output (% of total electricity output).xls', 
#                                       header=3 ,sheet_name='Data')

# renewable_electricity = pd.melt(renewable_electricity, id_vars=renewable_electricity.columns[1], 
#             value_vars=list(renewable_electricity.columns[4:]), 
#             var_name='Year', 
#             value_name='% Renewable Electricity')

# renewable_electricity = renewable_electricity[(renewable_electricity['Year'].isin(list(map(str,years))))&
#                                              (renewable_electricity['Country Code'].isin(countries))]
# renewable_electricity['Year'] = list(map(int, renewable_electricity['Year']))

# renewable_electricity.head(2)

# emissions_percapita = pd.read_excel('data/CO2 emissions (metric tons per capita).xls', 
#                                       header=3 ,sheet_name='Data')

# emissions_percapita = pd.melt(emissions_percapita, id_vars=emissions_percapita.columns[1], 
#             value_vars=list(emissions_percapita.columns[4:]), 
#             var_name='Year', 
#             value_name='CO2 emissions per capita')

# emissions_percapita = emissions_percapita[(emissions_percapita['Year'].isin(list(map(str,years))))&
#                                              (emissions_percapita['Country Code'].isin(countries))]
# emissions_percapita['Year'] = list(map(int, emissions_percapita['Year']))

# emissions_percapita.head(2)

# data_aggregated = \
#     data_augmented[data_augmented['Year Document Release'] < 2021]\
#     .groupby('Country ISO')\
#     .count()['Title'].reset_index()

# data_aggregated = data_aggregated.merge(
#     emissions_percapita[emissions_percapita['Year'] < 2021]\
#     .groupby('Country Code')\
#     .mean()['CO2 emissions per capita'].reset_index(),
# left_on='Country ISO', right_on='Country Code', how='left')\
#     .rename(columns={'Title':'Number of Laws & Policies',
#                     'CO2 emissions per capita':'AVG CO2 emissions per capita',})\
#     .drop(columns='Country Code')

# #data_aggregated.to_excel('data/Aggregated Example.xls')

# data_aggregated