## Datasets by country

- CO2 emissions (metric tons per capita)  
https://data.worldbank.org/indicator/EN.ATM.CO2E.PC

- Renewable electricity output (% of total electricity output)  
https://data.worldbank.org/indicator/EG.ELC.RNEW.ZS
    

In [1]:
import pandas as pd

# Load the climate-law dataset; with no sheet_name given, pandas reads the
# first sheet of the workbook.
laws_workbook = 'BaGS data Period5 - Climate Change Laws of the World.xls'
data = pd.read_excel(laws_workbook)

In [2]:
# Inspect which columns the laws dataset provides before selecting any of them.
data.columns

Index(['Title', 'Type', 'Country', 'Country_ISO', 'Region', 'SubRegion',
       'Emergency_Framework', 'Document_Types', 'Document_Link',
       'Document_Summary', 'Month_Document_Release', 'Year_Document_Release',
       'Year_Last_Amendment', 'Months_Between_Release_and_Last_Amendment',
       'Months_Since_Release', 'Positive_Words', 'Negative_Words',
       'Emergency_Sentiment_Rate', 'Summary_Words_Length',
       'Keyword_Agriculture_Count', 'Keyword_Biodiversity_Count',
       'Keyword_Building_Count', 'Keyword_Coal_Count',
       'Keyword_Deforestation_Count', 'Keyword_Education_Count',
       'Keyword_Energy_Count', 'Keyword_Transport_Count',
       'Keyword_Circular_Economy_Count', 'Keyword_Waste_Count',
       'Keyword_Water_Count'],
      dtype='object')

In [3]:
# Distinct ISO codes found in the laws dataset; used to restrict the
# indicator tables to the same set of countries.
countries = data['Country_ISO'].unique().tolist()
# 1990 through 2019 inclusive (the upper bound of range() is exclusive).
years = [*range(1990, 2020)]

In [4]:
# Read the 'Data' sheet of the renewable-electricity workbook; header=3 makes
# the fourth row of the sheet the column header.
renewable_electricity = pd.read_excel(
    'Renewable electricity output (% of total electricity output).xls',
    header=3, sheet_name='Data')

# Wide -> long: the second column (shown as 'Country Code' in the output) is
# kept as the identifier, and every column from the fifth onwards becomes a
# (Year, Perc_Renewable_Electricity) row.
renewable_electricity = pd.melt(
    renewable_electricity,
    id_vars=renewable_electricity.columns[1],
    value_vars=list(renewable_electricity.columns[4:]),
    var_name='Year',
    value_name='Perc_Renewable_Electricity')

# Keep only the years of interest (matched as strings, since that is how the
# year labels are compared here) and the countries present in the laws data.
# The integer conversion is done with .assign() on the filtered result, which
# returns a new frame; assigning a column directly into a boolean-filtered
# slice would raise SettingWithCopyWarning.
renewable_electricity = (
    renewable_electricity[
        renewable_electricity['Year'].isin([str(year) for year in years])
        & renewable_electricity['Country Code'].isin(countries)
    ]
    .assign(Year=lambda frame: frame['Year'].astype('int64'))
)

renewable_electricity.head(2)

Unnamed: 0,Country Code,Year,Perc_Renewable_Electricity
7921,AFG,1990,67.730496
7922,AGO,1990,86.206897


In [5]:
# Read the 'Data' sheet of the CO2-emissions workbook; header=3 makes the
# fourth row of the sheet the column header.
emissions_percapita = pd.read_excel(
    'CO2 emissions (metric tons per capita).xls',
    header=3, sheet_name='Data')

# Wide -> long: the second column (shown as 'Country Code' in the output) is
# kept as the identifier, and every column from the fifth onwards becomes a
# (Year, CO2_emissions_per_capita) row.
emissions_percapita = pd.melt(
    emissions_percapita,
    id_vars=emissions_percapita.columns[1],
    value_vars=list(emissions_percapita.columns[4:]),
    var_name='Year',
    value_name='CO2_emissions_per_capita')

# Keep only the years of interest (matched as strings, since that is how the
# year labels are compared here) and the countries present in the laws data.
# The integer conversion is done with .assign() on the filtered result, which
# returns a new frame; assigning a column directly into a boolean-filtered
# slice would raise SettingWithCopyWarning.
emissions_percapita = (
    emissions_percapita[
        emissions_percapita['Year'].isin([str(year) for year in years])
        & emissions_percapita['Country Code'].isin(countries)
    ]
    .assign(Year=lambda frame: frame['Year'].astype('int64'))
)

emissions_percapita.head(2)


Unnamed: 0,Country Code,Year,CO2_emissions_per_capita
7921,AFG,1990,0.210643
7922,AGO,1990,0.431744


In [6]:
# Build one row per country combining law statistics with the two indicators.

# Only documents released before 2021 are taken into account.
laws_before_2021 = data[data['Year_Document_Release'] < 2021]

# Number of laws/policies per country (count of non-null titles).
data_aggregated = (
    laws_before_2021
    .groupby(['Country_ISO', 'Country'])['Title']
    .count()
    .reset_index()
)

# Per-country means of the numeric law attributes. The columns are selected
# *before* aggregating: calling .mean() on the whole grouped frame would also
# try to average the text columns, which raises TypeError on pandas >= 2.0
# (numeric_only defaults to False there).
law_means = (
    laws_before_2021
    .groupby('Country_ISO')[['Year_Last_Amendment',
                             'Keyword_Agriculture_Count',
                             'Keyword_Energy_Count',
                             'Emergency_Sentiment_Rate']]
    .mean()
    .reset_index()
)

data_aggregated = (
    data_aggregated
    .merge(law_means, on='Country_ISO', how='left')
    .rename(columns={'Title': 'Laws_Policy',
                     'Emergency_Sentiment_Rate': 'Emergency_Sentiment',
                     'Keyword_Agriculture_Count': 'Agriculture',
                     'Keyword_Energy_Count': 'Energy'})
)

# Mean CO2 emissions per capita for years strictly after 2000 and before 2021.
mean_emissions = (
    emissions_percapita[(emissions_percapita['Year'] < 2021)
                        & (emissions_percapita['Year'] > 2000)]
    .groupby('Country Code')['CO2_emissions_per_capita']
    .mean()
    .reset_index()
)

# Left merges keep every country from the laws data even when an indicator
# has no rows for it; the duplicate key column is dropped afterwards.
data_aggregated = (
    data_aggregated
    .merge(mean_emissions, left_on='Country_ISO', right_on='Country Code', how='left')
    .rename(columns={'CO2_emissions_per_capita': 'CO2_Emissions'})
    .drop(columns='Country Code')
)

# Mean share of renewable electricity over the same year window.
mean_renewables = (
    renewable_electricity[(renewable_electricity['Year'] < 2021)
                          & (renewable_electricity['Year'] > 2000)]
    .groupby('Country Code')['Perc_Renewable_Electricity']
    .mean()
    .reset_index()
)

data_aggregated = (
    data_aggregated
    .merge(mean_renewables, left_on='Country_ISO', right_on='Country Code', how='left')
    .rename(columns={'Perc_Renewable_Electricity': 'Renewable_Electricity'})
    .drop(columns='Country Code')
)

data_aggregated.head()

Unnamed: 0,Country_ISO,Country,Laws_Policy,Year_Last_Amendment,Agriculture,Energy,Emergency_Sentiment,CO2_Emissions,Renewable_Electricity
0,AFG,Afghanistan,14,2012.642857,0.142857,2.0,-0.064621,0.177821,78.003943
1,AGO,Angola,21,2013.428571,0.190476,1.666667,0.277776,1.111482,67.341581
2,ALB,Albania,3,2011.0,0.0,6.0,-0.333333,1.496656,98.512948
3,AND,Andorra,8,2012.25,0.0,5.625,-0.125,6.553149,90.01228
4,ARE,United Arab Emirates,6,2014.333333,0.0,3.0,-0.2,23.690396,0.03899


In [7]:
# Write the per-country table to an Excel workbook; index=False leaves the
# row index out of the file.
# NOTE(review): writing the legacy .xls format depends on the xlwt engine,
# which newer pandas releases no longer support — confirm the pandas version
# in use, or agree with downstream consumers on switching to .xlsx.
data_aggregated.to_excel('Climate_data_countries.xls', index=False)