In [1]:
import numpy as np
import pandas as pd
import csv
import matplotlib
import matplotlib.pylab as pl
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
import scipy.stats as sst
%matplotlib inline
# Raise pandas' display limits: show up to 500 rows and 400 columns before truncating
pd.options.display.max_rows = 500
pd.options.display.max_columns = 400
from IPython.display import display, HTML
# Inject a CSS rule that forces elements with class `container` to 100% width
display(HTML("<style>.container { width:100% !important; }</style>"))

### Key functions

In [2]:
def tidy_split(df, column, sep=',', keep=False):
    """
    Expand a delimiter-separated column into one row per individual value.

    Rows whose `column` entry is missing are dropped first. Every remaining
    entry is cast to `str` and split on `sep`; the source row is then repeated
    once for each resulting piece, with `column` holding that piece.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    column : str
        Name of the column whose values are split.
    sep : str, default ','
        Delimiter passed to `str.split`.
    keep : bool, default False
        If True, a row that splits into more than one piece is additionally
        emitted once with its original, unsplit value (placed before its pieces).

    Returns
    -------
    pandas.DataFrame
        A copy with the same columns as `df`. Index labels of the source rows
        are repeated, so the index is generally not unique.
    """
    non_missing = df.dropna(subset=[column])

    row_positions = []
    cell_values = []
    for position, raw in enumerate(non_missing[column].astype(str)):
        pieces = raw.split(sep)
        if keep and len(pieces) > 1:
            # Emit the unsplit value ahead of its individual pieces
            pieces = [raw] + pieces
        row_positions.extend([position] * len(pieces))
        cell_values.extend(pieces)

    # Positional lookup repeats each source row once per emitted value
    expanded = non_missing.iloc[row_positions, :].copy()
    expanded[column] = cell_values
    return expanded


def create_N_and_M_matrices(climate_policy_dataframe):
    """
    Create the N and M matrices from the raw climate policy database.

    Parameters
    ----------
    climate_policy_dataframe : pandas.DataFrame
        Must contain the columns 'Country', 'Type of policy instrument' and
        'ones'. The 'ones' column is summed per (country, instrument) pair.

    Returns
    -------
    N : numpy.ndarray
        Country x instrument matrix of summed 'ones' (missing pairs become 0).
    N_df : pandas.DataFrame
        The pivot table behind `N`; missing pairs are left as NaN.
    M_Nci : numpy.ndarray
        Integer matrix, 1 where `N >= 1` and 0 otherwise.
    M_Nci_df : pandas.DataFrame
        `M_Nci` labelled with the index/columns of `N_df`.
    M_RPP : numpy.ndarray
        Float matrix, 1.0 where the Relative Policy Prevalence is > 1, else 0.0.
    M_RPP_df : pandas.DataFrame
        `M_RPP` labelled with the index/columns of `N_df`.

    Notes
    -----
    Relative Policy Prevalence for country c and instrument p is

        RPP[c, p] = (N[c, p] / sum_p' N[c, p']) / (sum_c' N[c', p] / sum N)

    Undefined ratios (0/0) are mapped to 0 via `np.nan_to_num`.
    """
    # N: number of policies each country has announced per instrument type.
    # The string 'sum' is used instead of np.sum: passing the NumPy callable is
    # deprecated in recent pandas and aggregates identically.
    N_df = pd.pivot_table(
        climate_policy_dataframe,
        values='ones',
        index='Country',
        columns='Type of policy instrument',
        aggfunc='sum',
    )
    N = np.nan_to_num(N_df)

    # M_Nci: 1 if the country has ever announced a policy of that type, else 0
    M_Nci = np.where(N >= 1, 1, 0)
    M_Nci_df = pd.DataFrame(data=M_Nci, index=N_df.index, columns=N_df.columns)

    # M_RPP: 1 if the country's Relative Policy Prevalence exceeds 1.
    # Row/column totals are computed once rather than inside a nested loop.
    country_totals = N.sum(axis=1, keepdims=True)      # shape (countries, 1)
    instrument_totals = N.sum(axis=0, keepdims=True)   # shape (1, instruments)
    total_policy_prevalence = N.sum()

    # Zero totals yield nan/inf here; silence the warnings and let nan_to_num
    # clean the result up afterwards.
    with np.errstate(divide='ignore', invalid='ignore'):
        country_share = N / country_totals
        global_share = instrument_totals / total_policy_prevalence
        RPP_bipartite = np.nan_to_num(country_share / global_share)

    M_RPP = (RPP_bipartite > 1).astype(float)
    M_RPP_df = pd.DataFrame(M_RPP, index=N_df.index, columns=N_df.columns)

    return N, N_df, M_Nci, M_Nci_df, M_RPP, M_RPP_df

### Read and munge raw data to create climate policy dataframe used in this paper


In [3]:
# Load the raw climate policy space (CPS) database
df = pd.read_csv(
    'github_raw/climate_policy_database.csv',
    encoding="latin1",
)

# Restrict to policies whose jurisdiction is the country level
df_use = df[df['Jurisdiction'] == 'Country']
df_all_policies = df_use.copy()

# Load the World Bank (WB) country income and region group labels
WB_country_data = pd.read_csv(
    'github_raw/WB_country_labels.csv',
    encoding='latin1',
)

# Left-join the WB labels onto the CPS rows via the ISO country code
df_WB = pd.merge(
    df_all_policies,
    WB_country_data,
    left_on='Country ISO',
    right_on="Code",
    how='left',
)

# Split comma-separated instrument lists into one row per policy instrument type
df_instruments = tidy_split(df_use, 'Type of policy instrument', sep=',')
df_instruments['Type of policy instrument'] = (
    df_instruments['Type of policy instrument'].str.strip()
)
df_instruments = pd.merge(
    df_instruments,
    WB_country_data,
    left_on='Country ISO',
    right_on="Code",
    how='left',
)
# Constant column of 1.0 used downstream for counting/summing policies
df_instruments['ones'] = np.ones(len(df_instruments))
trend_instrument = (
    df_instruments
    .groupby(['Type of policy instrument', 'Date of decision'])
    .agg({'ones': 'count'})
    .reset_index()
)

# Drop all categories and sub-categories so that only the most detailed level
# of policy instrument remains.
# NOTE(review): 'Volunatary approaches' looks like a misspelling of
# 'Voluntary approaches'; if the raw data uses the correct spelling, that
# category is NOT being excluded here -- confirm against the CSV.
# 'Target' is listed twice, which is harmless for `isin`.
not_include = [
    'Energy efficiency target',
    'Renewable energy target',
    'Target',
    'GHG reduction target',
    'Direct investment',
    'Fiscal or financial incentives',
    'Market-based instruments',
    'Codes and standards',
    'Regulatory Instruments',
    'Economic instruments',
    'Information and education',
    'Performance label',
    'Policy support',
    'Research & Development and Deployment (RD&D)',
    'Research programme',
    'Volunatary approaches',
    'Climate strategy',
    'Target',
]

# Output the climate policy dataframe (CPD)
CPD = df_instruments[
    ~df_instruments['Type of policy instrument'].isin(not_include)
]
CPD.to_csv(
    'github_intermediate/country_policy_dataframe.csv',
    index=False,
)


In [4]:
## Build the N and M matrices from the climate policy dataframe and write them to disk

(
    N, N_df,
    M_Nci, M_Nci_df,
    M_RPP, M_RPP_df,
) = create_N_and_M_matrices(CPD)

# N: raw array (no labels) plus the labelled dataframe
np.savetxt(fname='github_intermediate/N_matrix.csv', X=N, delimiter=',')
N_df.to_csv(path_or_buf='github_intermediate/N_df.csv')

# M (Nci approach): raw array plus the labelled dataframe
np.savetxt(fname='github_intermediate/M_Nci_matrix.csv', X=M_Nci, delimiter=',')
M_Nci_df.to_csv(path_or_buf='github_intermediate/M_Nci_df.csv')

# M (RPP approach): raw array plus the labelled dataframe
np.savetxt(fname='github_intermediate/M_RPP_matrix.csv', X=M_RPP, delimiter=',')
M_RPP_df.to_csv(path_or_buf='github_intermediate/M_RPP_df.csv')

