In [4]:
import numpy as np
import pandas as pd
import csv
import matplotlib
import matplotlib.pylab as pl
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
import scipy.stats as sst
%matplotlib inline
pd.options.display.max_rows = 500
pd.options.display.max_columns = 400
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

### Key functions to calculate policy relatedness, policy alignment and institutional capacity building potential


In [2]:

def calculate_hh_and_weighted_policy_relatedness(M):
    """
    Calculates policy relatedness measures from the M matrix based on
    two functional forms introduced by Hidalgo et al 2007 and Zaccharia et al 2015

    Parameters
    ----------
    M : 2-D array-like, shape (number of policies, number of countries)
        Incidence matrix with one row per policy and one column per country.
        Entries are expected to be non-negative (typically binary 0/1).

    Returns
    -------
    hh_relatedness_mat : ndarray, shape (number of policies, number of policies)
        Entry [i, j] is min(overlap_ij / total_i, overlap_ij / total_j), where
        overlap_ij = sum over countries of M[i] * M[j] and total_i = M[i].sum().
    weighted_relatedness_mat : ndarray, same shape
        Same functional form, but each country's contribution to the overlap is
        divided by that country's column sum R_i, so countries that have
        introduced more policies are assigned lower weight.

    Notes
    -----
    A policy with a zero row sum gets relatedness 0 to every policy. A country
    with a zero column sum contributes nothing to the weighted overlap; it no
    longer injects NaN into the sums (which used to zero the whole weighted
    matrix once NaNs were replaced).
    """
    M = np.asarray(M, dtype=float)

    policy_totals = M.sum(axis=1)  # per policy: sum over countries
    R_i = M.sum(axis=0)            # per country: sum over policies

    # Pairwise overlaps for all policy pairs at once: entry [i, j] = sum_c M[i, c] * M[j, c]
    overlap = M @ M.T

    # Weight each country's contribution by 1/R_i; countries with R_i == 0 get weight 0
    inverse_diversity = np.divide(1.0, R_i, out=np.zeros_like(R_i), where=R_i != 0)
    overlap_weighted = (M * inverse_diversity) @ M.T

    totals_as_rows = policy_totals[:, np.newaxis]
    totals_as_cols = policy_totals[np.newaxis, :]

    def _min_conditional_share(shared):
        # Normalise the shared mass by each policy's own total and keep the smaller ratio.
        # Ratios with a zero total are defined as 0 instead of NaN.
        share_ij = np.divide(shared, totals_as_rows,
                             out=np.zeros_like(shared), where=totals_as_rows != 0)
        share_ji = np.divide(shared, totals_as_cols,
                             out=np.zeros_like(shared), where=totals_as_cols != 0)
        return np.nan_to_num(np.minimum(share_ij, share_ji))

    hh_relatedness_mat = _min_conditional_share(overlap)
    weighted_relatedness_mat = _min_conditional_share(overlap_weighted)

    return hh_relatedness_mat,weighted_relatedness_mat



def calculate_country_policy_alignment(relatedness_mat, M_df,M):
    """
    Calculates country policy alignment measures from the policy relatedness matrix and
    countries' historical mix of policies based on the M matrix

    Parameters
    ----------
    relatedness_mat : 2-D array-like, shape (number of policies, number of policies)
        Policy relatedness matrix. It is NOT modified: the diagonal is zeroed
        on an internal copy only.
    M_df : pandas.DataFrame
        Only its index (countries) and columns (policies) are used, to size
        the output.
    M : 2-D array-like, shape (number of countries, number of policies)
        Country-by-policy matrix, with rows and columns ordered like M_df.

    Returns
    -------
    Country Policy Alignment matrix showing each country's relatedness to each climate policy.
    Entry [c, p] is dot(M[c, :], r_p) / r_p.sum(), where r_p is column p of the
    zero-diagonal relatedness matrix. Entries stay 0 where r_p.sum() is not > 0.
    """
    n_countries = len(M_df.index)
    n_policies = len(M_df.columns)
    country_policy_alignment_matrix = np.zeros((n_countries, n_policies))

    # Work on a copy so the caller's relatedness matrix keeps its diagonal
    zd_relatedness_matrix = np.array(relatedness_mat, dtype=float)
    np.fill_diagonal(zd_relatedness_matrix, 0)

    # Restrict to the dimensions given by M_df, which define the output shape
    related_to_policy = zd_relatedness_matrix[:, :n_policies]
    country_policy_mix = np.asarray(M, dtype=float)[:n_countries, :]

    denominators = related_to_policy.sum(axis=0)
    # Only policies with positive total relatedness to the other policies are scored
    has_related_policies = denominators > 0

    numerators = country_policy_mix @ related_to_policy
    country_policy_alignment_matrix[:, has_related_policies] = np.nan_to_num(
        numerators[:, has_related_policies] / denominators[has_related_policies]
    )

    return country_policy_alignment_matrix


def create_N_and_M_matrices(climate_policy_dataframe):
    """
    Creates N and M matrices from raw climate policy database

    Parameters
    ----------
    climate_policy_dataframe : pandas.DataFrame
        Must contain the columns 'Country', 'Type of policy instrument' and
        'ones' (the value that is summed per country/instrument pair).

    Returns
    -------
    N : ndarray
        Country-by-instrument sums of 'ones', with missing combinations set to 0.
    N_df : pandas.DataFrame
        The pivot table N was derived from (missing combinations remain NaN).
    M_Nci : ndarray
        1 where N >= 1, else 0.
    M_Nci_df : pandas.DataFrame
        M_Nci labelled with N_df's index and columns.
    M_RPP : ndarray
        1.0 where the Relative Policy Prevalence is > 1, else 0.0.
    M_RPP_df : pandas.DataFrame
        M_RPP labelled with N_df's index and columns.
    """

    # First, create N matrix and dataframe that sums up all the policies countries
    # have announced in different policy instrument categories.
    # The string 'sum' is used rather than np.sum: pandas deprecates passing the
    # NumPy callable here and asks for the string to keep the current behaviour.
    N_df = pd.pivot_table(
        climate_policy_dataframe,
        values='ones',
        index='Country',
        columns='Type of policy instrument',
        aggfunc='sum',
    )
    N = np.nan_to_num(N_df)

    # Second, create M matrices and dataframes based on two alternative approaches:
    # M_Nci takes a binary value 1 if country has announced a policy (ever) and zero otherwise
    M_Nci = np.where(N >= 1, 1, 0)
    M_Nci_df = pd.DataFrame(data=M_Nci, index=N_df.index, columns=N_df.columns)

    # M_RPP takes a binary value 1 if the country's Relative Policy Prevalence is > 1, where
    #   RPP[c, p] = (N[c, p] / sum_p N[c, p]) / (sum_c N[c, p] / sum_cp N[c, p])
    # 0/0 cases are deliberately mapped to 0 by nan_to_num, so the warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        country_share = N / N.sum(axis=1, keepdims=True)
        global_share = N.sum(axis=0) / N.sum()
        RPP_bipartite = np.nan_to_num(country_share / global_share)
    M_RPP = np.where(RPP_bipartite > 1, 1.0, 0.0)

    M_RPP_df = pd.DataFrame(M_RPP, index=N_df.index, columns=N_df.columns)

    return N, N_df, M_Nci, M_Nci_df,M_RPP, M_RPP_df

def unpivot(frame): # code from https://pandas.pydata.org/docs/user_guide/reshaping.html
    """
    Reshape a wide frame into long form with one row per (index label, column label) pair.

    Returns
    -------
    DataFrame with columns "Country" (taken from frame's index), "Policy"
    (taken from frame's columns) and "value", ordered column by column.
    """
    n_rows, n_cols = frame.shape

    # Column-major flattening: all rows of the first column, then the second, ...
    flat_values = frame.to_numpy().ravel("F")
    # Each column label is repeated once per row to line up with flat_values
    policy_labels = np.asarray(frame.columns).repeat(n_rows)
    # The full run of index labels is repeated once per column
    country_labels = np.tile(np.asarray(frame.index), n_cols)

    long_form = {
        "value": flat_values,
        "Policy": policy_labels,
        "Country": country_labels,
    }
    return pd.DataFrame(long_form, columns=["Country", "Policy", "value"])


### Calculate policy alignment measures based on the dataset up to and including 2015

In [11]:
# Load the climate policy database from the intermediate data folder
CPD = pd.read_csv('github_intermediate/country_policy_dataframe.csv')

# Split on 'Date of decision': strictly before 2016 vs. 2016 and later
# (assumes this column holds numeric years - TODO confirm)
CPD_2015 = CPD.loc[CPD['Date of decision'] < 2016]
CPD_2016_onwards = CPD.loc[CPD['Date of decision'] >= 2016]

# N and M matrices built from the pre-2016 subset only
(
    N_2015,
    N_df_2015,
    M_Nci_2015,
    M_Nci_df_2015,
    M_RPP_2015,
    M_RPP_df_2015,
) = create_N_and_M_matrices(CPD_2015)

# Policy relatedness (both functional forms); the function receives M_Nci transposed
(
    hh_relatedness_mat_policy_2015,
    weighted_relatedness_mat_policy_2015,
) = calculate_hh_and_weighted_policy_relatedness(M_Nci_2015.T)

# Country policy alignment for each relatedness measure, pre-2016 data only
weighted_country_policy_alignment_2015 = calculate_country_policy_alignment(
    weighted_relatedness_mat_policy_2015, M_Nci_df_2015, M_Nci_2015
)
hh_country_policy_alignment_2015 = calculate_country_policy_alignment(
    hh_relatedness_mat_policy_2015, M_Nci_df_2015, M_Nci_2015
)

# Wrap the alignment matrices in dataframes labelled like M_Nci_df_2015
# (nothing is written to disk in this cell)
weighted_country_policy_alignment_2015_df = pd.DataFrame(
    data=weighted_country_policy_alignment_2015,
    index=M_Nci_df_2015.index,
    columns=M_Nci_df_2015.columns,
)
hh_country_policy_alignment_2015_df = pd.DataFrame(
    data=hh_country_policy_alignment_2015,
    index=M_Nci_df_2015.index,
    columns=M_Nci_df_2015.columns,
)

### Construct test dataset based on counts of country policies from 2016 onwards

In [6]:

# Sum 'ones' per country and policy instrument type over the 2016-onwards subset.
# Only countries and instrument types present in CPD_2016_onwards appear in this table.
# aggfunc is the string 'sum' rather than np.sum: pandas deprecates passing the NumPy
# callable here and asks for the string to keep the current behaviour.
policy_counts_table_2016_onwards = pd.pivot_table(
    CPD_2016_onwards,
    values='ones',
    index='Country',
    columns='Type of policy instrument',
    aggfunc='sum',
)

# Country/instrument combinations with no rows come out of the pivot as NaN; set them to 0
policy_counts_table_2016_onwards_matrix = np.nan_to_num(policy_counts_table_2016_onwards)
policy_counts_table_2016_onwards_matrix_df = pd.DataFrame(
    data=policy_counts_table_2016_onwards_matrix,
    index=policy_counts_table_2016_onwards.index,
    columns=policy_counts_table_2016_onwards.columns,
)

# Long form: one row per (Country, Policy) pair with the count in 'value'
policy_counts_table_2016_long = unpivot(policy_counts_table_2016_onwards_matrix_df)



### Munge data for regression

In [7]:
# Convert the two alignment measures to long form, one row per (Country, Policy) pair.
# rename() returns a new frame here instead of mutating with inplace=True, which
# keeps the cell idempotent and avoids SettingWithCopyWarning on derived frames.
weighted_country_policy_alignment_2015_long = unpivot(
    weighted_country_policy_alignment_2015_df
).rename(columns={'value': 'weighted_relatedness_pre_2016'})

hh_country_policy_alignment_2015_long = unpivot(
    hh_country_policy_alignment_2015_df
).rename(columns={'value': 'hh_relatedness_pre_2016'})

# Create dataframe for policy prediction regression: start from the 2016-onwards
# counts and left-join each pre-2016 alignment measure on (Country, Policy)
test_pred = pd.merge(
    policy_counts_table_2016_long,
    weighted_country_policy_alignment_2015_long,
    on=['Country', 'Policy'],
    how='left',
)
test_pred = pd.merge(
    test_pred,
    hh_country_policy_alignment_2015_long,
    on=['Country', 'Policy'],
    how='left',
)

# Note that some countries don't have any policies in the years up to 2015, so the
# left joins leave NaN alignment values for them and those rows are dropped here.
# These countries are 'Andorra', 'French Polynesia', 'Qatar', 'Somalia', 'Sudan'
test_pred_no_nans = test_pred.dropna().rename(
    columns={'value': 'Num_policies_2016_onwards'}
)
test_pred_no_nans.to_csv('github_intermediate/policy_prediction_regression_dataframe_nonRCA_jan24.csv', index= False)

In [9]:
# Quick check of correlations between the 2016-onwards policy counts and the two
# pre-2016 alignment measures.
# The frame still carries the string columns 'Country' and 'Policy'. Recent pandas
# versions no longer drop non-numeric columns silently in corr() and raise instead,
# so select the numeric columns explicitly (works on old and new pandas alike).
test_pred_no_nans.select_dtypes(include='number').corr()

Unnamed: 0,Num_policies_2016_onwards,weighted_relatedness_pre_2016,hh_relatedness_pre_2016
Num_policies_2016_onwards,1.0,0.31431,0.301108
weighted_relatedness_pre_2016,0.31431,1.0,0.99288
hh_relatedness_pre_2016,0.301108,0.99288,1.0
