# Weights

This notebook acquires the weights for the principal component analysis (PCA) and analytic hierarchy process (AHP) methodologies.

In [7]:
import numpy as np
import pandas as pd
import geopandas as gpd

import utils.analysis as an

In [2]:
# Opt in to pandas' future "no silent downcasting" behaviour. This is relevant for
# the `fillna` calls used throughout this notebook.
# NOTE(review): confirm the exact scope of this option against the pandas options docs.
pd.set_option('future.no_silent_downcasting', True)

## PCA

In [None]:
# Names of the territories: the file has no header row, so the names are
# taken from the first (and only used) column.
l_names = list(pd.read_csv('../data/input/table/mpios_names.txt', header=None)[0])

# Service types for which an accessibility table is read later on
codes_names_list = [
    'health',
    'sport',
    'education',
    'financial',
    'cultural',
    'parks',
]

# Population data: keep the full table and a two-column view holding the
# block code and its population.
DANE_data = pd.read_csv(
    '../data/input/table/DANE_2018_personas_manz.txt',
    low_memory=False,
)
pop = DANE_data.loc[:, ['MANZ_CCNCT', 'poblacion']]

In [None]:
# Column names of the raw (A_i_*) and min-max normalised (N_i_*) accessibility
# of every service type. They do not depend on the territory, so they are built
# once here instead of on every loop iteration. `N_i_names` is reused by the
# PCA cell below.
A_i_names = [f'A_i_{c[:3]}' for c in codes_names_list]
N_i_names = [f'N_i_{c[:3]}' for c in codes_names_list]

# One normalised table per territory; concatenated after the loop
blocks_per_territory = []
for m in l_names:
    # Import blocks. The explicit copy avoids writing the new column onto a
    # column subset of the frame that was just read.
    blocks = gpd.read_file(f'../data/output/shape/blocks/{m}_blocks.shp')
    blocks = blocks[['MANZ_CCNCT','geometry']].copy()
    blocks['territory'] = m

    # Combine the population and the blocks (left join on the block code)
    blocks = blocks.set_index('MANZ_CCNCT').join(pop.set_index('MANZ_CCNCT'))
    blocks = blocks.reset_index()
    blocks = blocks.rename(columns={'poblacion':'p_(h)'})
    # Change the NaN values for 0.0 (blocks without a population record)
    blocks = blocks.fillna(0.0)

    # Aggregate each accessibility measure to the blocks
    for c, a_name in zip(codes_names_list, A_i_names):
        # Import the accessibility DataFrame
        acc_i_df = pd.read_csv(f'../data/output/table/accessibility_dfs/contour/accessibility_i_contour_15min_{m}_{c}.txt')
        # Keep the block code and the measure, renamed after its service type
        acc_i_df = acc_i_df[['MANZ_CCNCT','Acc_i']].rename(columns={'Acc_i':a_name})
        blocks = blocks.merge(acc_i_df,on='MANZ_CCNCT',how='left')
        # Blocks absent from the accessibility table get 0.0
        blocks = blocks.fillna(0.0)

    # Eliminate blocks with 0 population
    blocks_norm = blocks.copy()
    blocks_norm = blocks_norm.drop(blocks_norm[blocks_norm['p_(h)']==0].index).reset_index(drop=True)

    # Min-max normalisation of every accessibility column within the territory
    for a_name, n_name in zip(A_i_names, N_i_names):
        a_min = blocks_norm[a_name].min()
        a_max = blocks_norm[a_name].max()
        if a_max == a_min:
            # Constant column: the range is zero, so the values are kept as they are
            blocks_norm[n_name] = blocks_norm[a_name]
        else:
            blocks_norm[n_name] = (blocks_norm[a_name]-a_min)/(a_max-a_min)
    blocks_norm = blocks_norm.fillna(0.0)
    blocks_norm = blocks_norm.set_index('MANZ_CCNCT')

    blocks_per_territory.append(blocks_norm)

# Single table with the populated blocks of all territories
all_blocks = pd.concat(blocks_per_territory)
blocks_norm = all_blocks.copy()

In [None]:
# PCA input: only the normalised accessibility columns of the populated blocks
pca_df = blocks_norm[N_i_names].copy()

# 0.6 is the second positional argument of `an.weights_PCA`
# (presumably an explained-variance threshold; TODO confirm in utils.analysis).
pca_result = an.weights_PCA(pca_df, 0.6)
n_PC, exp_var, weights = pca_result

# Persist the PCA weights
weights.to_csv('../data/output/table/weights/PCA_weights.txt')

## AHP

In [9]:
## Aggregating Individual Judgements (AIJ)

# Number of survey files to read: Encuesta_AHP_0.csv ... Encuesta_AHP_{r-1}.csv
r = 13
total_surveys = list(range(r))

# Folder with the survey responses, relative to the notebook like every other
# path used here (it replaces a user-specific absolute path).
ahp_responses_dir = '../data/input/table/AHP_responses'

# One list per criterion (soc, env, eco) with the result extracted from each survey
matrices_soc = []
matrices_env = []
matrices_eco = []

for s in total_surveys:

    data = pd.read_csv(f'{ahp_responses_dir}/Encuesta_AHP_{s}.csv',sep=';')
    data = data.fillna(0)
    # Eliminate rows with no value in the aim cell (the last column)
    data = data[data[data.columns[-1]]!=0]
    data = data.rename(columns={'Encuesta para la Evaluación de Servicios Básicos Mediante la Metodología AHP':'one','Unnamed: 10':'two','Unnamed: 12':'value'})
    data = data[['one','two','value']]
    # Without the criteria: the remaining rows are split by position into three
    # consecutive groups, one per criterion (presumably 15 rows = the pairwise
    # comparisons among the 6 service types; TODO confirm against the survey layout).
    data_soc = data.iloc[:15].reset_index(drop=True)
    data_env = data.iloc[15:30].reset_index(drop=True)
    data_eco = data.iloc[30:].reset_index(drop=True)

    # AHPs
    AHP_soc = an.extract_survey_information(data_soc)
    AHP_env = an.extract_survey_information(data_env)
    AHP_eco = an.extract_survey_information(data_eco)

    # Add to the lists
    matrices_soc.append(AHP_soc)
    matrices_env.append(AHP_env)
    matrices_eco.append(AHP_eco)

# Average matrices (geometric mean across surveys)
# https://www.spicelogic.com/docs/ahpsoftware/intro/ahp-group-decision-making-395
# Forman 1998
avg_soc = an.calculate_average(matrices_soc, method='geometric')
avg_env = an.calculate_average(matrices_env, method='geometric')
avg_eco = an.calculate_average(matrices_eco, method='geometric')

# Consistency ratio (CR) and pre-weights of each averaged matrix
# (original note on this step: "españa" / Spain)
CR_soc,pre_weights_soc = an.calculate_consistency_and_pre_weights(avg_soc)
CR_env,pre_weights_env = an.calculate_consistency_and_pre_weights(avg_env)
CR_eco,pre_weights_eco = an.calculate_consistency_and_pre_weights(avg_eco)

print('Consistency of each criteria:')
print('soc CR',np.round(CR_soc,4))
print('env CR',np.round(CR_env,4))
print('eco CR',np.round(CR_eco,4))

# One column of pre-weights per criterion
pre_weights_matrix = pd.DataFrame()
pre_weights_matrix['soc'] = pre_weights_soc
pre_weights_matrix['env'] = pre_weights_env
pre_weights_matrix['eco'] = pre_weights_eco

# The three criteria are weighted equally (1/3 each); column vector named 'w'
criteria_matrix = pd.DataFrame({'soc':1/3,'env':1/3,'eco':1/3},index=['w']).T

# Final weights: pre-weights combined with the criteria weights
weights = pre_weights_matrix@criteria_matrix
# NOTE(review): this overwrites the `N_i_names` defined for the PCA section with
# different content (bare three-letter codes); re-running the PCA cell after this
# one will look up the wrong column names.
N_i_names = ['hea', 'spo', 'edu', 'fin', 'cul', 'par']
weights = weights.reindex(N_i_names)

Consistency of each criteria:
soc CR 0.0148
env CR 0.025
eco CR 0.0108


In [None]:
# Save the AHP weights to disk.
# NOTE: `weights` is also the name assigned by the PCA cell, so the AHP cell must
# be the last one executed before this line; otherwise the PCA weights are written here.
weights.to_csv('../data/output/table/weights/AHP_weights.txt')