# Priority Places Index Construction

Depends on the data prepared in the data_prep notebook being available in the respective data directories defined below.

# Set-up
## Import required packages

In [6]:
import pandas as pd
import numpy as np
import scipy as sp
import os
from datetime import datetime

## Set data directory

In [7]:
# Both data folders live under one root; the trailing slash is kept because
# file names are appended to these strings directly.
_data_root = '/workspaces/priority-places-calculator/data/'
data_directory = _data_root + 'processed/'
raw_data_directory = _data_root + 'raw/'

## Read in data

In [8]:
# Postcode -> LSOA -> LAD lookup; only the three columns used below are read.
lookup = pd.read_csv(
    raw_data_directory + 'fsm_lookup/PCD_OA_LSOA_MSOA_LAD_MAY22_UK_LU.csv',
    usecols=['lsoa11cd', 'pcds', 'ladcd'],
    encoding='latin-1',
)
# Strip spaces from postcodes so they match the space-stripped 'PCD' keys
# built for the postcode-level files further down.
lookup['pcds'] = lookup['pcds'].str.replace(' ', '')

In [9]:
# Define our dataframe of interest via the lookup table:
# one row per distinct (LSOA, LAD) pair, indexed by LSOA code.
lsoa_lad_pairs = lookup[['lsoa11cd', 'ladcd']].drop_duplicates()
# Keep codes whose first character is E, S, W or 9 (the '9' prefix is
# labelled 'NI' when the index is exported below).
in_scope = lsoa_lad_pairs['lsoa11cd'].str[0].isin(['E', 'S', 'W', '9'])
df = lsoa_lad_pairs[in_scope].set_index('lsoa11cd')

# efdi merging: left-join on the index so every row of df is kept.
efdi = pd.read_csv(data_directory + 'efdi_variables_for_ppfi.csv', index_col=0)
df = pd.merge(df, efdi, how='left', left_index=True, right_index=True)

# fuel poverty merging: the merge indicator is kept as 'fuel_merge' so that
# rows without a match can be identified afterwards.
fuel_poverty = pd.read_csv(data_directory + 'fuel_poverty.csv', index_col=0)
df = pd.merge(df, fuel_poverty, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={
    '_merge': 'fuel_merge',
    'Percent of households in fuel poverty': 'fuel_poverty_pct',
})

# healthy start voucher uptake: rows with a missing 'Uptake (%)' are dropped
# before the join, so those LSOAs show up as unmatched in 'hsv_merge'.
hsv = pd.read_csv(data_directory + 'HS_uptake_LSOA.csv')
hsv = hsv.loc[hsv['Uptake (%)'].notna(), ['lsoa11cd', 'Uptake (%)']].set_index('lsoa11cd')
df = pd.merge(df, hsv, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={
    '_merge': 'hsv_merge',
    'Uptake (%)': 'healthy_start_voucher_uptake',
})

# Non supermarket distance
# The postcode-level values are spread over several files whose names are a
# four-character prefix followed by 'pcd_nonsupermarket_dist.csv'. The file
# list is sorted so the load order does not depend on os.listdir, and all
# parts are concatenated in a single call instead of growing a frame in a loop.
nonsupermarket_dist_files = sorted(
    f for f in os.listdir(data_directory) if f[4:] == 'pcd_nonsupermarket_dist.csv'
)
if not nonsupermarket_dist_files:
    raise FileNotFoundError(
        'No files ending in pcd_nonsupermarket_dist.csv found in ' + data_directory
    )
pcd_nonsupermarket_dist = pd.concat(
    [pd.read_csv(data_directory + f) for f in nonsupermarket_dist_files]
)
# Match postcodes to LSOAs on the space-stripped postcode.
pcd_nonsupermarket_dist['PCD'] = pcd_nonsupermarket_dist['PCD'].str.replace(' ', '')
nonsupermarket_dist = pcd_nonsupermarket_dist.merge(lookup, left_on='PCD', right_on='pcds', how='left', indicator=True)
# The LSOA value is the mean of column '0' over its postcodes.
lsoa_nonsupermarket_dist = nonsupermarket_dist.groupby('lsoa11cd')['0'].mean()
df = df.merge(lsoa_nonsupermarket_dist, left_index=True, right_index=True, how='left')
df.rename({'0': 'nonsupermarket_distance'}, inplace=True, axis=1)

# Non supermarket 1km count
# The postcode-level values are spread over several files whose names are a
# four-character prefix followed by 'pcd_nonsupermarket_1kmcount.csv'. The file
# list is sorted so the load order does not depend on os.listdir, and all
# parts are concatenated in a single call instead of growing a frame in a loop.
nonsupermarket_1kmcount_files = sorted(
    f for f in os.listdir(data_directory) if f[4:] == 'pcd_nonsupermarket_1kmcount.csv'
)
if not nonsupermarket_1kmcount_files:
    raise FileNotFoundError(
        'No files ending in pcd_nonsupermarket_1kmcount.csv found in ' + data_directory
    )
pcd_nonsupermarket_1kmcount = pd.concat(
    [pd.read_csv(data_directory + f) for f in nonsupermarket_1kmcount_files]
)
# In these files the postcode is read from the 'Unnamed: 0' column.
pcd_nonsupermarket_1kmcount['PCD'] = pcd_nonsupermarket_1kmcount['Unnamed: 0'].str.replace(' ', '')
nonsupermarket_1kmcount = pcd_nonsupermarket_1kmcount.merge(lookup, left_on='PCD', right_on='pcds', how='left', indicator=True)
# The LSOA value is the mean of column '0' over its postcodes.
lsoa_nonsupermarket_1kmcount = nonsupermarket_1kmcount.groupby('lsoa11cd')['0'].mean()
df = df.merge(lsoa_nonsupermarket_1kmcount, left_index=True, right_index=True, how='left')
df.rename({'0': 'nonsupermarket_1kmcount'}, inplace=True, axis=1)

# prepayment_meters merging: only the two columns needed for the ratio are joined.
prepayment_meters = pd.read_csv(
    data_directory + 'prepayment_meters.csv',
    index_col='Lower Layer Super Output Area (LSOA) Code',
)
meter_columns = prepayment_meters[['Total meters', 'Occupied_Households']]
df = pd.merge(df, meter_columns, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'prepayment_merge'})
# Prevalence is 'Total meters' divided by 'Occupied_Households'.
df['prepayment_prevalence'] = df['Total meters'].div(df['Occupied_Households'])

# free school meals: each nation's file uses its own column names, so every
# frame is renamed to the shared ('lsoa11cd', 'fsm_indicator') layout before
# they are stacked.
fsm_eng = pd.read_csv(
    data_directory + 'fsm_england.csv', usecols=['lsoa11cd', 'fsm_eligible_percent']
).rename(columns={'fsm_eligible_percent': 'fsm_indicator'})
fsm_wal = pd.read_csv(
    data_directory + 'fsm_wales.csv', usecols=['lsoa11cd', 'fsm_eligible_percent']
).rename(columns={'fsm_eligible_percent': 'fsm_indicator'})
fsm_scot = pd.read_csv(
    data_directory + 'fsm_scotland.csv', usecols=['lsoa11cd', 'fsm_percent']
).rename(columns={'fsm_percent': 'fsm_indicator'})
fsm_ni = pd.read_csv(
    data_directory + 'fsm_ni.csv', usecols=['LSOA11CD', 'prop_FSME_school_leavers']
).rename(columns={'LSOA11CD': 'lsoa11cd', 'prop_FSME_school_leavers': 'fsm_indicator'})

fsm = pd.concat([fsm_eng, fsm_wal, fsm_scot, fsm_ni]).set_index('lsoa11cd')
df = pd.merge(df, fsm, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'fsm_merge'})

# Impute prepayment prevalence
# Rows with no prepayment prevalence take the median prevalence of their
# local authority district (ladcd). The median is taken by grouping df
# directly, so it does not depend on what reset_index() would call the
# index column.
lad_prepayment_median = df.groupby('ladcd')['prepayment_prevalence'].median()
# The LAD median is attached as 'prepayment_prevalence_lad' and used only to
# fill the gaps in the LSOA-level column.
df = df.merge(lad_prepayment_median, left_on='ladcd', right_index=True, how='left', suffixes=('', '_lad'))
df['prepayment_prevalence'] = df['prepayment_prevalence'].fillna(df['prepayment_prevalence_lad'])

# Food bank distance merging: postcode-level values (column '0') are mapped
# to LSOAs through the lookup and averaged per LSOA.
foodbank_by_postcode = pd.read_csv(data_directory + 'postcode_to_nearest_foodbank_distance.csv')
foodbank_by_postcode['PCD'] = foodbank_by_postcode['PCD'].str.replace(' ', '')
foodbank_with_lsoa = pd.merge(
    foodbank_by_postcode, lookup[['pcds', 'lsoa11cd']],
    how='left', left_on='PCD', right_on='pcds', indicator=True,
)
foodbank_distance = foodbank_with_lsoa.groupby('lsoa11cd')['0'].mean()
df = pd.merge(df, foodbank_distance, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'foodbank_distance_merge', '0': 'foodbank_distance'})

# market_distance: postcode-level values (column '0') averaged per LSOA.
market_by_postcode = pd.read_csv(data_directory + 'pcd_nmftmarkets_dist.csv')
market_by_postcode['PCD'] = market_by_postcode['PCD'].str.replace(' ', '')
market_with_lsoa = pd.merge(
    market_by_postcode, lookup[['pcds', 'lsoa11cd']],
    how='left', left_on='PCD', right_on='pcds', indicator=True,
)
market_distance = market_with_lsoa.groupby('lsoa11cd')['0'].mean().reset_index()
# Exclude Scotland and NI - the data coverage isn't good enough
# (codes starting with 'S' or '9' are removed before the join).
is_scotland_or_ni = market_distance['lsoa11cd'].str[0].isin(['9', 'S'])
market_distance = market_distance[~is_scotland_or_ni]
df = pd.merge(
    df, market_distance.set_index('lsoa11cd'),
    how='left', left_index=True, right_index=True, indicator=True,
)
df = df.rename(columns={'_merge': 'market_distance_merge', '0': 'market_distance'})

# market 1km count: postcode-level 'overlap_count' averaged per LSOA.
market_count_by_postcode = pd.read_csv(data_directory + 'postcode_market_1km_count.csv')
market_count_by_postcode['PCD'] = market_count_by_postcode['PCD'].str.replace(' ', '')
market_count_with_lsoa = pd.merge(
    market_count_by_postcode, lookup[['pcds', 'lsoa11cd']],
    how='left', left_on='PCD', right_on='pcds', indicator=True,
)
market_1km_count = market_count_with_lsoa.groupby('lsoa11cd')['overlap_count'].mean().reset_index()
# Exclude Scotland and NI - the data coverage isn't good enough
# (codes starting with 'S' or '9' are removed before the join).
is_scotland_or_ni = market_1km_count['lsoa11cd'].str[0].isin(['9', 'S'])
market_1km_count = market_1km_count[~is_scotland_or_ni]
df = pd.merge(
    df, market_1km_count.set_index('lsoa11cd'),
    how='left', left_index=True, right_index=True, indicator=True,
)
df = df.rename(columns={'_merge': 'market_1km_count_merge', 'overlap_count': 'market_1km_count'})

# large supermarket distance: postcode-level values (column '0') averaged per LSOA.
supermarket_by_postcode = pd.read_csv(data_directory + 'postcode_to_nearest_large_supermarket_distance.csv')
supermarket_by_postcode['PCD'] = supermarket_by_postcode['PCD'].str.replace(' ', '')
supermarket_with_lsoa = pd.merge(
    supermarket_by_postcode, lookup[['pcds', 'lsoa11cd']],
    how='left', left_on='PCD', right_on='pcds', indicator=True,
)
supermarket_distance = supermarket_with_lsoa.groupby('lsoa11cd')['0'].mean()
df = pd.merge(df, supermarket_distance, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'supermarket_distance_merge', '0': 'supermarket_distance'})

# supermarket 1km count: postcode-level 'overlap_count' averaged per LSOA.
supermarket_count_by_postcode = pd.read_csv(
    data_directory + 'postcode_to_nearest_large_supermarket_1km_count.csv',
    usecols=['PCD', 'overlap_count'],
)
supermarket_count_by_postcode['PCD'] = supermarket_count_by_postcode['PCD'].str.replace(' ', '')
supermarket_count_with_lsoa = pd.merge(
    supermarket_count_by_postcode, lookup[['pcds', 'lsoa11cd']],
    how='left', left_on='PCD', right_on='pcds', indicator=True,
)
supermarket_1kmcount = supermarket_count_with_lsoa.groupby('lsoa11cd')['overlap_count'].mean()
df = pd.merge(df, supermarket_1kmcount, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'supermarket_1kmcount_merge', 'overlap_count': 'supermarket_1kmcount'})

# Propensity shop online: the 'zshoponline' column becomes 'online_propensity'.
online_propensity = pd.read_csv(data_directory + 'propensity_shop_online.csv').set_index('LSOA11CD')
df = pd.merge(df, online_propensity, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'online_propensity_merge', 'zshoponline': 'online_propensity'})

# inc_dep: one file per nation, each brought to a shared 'inc_dep_indicator'
# column before being stacked and joined on the LSOA index.
inc_dep_eng = pd.read_csv(data_directory + 'inc_dep_england.csv', index_col=0)

inc_dep_ni = pd.read_csv(data_directory + 'inc_dep_ni.csv', index_col=0)
inc_dep_ni = inc_dep_ni.rename(columns={
    "Proportion of the population living in households whose equivalised income is below 60 per cent of the NI median \n(%)": "inc_dep_indicator",
})

inc_dep_scotland = pd.read_csv(data_directory + 'inc_dep_scotland.csv', index_col=0)
inc_dep_scotland = inc_dep_scotland.rename(columns={"Income_rate": "inc_dep_indicator"})
# Entries recorded as "*" are replaced by the mean of the remaining values,
# after which the column is converted to numeric.
has_value = inc_dep_scotland['inc_dep_indicator'] != "*"
scotland_mean = pd.to_numeric(inc_dep_scotland.loc[has_value, 'inc_dep_indicator']).mean()
inc_dep_scotland['inc_dep_indicator'] = pd.to_numeric(
    inc_dep_scotland['inc_dep_indicator'].replace("*", scotland_mean)
)

inc_dep_wal = pd.read_csv(data_directory + 'inc_dep_wal.csv', index_col=0)
inc_dep_wal = inc_dep_wal.rename(columns={"People in Income Deprivation (%)": "inc_dep_indicator"})
# The Welsh figure is a percentage; divide by 100 to get a proportion.
inc_dep_wal['inc_dep_indicator'] = inc_dep_wal['inc_dep_indicator'].div(100.0)

inc_dep = pd.concat([inc_dep_eng, inc_dep_ni, inc_dep_scotland, inc_dep_wal])
df = pd.merge(df, inc_dep, how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'inc_dep_merge'})

# Car access: share of households with no car, joined on 'geo_code'.
car = pd.read_csv(data_directory + 'car_access.csv')
car = car.assign(
    no_car_indicator=car['no_cars_in_household'] / car['households']
).set_index('geo_code')
df = pd.merge(df, car['no_car_indicator'], how='left', left_index=True, right_index=True, indicator=True)
df = df.rename(columns={'_merge': 'car_merge'})



In [10]:
# Indicators grouped under the heading they belong to; indicator_cols is the
# flattened list in the same order.
_indicators_by_heading = {
    'Proximity to and density of retail facilities': [
        'supermarket_1kmcount',
        'supermarket_distance',
    ],
    'Transport to and accessibility of grocery retail facilities': [
        'AccessibilityViaPublicTransport',
        'AverageTravelDistance',
    ],
    'E-commerce access': [
        'online_propensity',
        'OnlineGroceryAvailability',
    ],
    'Neighbourhood socio-economic and demographic': [
        'no_car_indicator',
        'inc_dep_indicator',
    ],
    'Proximity to and density of non-supermarket food provision': [
        'nonsupermarket_distance',
        'nonsupermarket_1kmcount',
        'market_1km_count',
        'market_distance',
    ],
    'Food for families': [
        'fsm_indicator',
        'healthy_start_voucher_uptake',
        'foodbank_distance',
    ],
    'Fuel poverty pressures': [
        'fuel_poverty_pct',
        'prepayment_prevalence',
    ],
}
indicator_cols = [
    column
    for columns in _indicators_by_heading.values()
    for column in columns
]

# Work on a copy restricted to the indicator columns, remove rows whose
# indicator values are exact repeats, and drop Isles of Scilly (E01019077).
# NOTE(review): drop_duplicates() compares values only, not the LSOA index -
# confirm that is the intended de-duplication.
priority_places = (
    df[indicator_cols]
    .copy()
    .drop_duplicates()
    .drop(index='E01019077')
)

# The first task is to orient each indicator in the correct direction
# i.e. so that high values correspond to higher priority places.
# Indicators in the first list keep their sign; those in the second are negated.
keep_sign = [
    'supermarket_distance',
    'AccessibilityViaPublicTransport',
    'AverageTravelDistance',
    'no_car_indicator',
    'inc_dep_indicator',
    'market_distance',
    'fuel_poverty_pct',
    'prepayment_prevalence',
    'nonsupermarket_distance',
    'fsm_indicator',
    'healthy_start_voucher_uptake',
]
flip_sign = [
    'foodbank_distance',
    'supermarket_1kmcount',
    'online_propensity',
    'OnlineGroceryAvailability',
    'market_1km_count',
    'nonsupermarket_1kmcount',
]
priority_places = pd.concat(
    [1 * priority_places[keep_sign], -1 * priority_places[flip_sign]],
    axis=1,
)

# Find our country-level denominators
# The country is the first character of the LSOA code held in the index.
priority_places['country'] = priority_places.index.str[0]
# Count rows per country with size(), which does not depend on the name
# reset_index() would give the index column. The Series is still named
# 'index' because it is merged again, and renamed from 'index', when the
# domains are ranked.
country_counts = priority_places.groupby('country').size().rename('index')
priority_places = priority_places.merge(country_counts, left_on='country', right_index=True, how='inner')
priority_places.rename({'index': 'country_denominator'}, inplace=True, axis=1)


# Perform ranking of each indicator
# Missing values are set to 0 before ranking. Ranks are taken within each
# country in descending order, so the largest oriented value gets rank 1.
priority_places = priority_places.fillna(0)
priority_places_ranked = (
    priority_places
    .groupby('country')
    .rank(method='min', ascending=False)
    .astype(int)
)

# Turn each rank r into the standard-normal quantile of (r - 0.5) / N,
# where N is the number of rows in that country.
country_sizes = priority_places['country_denominator']
for indicator in indicator_cols:
    rank_fraction = (priority_places_ranked[indicator] - 0.5) / country_sizes
    priority_places_ranked[indicator] = sp.stats.norm.ppf(rank_fraction, loc=0, scale=1)

priority_places_ranked['country'] = priority_places_ranked.index.str[0]

# Combine transformed indicators into domains
# Each domain score is the equal-weighted mean of its indicators' normal
# scores. Dividing by the number of indicators keeps the weight in step with
# the list: 'domain_food_for_families' has three indicators and is therefore
# weighted 1/3 each. Domains are only ranked afterwards, so the positive scale
# factor does not alter the rank order.
domain_indicators = {
    'domain_supermarket_proximity': ['supermarket_distance', 'supermarket_1kmcount'],
    'domain_supermarket_accessibility': ['AccessibilityViaPublicTransport', 'AverageTravelDistance'],
    'domain_ecommerce_access': ['online_propensity', 'OnlineGroceryAvailability'],
    'domain_socio_demographic': ['no_car_indicator', 'inc_dep_indicator'],
    'domain_nonsupermarket_proximity': ['nonsupermarket_distance', 'nonsupermarket_1kmcount', 'market_1km_count', 'market_distance'],
    'domain_food_for_families': ['foodbank_distance', 'healthy_start_voucher_uptake', 'fsm_indicator'],
    'domain_fuel_poverty': ['fuel_poverty_pct', 'prepayment_prevalence'],
}
for domain, indicators in domain_indicators.items():
    priority_places_ranked[domain] = priority_places_ranked[indicators].sum(axis=1) / len(indicators)

# Domain names in the order they are defined above.
domain_columns = list(domain_indicators)

# Rank the domains
# Ranks are ascending within each country: the smallest domain score gets rank 1.
domain_scores = priority_places_ranked[domain_columns + ['country']]
priority_places_domains = domain_scores.groupby('country').rank(method='min').astype(int)
priority_places_domains['country'] = priority_places_domains.index.str[0]

# Re-attach the per-country row counts used to scale the ranks.
priority_places_domains = pd.merge(
    priority_places_domains, country_counts,
    how='inner', left_on='country', right_index=True,
)
priority_places_domains = priority_places_domains.rename(columns={'index': 'country_denominator'})

# Exponentially transform each domain rank:
#   score = -23 * ln(1 - (rank / N) * (1 - exp(-100 / 23)))
# where N is the number of rows in the country.
rank_share = priority_places_domains[domain_columns].div(
    priority_places_domains['country_denominator'], axis=0
)
priority_places_domains_normalised = -23 * np.log(1 - rank_share * (1 - np.exp(- 100 / 23)))

# Combined score: four domains weighted 1/8 and three weighted 1/6 (sums to 1).
# The terms are accumulated in this order.
combined_weights = {
    'domain_supermarket_proximity': (1./8.),
    'domain_supermarket_accessibility': (1./8.),
    'domain_ecommerce_access': (1./8.),
    'domain_nonsupermarket_proximity': (1./8.),
    'domain_socio_demographic': (1./6.),
    'domain_food_for_families': (1./6.),
    'domain_fuel_poverty': (1./6.),
}
priority_places_domains['combined'] = sum(
    weight * priority_places_domains_normalised[domain]
    for domain, weight in combined_weights.items()
)

# Replace the combined score by its ascending rank within each country.
priority_places_domains['combined'] = (
    priority_places_domains
    .groupby('country')['combined']
    .rank(method='min')
    .astype(int)
)

# Convert each country's ranks into deciles labelled 1-10, one country and
# one column at a time.
ni_placeholder_domains = ['domain_ecommerce_access', 'domain_supermarket_accessibility', 'domain_fuel_poverty']
priority_places_deciles = priority_places_domains.copy()
for country in ['E', 'S', 'W', '9']:
    in_country = priority_places_deciles['country'] == country
    for col in domain_columns + ['combined']:
        if country == '9' and col in ni_placeholder_domains:
            # These three domains are given a placeholder 0 for country '9'.
            priority_places_deciles.loc[in_country, col] = 0
            continue
        country_ranks = priority_places_domains.loc[in_country, col]
        priority_places_deciles.loc[in_country, col] = pd.to_numeric(
            pd.qcut(country_ranks, 10, duplicates='drop', labels=range(1, 11))
        )

# Put ranks and deciles side by side; decile columns get a '_decile' suffix
# and the duplicated bookkeeping columns are removed.
priority_places_full = pd.merge(
    priority_places_domains, priority_places_deciles,
    left_index=True, right_index=True, suffixes=('', '_decile'),
)
priority_places_full = priority_places_full.drop(
    columns=['country_decile', 'country_denominator_decile']
)

# Rows whose LSOA code starts with '9'. The mask is built from
# priority_places_full's own index so that it always lines up with the frame
# it is applied to (a boolean array taken from another frame is applied by
# position and would silently hit the wrong rows if the order ever differed).
is_ni = priority_places_full.index.str.startswith('9')

# Blank out the three domains (ranks and deciles) for those rows.
priority_places_full.loc[is_ni,
                        ['domain_supermarket_accessibility',
                         'domain_ecommerce_access',
                         'domain_fuel_poverty',
                         'domain_supermarket_accessibility_decile',
                         'domain_ecommerce_access_decile',
                         'domain_fuel_poverty_decile']] = pd.NA

# Rename decile columns to align with original field names
priority_places_full.rename({'domain_supermarket_proximity_decile': 'pp_dec_domain_supermarket_proximity',
                             'domain_supermarket_accessibility_decile': 'pp_dec_domain_supermarket_accessibility',
                             'domain_ecommerce_access_decile': 'pp_dec_domain_ecommerce_access',
                             'domain_socio_demographic_decile': 'pp_dec_domain_socio_demographic',
                             'domain_nonsupermarket_proximity_decile': 'pp_dec_domain_nonsupermarket_proximity',
                             'domain_food_for_families_decile': 'pp_dec_domain_food_for_families',
                             'domain_fuel_poverty_decile': 'pp_dec_domain_fuel_poverty',
                             'combined_decile': 'pp_dec_combined'}, inplace=True, axis=1)

# Export the '9' country code as 'NI'.
priority_places_full.loc[is_ni, ['country']] = 'NI'

# Written to the current working directory with a timestamped file name.
priority_places_full.to_csv('priority_places_index_created_%s.csv' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

In [None]:
# Reduce weighting on accessibility domains to test London specific version of the index
# Four domains are weighted 1/12 and three 2/9 (sums to 1). The terms are
# accumulated in this order.
london_weights = {
    'domain_supermarket_proximity': (1./12.),
    'domain_supermarket_accessibility': (1./12.),
    'domain_ecommerce_access': (1./12.),
    'domain_nonsupermarket_proximity': (1./12.),
    'domain_socio_demographic': (2./9.),
    'domain_food_for_families': (2./9.),
    'domain_fuel_poverty': (2./9.),
}
priority_places_domains['combined'] = sum(
    weight * priority_places_domains_normalised[domain]
    for domain, weight in london_weights.items()
)

# Replace the combined score by its ascending rank within each country.
priority_places_domains['combined'] = (
    priority_places_domains
    .groupby('country')['combined']
    .rank(method='min')
    .astype(int)
)

# Convert each country's ranks into deciles labelled 1-10, one country and
# one column at a time.
ni_placeholder_domains = ['domain_ecommerce_access', 'domain_supermarket_accessibility', 'domain_fuel_poverty']
priority_places_deciles = priority_places_domains.copy()
for country in ['E', 'S', 'W', '9']:
    in_country = priority_places_deciles['country'] == country
    for col in domain_columns + ['combined']:
        if country == '9' and col in ni_placeholder_domains:
            # These three domains are given a placeholder 0 for country '9'.
            priority_places_deciles.loc[in_country, col] = 0
            continue
        country_ranks = priority_places_domains.loc[in_country, col]
        priority_places_deciles.loc[in_country, col] = pd.to_numeric(
            pd.qcut(country_ranks, 10, duplicates='drop', labels=range(1, 11))
        )

# Put ranks and deciles side by side; decile columns get a '_decile' suffix
# and the duplicated bookkeeping columns are removed.
priority_places_full = pd.merge(
    priority_places_domains, priority_places_deciles,
    left_index=True, right_index=True, suffixes=('', '_decile'),
)
priority_places_full = priority_places_full.drop(
    columns=['country_decile', 'country_denominator_decile']
)

# Rows whose LSOA code starts with '9'. The mask is built from
# priority_places_full's own index so that it always lines up with the frame
# it is applied to.
is_ni = priority_places_full.index.str.startswith('9')

# Blank out the three domains (ranks and deciles) for those rows. The
# e-commerce decile column is 'domain_ecommerce_access_decile'; the previous
# 'domain_domain_...' spelling did not match it, so the placeholder decile
# for these rows was never blanked.
priority_places_full.loc[is_ni,
                        ['domain_supermarket_accessibility',
                         'domain_ecommerce_access',
                         'domain_fuel_poverty',
                         'domain_supermarket_accessibility_decile',
                         'domain_ecommerce_access_decile',
                         'domain_fuel_poverty_decile']] = pd.NA

# Export the '9' country code as 'NI'.
priority_places_full.loc[is_ni, ['country']] = 'NI'

priority_places_full.to_csv(data_directory + 'priority_places_london_weighting.csv')