Source API: ONS Open Geography Portal — https://www.api.gov.uk/ons/open-geography-portal/#open-geography-portal

# Libraries

In [None]:
import numpy as np
import pandas as pd

import json
import urllib.request

In [None]:
# Show every column and the full, untruncated content of each cell when a
# DataFrame is displayed. Both options are named in full: abbreviated keys
# are resolved by pattern matching and can become ambiguous if pandas adds a
# similarly named option.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

## Shape files from ONS

ONS Open Geography Portal: https://geoportal.statistics.gov.uk/

ArcGIS REST services directory listing the available layers: https://services1.arcgis.com/ESMARspQHYMw9BZ9/ArcGIS/rest/services

## Retrieve layer data

In [None]:
# Example GeoJSON query URLs for two individual layers. Each one is the
# services root, the dataset name, and the layer-0 "all features, all
# fields" query string.
url = (
    'https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/'
    'Clinical_Commissioning_Groups_April_2019_Boundaries_EN_BUC_2022'
    '/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
)

region_url = (
    'https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/'
    'Regions_December_2022_EN_BUC'
    '/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
)

In [None]:
def create_geojson_url(dataset_name:str):
    """Return the GeoJSON query URL for layer 0 of the named feature service.

    The query string asks for every field (``outFields=*``) of every feature
    (``where=1=1``, URL-encoded) in GeoJSON format.
    """
    services_root = 'https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/'
    layer_query = '/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
    return ''.join((services_root, dataset_name, layer_query))

In [None]:
def retrieve_geojson_from_url(url:str):
    """Download a GeoJSON document and return its features as a flat table.

    The response body is parsed as JSON and the list under its ``features``
    key is flattened with ``pd.json_normalize``, so nested keys become
    dotted column names. A ``KeyError`` is raised if the document has no
    ``features`` key.
    """
    with urllib.request.urlopen(url) as response:
        raw_body = response.read()

    features = json.loads(raw_body)['features']
    return pd.json_normalize(features)

In [None]:
def save_geojson_from_url(url:str, filename:str):
    """Download the GeoJSON at ``url`` and write its features to a CSV file.

    ``filename`` is given without an extension; ``.csv`` is appended. The
    table is written without its index and a confirmation line is printed.
    """
    csv_name = filename + '.csv'
    retrieve_geojson_from_url(url).to_csv(csv_name, index=False)
    print(csv_name + ' successfully saved')

In [None]:
def save_geojson_from_dataset_name(dataset_name:str):
    """Download the named dataset and save it as ``<dataset_name>.csv``.

    The dataset name is used both to build the query URL and as the output
    file name.
    """
    save_geojson_from_url(create_geojson_url(dataset_name), dataset_name)

In [None]:
# with urllib.request.urlopen(url) as contents:
#     json_data = json.loads(contents.read())
# shape_data = pd.json_normalize(json_data['features'])

# shape_data.to_csv('test_shape_data.csv')

In [None]:
# with urllib.request.urlopen(region_url) as contents:
#     json_data = json.loads(contents.read())
# region_shape_data = pd.json_normalize(json_data['features'])

# region_shape_data.to_csv('test_region_shape_data.csv')

In [None]:
# with open('region/region_merged.geojson', 'r') as contents:
#     json_data = json.loads(contents.read())
# region_shape_data = pd.json_normalize(json_data['features'])
# region_shape_data.to_csv('region/region_merged_geojson_cleaned.csv')



## Shape directory

In [None]:
shape_directory_url = 'https://services1.arcgis.com/ESMARspQHYMw9BZ9/ArcGIS/rest/services?f=pjson'

# Download the services listing and keep the list stored under 'services'.
with urllib.request.urlopen(shape_directory_url) as contents:
    shape_directory = json.loads(contents.read())['services']

# One row per service, restricted to the feature services.
shape_directory = pd.json_normalize(shape_directory)
shape_directory = shape_directory[shape_directory['type'] == 'FeatureServer']

In [None]:
def shape_name_contains(str1:str, str2:str):
    """Return the rows of ``shape_directory`` whose name contains both substrings.

    Matching is case-insensitive. Both arguments are treated as literal
    text, not regular expressions, so characters such as ``.`` or ``(``
    match only themselves. An empty string matches every name. Rows with a
    missing name never match.
    """
    # Lower-case the name column once and reuse it for both tests.
    names = shape_directory['name'].str.lower()
    has_first = names.str.contains(str1.lower(), regex=False, na=False)
    has_second = names.str.contains(str2.lower(), regex=False, na=False)
    return shape_directory[has_first & has_second]

### icb

In [None]:
# Directory entries whose name contains both 'icb_jul_2022' and 'buc'
# (case-insensitive); the bare name on the last line displays the result.
icb_shape_directory = shape_name_contains('icb_jul_2022', 'buc')
icb_shape_directory

In [None]:
# save_geojson_from_dataset_name(icb_shape_directory['name'].values[0])

### sub_icb

In [None]:
# Stack the directory entries for the 'bgc' and the 'buc' variants of the
# sub_integrated_care_board layers into a single table.
sub_icb_shape_directory = pd.concat([shape_name_contains('sub_integrated_care_board', 'bgc'),
                                     shape_name_contains('sub_integrated_care_board', 'buc')
])

In [None]:
sub_icb_shape_directory

In [None]:
# save_geojson_from_dataset_name(sub_icb_shape_directory['name'].values[0])

### ccg

In [None]:
# The CCG layers do not share one naming scheme, so several name patterns are
# searched and the matches stacked. The trailing comments record the years
# each pattern is noted as covering.
ccg_shape_directory = pd.concat([shape_name_contains('buc', 'clinical_commissioning_groups'), # 2019, 2021
                                 shape_name_contains('generalised', 'clinical_commissioning_groups'), # 2020
                                 shape_name_contains('ugcb', 'ccg') # 2015 to 2018
])

In [None]:
ccg_shape_directory

In [None]:
# for i in ccg_shape_directory['name']:
#     # print(i)
#     save_geojson_from_dataset_name(i)

### ltla

In [None]:
# Lower-tier local authority layers: two name patterns are searched and the
# matches stacked. The second pattern is written in mixed case but is
# lower-cased by shape_name_contains before matching.
ltla_shape_directory = pd.concat([shape_name_contains('local_authority_districts', 'uk_buc'), # 2019 to 2023
                                  shape_name_contains('lad', '2018_Boundaries_UK_BUC')
])

ltla_shape_directory

# shape_name_contains('local_authority_districts', 'generalised') # 2018
# shape_name_contains('local_authority_districts', 'ugcb') # 2019, 2020

In [None]:
# for i in ltla_shape_directory['name']:
#     # print(i)
#     save_geojson_from_dataset_name(i)

### utla

In [None]:
# Upper-tier local authority layers: one name pattern per naming scheme, with
# the matches stacked. The trailing comments record the years each pattern is
# noted as covering.
utla_shape_directory = pd.concat([shape_name_contains('counties_and_unitary_authorities', 'ew_buc'), # 2017, 2019
                                  shape_name_contains("counties_and_unitary_authorities_december_2018", 'gcb_ew'), # 2018
                                  shape_name_contains('counties_and_unitary_authorities', '2020_uk_bgc'), # 2020
                                  shape_name_contains('counties_and_unitary_authorities', 'en_buc'), # 2021
])

In [None]:
utla_shape_directory

In [None]:
# for i in utla_shape_directory['name']:
#     # print(i)
#     save_geojson_from_dataset_name(i)

### region

In [None]:
# Only one pattern is needed here; the empty second argument matches every
# name, so the first argument alone decides the selection.
region_shape_directory = shape_name_contains('Regions_December_2022_EN_BUC', '')

region_shape_directory

In [None]:
# save_geojson_from_dataset_name(region_shape_directory['name'].values[0])

### STPs (pre Apr 2020) and STPs (from Apr 2020)

In [None]:
# STP layers appear under two naming schemes; both patterns are searched and
# the matches stacked. The trailing comments record the years each pattern is
# noted as covering.
stp_shape_directory = pd.concat([shape_name_contains('sustainability_and_transformation', 'buc'), # 2018, 2021
                                 shape_name_contains('stp', '_gcb') # 2017, 2019, 2020
])

In [None]:
stp_shape_directory

In [None]:
# for i in stp_shape_directory['name']:
#     save_geojson_from_dataset_name(i)

# Append files together

In [None]:
# Read the area lookup sheet through its Google Sheets CSV export URL, then
# keep the non-missing entries of its 'shape_file' column as a one-column
# DataFrame. These are the file names the functions below iterate over.
gsheet_areas = pd.read_csv('https://docs.google.com/spreadsheets/u/0/d/15RhWWsHPPMLWoxR5sJcpK-vraRkidRY8jsAb_Y_5GwI/gviz/tq?tqx=out:csv&tq&gid=963757659&headers=1')
files_required = gsheet_areas.loc[:, ['shape_file']].dropna()

In [None]:
# Lookup from shape file name to AreaTypeId; rows missing either value are dropped.
area_id_to_shape_file = gsheet_areas.loc[:, ['AreaTypeId', 'shape_file']].dropna()

In [None]:
def code_and_name_fields(folder_name):
    """Find the area-code and area-name column of every required shape file.

    For each file name in the first column of ``files_required``, the CSV
    ``<folder_name>/<file name>`` is inspected. The code field is the first
    column whose name contains ``CD`` or ``cd``. The name field is the first
    column whose name contains ``NM`` or ``nm``.

    Returns a DataFrame with columns ``shape_file``, ``code_field`` and
    ``name_field``, one row per file. It is empty, with those columns, when
    ``files_required`` has no rows.

    Raises ``IndexError`` if a file has no column matching one of the
    patterns.
    """
    rows = []

    for filename in files_required.iloc[:, 0]:
        # Only the column names are inspected, so read just the header row
        # instead of loading the whole (potentially large) shape file.
        header = pd.read_csv(f'{folder_name}/{filename}', nrows=0)
        rows.append([
            filename,
            header.filter(regex='CD|cd').columns[0],
            header.filter(regex='NM|nm').columns[0],
        ])

    # Built once after the loop, so the result exists even with no files.
    return pd.DataFrame(rows, columns=['shape_file', 'code_field', 'name_field'])



In [None]:
# Detect the code/name columns of every required file in the 'shapes' folder.
# The commented-out line below writes that table to the CSV file name that
# append_shapes reads by default.
data = code_and_name_fields('shapes')
# data.to_csv('2023-05-19_shape_code_and_name_fields.csv', index=False)
# data

In [175]:
def append_shapes(folder_name, shape_fields_path='2023-05-19_shape_code_and_name_fields.csv'):
    """Stack every required shape file into one table with unified columns.

    For each file name in the first column of ``files_required``, the CSV
    ``<folder_name>/<file name>`` is read and then:

    * its code and name columns (looked up by file name in the CSV at
      ``shape_fields_path``) are renamed to ``area_code`` and ``area_name``;
    * an ``area_type_id`` column is added from ``area_id_to_shape_file``;
    * ``area_code`` is suffixed with ``_<area_type_id>``.

    Parameters:
        folder_name: folder containing the shape CSV files.
        shape_fields_path: CSV with columns ``shape_file``, ``code_field``
            and ``name_field``. Its rows may be in any order.

    Returns the concatenated DataFrame with the original row indices kept,
    or an empty DataFrame when ``files_required`` has no rows.

    Raises ``KeyError`` if a file is not listed in the field-mapping CSV,
    and ``IndexError`` if it has no entry in ``area_id_to_shape_file``.
    """
    shape_fields = pd.read_csv(shape_fields_path)

    frames = []

    for filename in files_required.iloc[:, 0]:
        shapes = pd.read_csv(f'{folder_name}/{filename}')

        # Look the mapping up by file name only, so it does not depend on the
        # mapping CSV having the same row order as files_required.
        fields = shape_fields.loc[shape_fields['shape_file'] == filename,
                                  ['code_field', 'name_field']]
        if fields.empty:
            raise KeyError(f'{filename} is not listed in {shape_fields_path}')
        code_field, name_field = fields.iloc[0]
        shapes = shapes.rename({code_field: 'area_code', name_field: 'area_name'}, axis=1)

        area_type_id = area_id_to_shape_file.loc[
            area_id_to_shape_file['shape_file'] == filename, 'AreaTypeId'
        ].iloc[0]
        shapes['area_type_id'] = area_type_id
        # NOTE(review): the suffix is the string form of AreaTypeId, so a
        # float-typed id would read e.g. '6.0' - confirm the lookup's dtype.
        shapes['area_code'] = shapes['area_code'] + '_' + shapes['area_type_id'].astype(str)

        frames.append(shapes)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames)



In [176]:
# append_shapes('shapes').to_csv('shapes/2023-05-19_shapes_appended.csv', index=False)