https://www.api.gov.uk/ons/open-geography-portal/#open-geography-portal

# Libraries

In [None]:
!pip install fingertips_py
!pip install requests

import fingertips_py as ftp
import requests
import io
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd

import json
import urllib.request
import re

from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator

import os
!pip install geopandas
import geopandas as gpd

!pip install geomet
from geomet import wkt

import gspread_dataframe as gd

import shapely

from functools import reduce

# Authenticate the current Colab user before any Google API is called.
from google.colab import auth
auth.authenticate_user()
import gspread
from google.auth import default
# default() yields a 2-tuple; only the first element (the credentials) is kept.
creds, _ = default()
# gspread client authorised with those credentials.
gc = gspread.authorize(creds)

In [None]:
# Lift both display limits (number of columns shown, width of a cell) by
# setting each option to None.
for _option in ('display.max_columns', "max_colwidth"):
    pd.set_option(_option, None)

In [None]:
# Lookup from an area-shape year label to an integer code.
# NOTE(review): 1 and 9999 look like sentinel codes for the two labels that
# are not a single financial year — confirm against the consumer of this dict.
area_shape_year_dict = {
    "from Apr 2021": 202122,
    "pre Apr 2019": 1,
    "2020/21": 202021,
    "2019/20": 201920,
    "2018/19": 201819,
    "2017/18": 201718,
    "No year provided": 9999,
}

## Shape files from ONS

https://geoportal.statistics.gov.uk/

In [None]:
def load_ons_open_geog_data(url, boundary_file=False, drop_geometry=True, england_only=True, code_field=None):
    """Download a JSON query result and return its features as a DataFrame.

    The response's top-level ``features`` list is flattened with
    ``pd.json_normalize`` and the ``attributes.`` prefix is removed from the
    resulting column names.

    Args:
        url: Query URL whose response is JSON with a ``features`` list.
        boundary_file: When True, apply the two boundary-specific steps
            below; when False the flattened data is returned as-is.
        drop_geometry: Boundary files only - drop the ``geometry.rings``
            column.
        england_only: Boundary files only - keep only the rows whose
            ``code_field`` value starts with ``'E'``.
        code_field: Name of the area-code column used by ``england_only``.

    Returns:
        pandas.DataFrame with one row per feature.

    Raises:
        ValueError: If the England filter is requested (``boundary_file`` and
            ``england_only`` both true) but ``code_field`` is not given.
    """
    # Fail before the download rather than with an opaque ``KeyError: None``
    # after it.
    if boundary_file and england_only and code_field is None:
        raise ValueError(
            "code_field is required when boundary_file=True and england_only=True"
        )

    # Bind the response to its own name so the ``url`` argument is not shadowed.
    with urllib.request.urlopen(url) as response:
        json_data = json.loads(response.read().decode())

    data = pd.json_normalize(json_data['features'])
    data.columns = [column.replace('attributes.', '') for column in data.columns]

    if boundary_file:
        if drop_geometry:
            data = data.drop(columns='geometry.rings')

        if england_only:
            # na=False keeps missing codes out of the result, as before.
            # .copy() hands the caller an independent frame, so adding or
            # overwriting columns afterwards does not raise
            # SettingWithCopyWarning.
            data = data[data[code_field].str.startswith('E', na=False)].copy()

    return data

Counties and Unitary Authorities (UAs)

In [None]:
counties_uas_dec_2021_url = 'https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Counties_and_Unitary_Authorities_December_2021_UK_BUC/FeatureServer/0/query?where=1%3D1&outFields=CTYUA21CD,CTYUA21NM,CTYUA21NMW&outSR=4326&f=json'

# Load the England rows with geometry kept, then in one chain:
#   1. wrap the stringified rings in one extra pair of brackets,
#   2. drop the CTYUA21NMW column,
#   3. rename the geometry column to "shape",
#   4. append the '_202122' suffix to every area code.
counties_uas_dec_2021 = (
    load_ons_open_geog_data(
        counties_uas_dec_2021_url,
        boundary_file=True,
        drop_geometry=False,
        england_only=True,
        code_field='CTYUA21CD',
    )
    .assign(**{'geometry.rings': lambda frame: '[' + frame['geometry.rings'].astype(str) + ']'})
    .drop(columns='CTYUA21NMW')
    .rename(columns={"geometry.rings": "shape"})
    .assign(CTYUA21CD=lambda frame: frame['CTYUA21CD'] + '_202122')
)

In [None]:
# Preview the first row to sanity-check the columns produced by the cell above.
counties_uas_dec_2021.head(1)

In [None]:
# from google.colab import drive
# drive.mount('drive')
# counties_uas_dec_2021.to_csv('/content/drive/Shared drives/Business Intelligence/Underserved Communities/Themes/Liver disease/counties_uas_dec_2021.csv', index=False)

Clinical Commissioning Groups (CCGs)

In [None]:
# arcgis is installed with --no-deps; the packages below are then installed
# individually (presumably the subset of its dependencies this notebook
# actually needs — TODO confirm).
!pip install arcgis --no-deps
!pip install cryptography
!pip install requests_ntlm
!pip install ujson
!pip install requests_toolbelt
!pip install ntlm_auth
!pip install six

In [None]:
from arcgis.gis import GIS
# GIS() is called without a URL or credentials. The resulting module-level
# `gis` object is what extract_ons_geoportal_shapes uses to look items up.
gis=GIS()

In [None]:
def extract_ons_geoportal_shapes(id, code, name, shape, str_year):
    """Fetch a portal item and return its area codes, names and ring coordinates.

    The item is looked up through the module-level ``gis`` connection and its
    first layer is queried with ``out_sr='4326'`` (longitude/latitude). Layer
    names, the first layer's field names and the queried columns are printed
    as a diagnostic aid.

    Args:
        id: Item ID passed to ``gis.content.get``. The name shadows the
            builtin ``id`` but is kept because it is part of the signature.
        code: Name of the area-code column.
        name: Name of the area-name column.
        shape: Name of the geometry column.
        str_year: Suffix appended to every value in the ``code`` column,
            e.g. ``'_202122'``.

    Returns:
        DataFrame with the columns ``[code, name, shape]``. Rows without a
        geometry are dropped, and ``shape`` holds the geometry as a string
        with the ``{"rings": `` prefix and the spatial-reference suffix
        replaced by ``[`` and ``]``.
    """
    item = gis.content.get(id)

    for layer in item.layers:
        print(layer.properties.name)

    first_layer = item.layers[0]
    for field in first_layer.properties.fields:
        print(field['name'])

    df = first_layer.query(out_sr='4326').sdf  # 4326 if long lat

    print(df.columns)

    df = df.dropna(axis=0, subset=[shape])

    # Both patterns are literal text containing '{', so regex=False is stated
    # explicitly: the default of `regex` differs between pandas versions.
    # NOTE(review): the suffix is hard-coded for wkid 4326, matching out_sr above.
    df[shape] = (
        df[shape]
        .astype(str)
        .str.replace('{"rings": ', '[', regex=False)
        .str.replace(', "spatialReference": {"wkid": 4326, "latestWkid": 4326}}', ']', regex=False)
    )

    # .copy() so the suffix assignment below writes to an independent frame
    # rather than to a slice of `df`.
    df_final = df.loc[:, [code, name, shape]].copy()

    df_final[code] = df_final[code] + str_year

    return df_final

In [None]:
# CCG21 boundaries; every CCG21CD value gets the '_202122' suffix.
ccgs_apr_2021_df_final = extract_ons_geoportal_shapes(
    id='d6acd30ad71f4e14b4de808e58d9bc4c',
    code='CCG21CD',
    name='CCG21NM',
    shape='SHAPE',
    str_year='_202122',
)

In [None]:
# drive.mount('drive')
# ccgs_apr_2021_df_final.to_csv('/content/drive/Shared drives/Business Intelligence/Underserved Communities/Themes/Liver disease/ccgs_apr_2021.csv', index=False)

In [None]:
# CCG19 boundaries; every CCG19CD value gets the '_201920' suffix.
ccgs_apr_2019_df_final = extract_ons_geoportal_shapes(
    id='290defe41b3d46fb9a5898ef8977d37f',
    code='CCG19CD',
    name='CCG19NM',
    shape='SHAPE',
    str_year='_201920',
)

In [None]:
# Display the 2019 CCG frame for a visual check.
ccgs_apr_2019_df_final

In [None]:
# Count the 2019 CCG codes that also occur among the 'Area Code' values of the
# 'iod_2019' indicator rows in data_prepared.
# NOTE(review): CCG19CD already carries the '_201920' suffix at this point —
# confirm that 'Area Code' uses the same suffixed form.
ccgs_apr_2019_df_final['CCG19CD'].isin(
    data_prepared.loc[data_prepared['Indicator ID'] == 'iod_2019', 'Area Code']
).sum()

In [None]:
# drive.mount('drive')
# ccgs_apr_2019_df_final.to_csv('/content/drive/Shared drives/Business Intelligence/Underserved Communities/Themes/Liver disease/ccgs_apr_2019.csv', index=False)

In [None]:
# First number: distinct area codes in data_prepared.
# Second number: distinct codes per boundary frame, summed over the three frames.
print(
    len(data_prepared['Area Code'].unique()),
    sum(
        len(codes.unique())
        for codes in (
            ccgs_apr_2019_df_final['CCG19CD'],
            ccgs_apr_2021_df_final['CCG21CD'],
            counties_uas_dec_2021['CTYUA21CD'],
        )
    ),
)

# How many of the distinct area codes are found in any of the boundary frames.
pd.DataFrame(data_prepared['Area Code'].unique()).isin([
    *ccgs_apr_2019_df_final['CCG19CD'].unique(),
    *ccgs_apr_2021_df_final['CCG21CD'].unique(),
    *counties_uas_dec_2021['CTYUA21CD'].unique(),
]).sum()

## Shapes

### Trusts (Northern Ireland)

In [None]:
# Trust abbreviation -> 'ZT…' code; applied to the TrustCode column of the
# trust boundary file.
ni_trust_dict = {
    "BHSCT": "ZT001",
    "NHSCT": "ZT002",
    "WHSCT": "ZT005",
    "SHSCT": "ZT003",
    "SEHSCT": "ZT004",
}

In [None]:
# Make root_data_ni (defined outside this section) the working directory and
# list what it contains.
os.chdir(root_data_ni)
os.listdir()

In [None]:
# Load the trust boundaries, simplify each geometry and store its coordinates
# as a string in a 'shape' column.
trustboundaries = gpd.GeoDataFrame.from_file(root_data_ni + '/trustboundaries.geojson')
trustboundaries['geometry_simplified'] = trustboundaries.geometry.simplify(tolerance=0.01)
trustboundaries['geometry_simplified_str'] = trustboundaries.geometry_simplified.astype('str')

# Build the whole column in one pass. The previous per-row
# `df['shape'].iloc[i] = ...` was a chained assignment (it warns, and writes
# nothing under pandas Copy-on-Write) and it mixed positional writes with
# label-based reads.
trustboundaries['shape'] = [
    str(wkt.loads(geometry_wkt)['coordinates'])
    for geometry_wkt in trustboundaries['geometry_simplified_str']
]

# Map trust abbreviations to their ZT codes, then tag them with the year suffix.
trustboundaries['TrustCode'] = trustboundaries['TrustCode'].replace(ni_trust_dict)
trustboundaries['TrustCode'] = trustboundaries['TrustCode'] + '_202122'

In [None]:
# Inspect the available columns before selecting the ones to export.
trustboundaries.columns

In [None]:
# Keep only the name, code and stringified-coordinate columns; the geometry
# columns are left behind.
trustboundaries_final = trustboundaries.loc[:, ['TrustName', 'TrustCode', 'shape']]

In [None]:
# Display the export-ready trust table for a visual check.
trustboundaries_final

In [None]:
# Manual switch: set to True to push trustboundaries_final to the Google Sheet.
run = False

if run:
    # Authenticate and build a gspread client before writing.
    from google.colab import auth
    auth.authenticate_user()
    import gspread
    from google.auth import default
    creds, _ = default()
    gc = gspread.authorize(creds)

    # Open the spreadsheet by key and take its "Data" worksheet.
    northern_ireland_trustboundaries = gc.open_by_key(
        '1O5RxtTSP2UH372uh7nr_aTlJCBXMyP4mZzTBpKAgTqo'
    ).worksheet("Data")

    gd.set_with_dataframe(northern_ireland_trustboundaries, trustboundaries_final)
else:
    print("This has not run.")

### Local Government Districts

In [None]:
# Item ID handed to extract_ons_geoportal_shapes for the Local Government
# Districts layer.
ni_lgd_id = 'd7572a9e2f1b4638ab0430785c0da3e8'

In [None]:
# Local Government District shapes; every LGDCode value gets the '_202122' suffix.
ni_lgd_final = extract_ons_geoportal_shapes(
    id=ni_lgd_id,
    code='LGDCode',
    name='LGDNAME',
    shape='SHAPE',
    str_year='_202122',
)

In [None]:
# # get a GeoJSON string representation of the FeatureSet
# gjson_string = fset.to_json

# # # read GeoJSON string into a dict
# gjson_dict = json.loads(gjson_string)

# gdf = gpd.GeoDataFrame.from_features(ni_lgd_final['shape'])
# # may need to specify CRS and geometry column name after GeoDataFrame construction

In [None]:
# # get the hosted feature layer
# flayer = gis.content.get(ni_lgd_id).layers[0]

# # .query() returns a FeatureSet
# fset = flayer.query(result_offset = 5)

# # # get a GeoJSON string representation of the FeatureSet
# # gjson_string = fset.to_geojson

# # # read GeoJSON string into a dict
# # gjson_dict = json.loads(gjson_string)

# # gdf = gpd.GeoDataFrame.from_features(gjson_dict['features'])

In [None]:
# run = False

# if run ==False:
#     print("This has not run.")
# else:
#     from google.colab import auth
#     auth.authenticate_user()
#     import gspread
#     from google.auth import default
#     creds, _ = default()
#     gc = gspread.authorize(creds)

#     northern_ireland_local_government_districts = gc.open_by_key('1CThoGPHzShopzHG-6ItP3781joZ_HRPDAbFqoEyjUrk')
#     northern_ireland_local_government_districts = northern_ireland_local_government_districts.worksheet("Data")

#     gd.set_with_dataframe(northern_ireland_local_government_districts, ni_lgd)

In [None]:
# Load the Local Government District boundaries, drop rows without a geometry,
# simplify each geometry and store its coordinates as a string in 'shape'.
lgd = gpd.GeoDataFrame.from_file(root_data_ni + '/Local_Government_Districts.geojson')
lgd = lgd.dropna(axis=0, subset=['geometry'])
lgd['geometry_simplified'] = lgd.geometry.simplify(tolerance=0.01)
lgd['geometry_simplified_str'] = lgd.geometry_simplified.astype('str')

# Iterate over the values themselves. After dropna the index is no longer
# guaranteed to be 0..n-1, so the previous `series[i]` label lookup inside
# `range(len(lgd))` could raise KeyError or pair a row with the wrong geometry.
# The chained `df['shape'].iloc[i] = ...` write is also avoided.
lgd['shape'] = [
    str(wkt.loads(geometry_wkt)['coordinates'])
    for geometry_wkt in lgd['geometry_simplified_str']
]

lgd['LGDCode'] = lgd['LGDCode'] + '_202122'

In [None]:
# Inspect the available columns before selecting the ones to export.
lgd.columns

In [None]:
# Keep only the name, code and stringified-coordinate columns; the geometry
# columns are left behind.
lgd_final = lgd.loc[:, ['LGDNAME', 'LGDCode', 'shape']]

In [None]:
# Manual switch: set to True to push lgd_final to the Google Sheet.
run = False

if run:
    # Authenticate and build a gspread client before writing.
    from google.colab import auth
    auth.authenticate_user()
    import gspread
    from google.auth import default
    creds, _ = default()
    gc = gspread.authorize(creds)

    # Open the spreadsheet by key and take its "Data" worksheet.
    northern_ireland_local_government_districts = gc.open_by_key(
        '1CThoGPHzShopzHG-6ItP3781joZ_HRPDAbFqoEyjUrk'
    ).worksheet("Data")

    gd.set_with_dataframe(northern_ireland_local_government_districts, lgd_final)
else:
    print("This has not run.")