In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import fiona
import folium
import glob
import matplotlib.pyplot as plt

# Create the crowding factor for Local Authority (LA), LSOA and Output Area (OA) boundaries

In [2]:
# Load every EPC certificate CSV into a single table.
# Only the columns used later in the notebook are read.
epc_columns = ['UPRN', 'TOTAL_FLOOR_AREA', 'LATITUDE', 'LONGITUDE']

# Glob once (the pattern was previously evaluated twice) and sort the matches so
# the row order does not depend on the order the filesystem lists the files in.
list_paths2 = sorted(glob.glob("./../../../../../../Desktop/MAV/all_certificates/*.csv"))

# Read all files first and concatenate in one call: calling pd.concat inside the
# loop re-copies every previously loaded row on each iteration.
epc_frames = [pd.read_csv(path, usecols=epc_columns) for path in list_paths2]

if epc_frames:
    # ignore_index gives each certificate a unique row label instead of
    # repeating 0..n-1 for every input file.
    data = pd.concat(epc_frames, ignore_index=True)
else:
    # No file matched the pattern: keep an empty table that still has the
    # expected columns, so later cells see a consistent schema.
    data = pd.DataFrame(columns=epc_columns)

In [3]:
# Turn the EPC table into a point layer: one point per record, built from its
# longitude (x) and latitude (y) columns and tagged as EPSG:4326.
epc_points = gpd.points_from_xy(x=data['LONGITUDE'], y=data['LATITUDE'])
data = gpd.GeoDataFrame(data, geometry=epc_points, crs='EPSG:4326')

In [4]:
# --- Crowding factor per Local Authority District (LAD21CD) ---

# A district is kept only if it has more than this many EPC records with a floor area.
min_epc_count_LA = 11
# Census column holding the number of people living in a household.
household_col = 'Residence type: Lives in a household; measures: Value'

# read the boundary data and put it in the same CRS as the EPC points
LA = gpd.read_file('../Data/boundaries/LAD_Dec_2021_GB_BFC_2022_-8975151699474964544.geojson').to_crs('EPSG:4326')

# extract boundary ID to EPCs: tag each EPC point with the district it falls in
# (left join, so EPCs outside every district are kept with a missing LAD21CD)
epc3_LA = data.sjoin(LA.loc[:, ['LAD21CD', 'LAD21NM', 'geometry']], how='left')

# group EPCs by the boundary ID: median floor area and number of non-null floor areas
# NOTE(review): this counts EPC rows, not unique properties; if the source holds
# several certificates per UPRN, confirm whether de-duplication was intended.
house_area_LA = (
    epc3_LA.loc[:, ['LAD21CD', 'TOTAL_FLOOR_AREA']]
    .groupby(['LAD21CD'])
    .agg(median_house_area=('TOTAL_FLOOR_AREA', 'median'),
         count_houses=('TOTAL_FLOOR_AREA', 'count'))
    .reset_index()
)

# remove boundary IDs where there is not enough EPCs
house_area_LA = house_area_LA[house_area_LA['count_houses'] > min_epc_count_LA]

# merge the epc data to boundaries; the right join keeps only districts that passed the filter
LA = LA.merge(house_area_LA, on='LAD21CD', how='right')

# load census: pick the two needed columns by name rather than by position,
# so the selection does not depend on the column layout of the CSV
LA_census = pd.read_csv(
    './../Data/census/2021/census2021-ts001/census2021-ts001-ltla.csv',
    usecols=['geography code', household_col],
).rename(columns={'geography code': 'LAD21CD', household_col: 'household_pop'})

# merge to the data (districts without a census row get a missing household_pop)
LA = LA.merge(LA_census, on='LAD21CD', how='left')

# create crowd factor: household population divided by median floor area
LA['pop_per_floor'] = LA['household_pop'] / LA['median_house_area']

# normalize to the 0-1 range (min-max scaling); min and max are computed once
ppf_min = LA['pop_per_floor'].min()
ppf_max = LA['pop_per_floor'].max()
LA['pop_per_floor_stand'] = (LA['pop_per_floor'] - ppf_min) / (ppf_max - ppf_min)

# export; the driver is named explicitly instead of being inferred from the extension
LA.to_file('./../Data/crowd_factor/crowd_LA.geojson', driver='GeoJSON')

In [14]:
# NOTE(review): the previous cell already ends by writing this same frame to this
# same path, so this cell looks like a leftover re-run and can probably be removed.
# The driver is named explicitly instead of being inferred from the extension.
LA.to_file('./../Data/crowd_factor/crowd_LA.geojson', driver='GeoJSON')

In [15]:
# Drop the Local Authority intermediates before the next boundary level is processed.
del LA, epc3_LA, LA_census, house_area_LA

In [16]:

# --- Crowding factor per LSOA (LSOA21CD) ---

# An LSOA is kept only if it has more than this many EPC records with a floor area.
min_epc_count_LSOA = 5
# Census column holding the number of people living in a household.
household_col = 'Residence type: Lives in a household; measures: Value'

# read the boundary data and put it in the same CRS as the EPC points
LSOA = gpd.read_file('./../Data/boundaries/LSOA_2021.geojson').to_crs('EPSG:4326')

# tag each EPC point with the LSOA it falls in
# (left join, so EPCs outside every LSOA are kept with a missing LSOA21CD)
epc_LSOA = data.sjoin(LSOA.loc[:, ['LSOA21CD', 'geometry']], how='left')

# median floor area and number of non-null floor areas per LSOA
house_area_LSOA = (
    epc_LSOA.loc[:, ['LSOA21CD', 'TOTAL_FLOOR_AREA']]
    .groupby(['LSOA21CD'])
    .agg(median_house_area=('TOTAL_FLOOR_AREA', 'median'),
         count_houses=('TOTAL_FLOOR_AREA', 'count'))
    .reset_index()
)

# remove LSOAs where there is not enough EPCs
house_area_LSOA = house_area_LSOA[house_area_LSOA['count_houses'] > min_epc_count_LSOA]

# attach the EPC summary to the boundaries; the right join keeps only LSOAs that passed the filter
LSOA = LSOA.merge(house_area_LSOA, on='LSOA21CD', how='right')

# load census: pick the two needed columns by name rather than by position,
# so the selection does not depend on the column layout of the CSV
LSOA_census = pd.read_csv(
    './../Data/census/2021/census2021-ts001/census2021-ts001-lsoa.csv',
    usecols=['geography code', household_col],
).rename(columns={'geography code': 'LSOA21CD', household_col: 'household_pop'})

# merge to the data (LSOAs without a census row get a missing household_pop)
LSOA = LSOA.merge(LSOA_census, on='LSOA21CD', how='left')

# create crowd factor: household population divided by median floor area
LSOA['pop_per_floor'] = LSOA['household_pop'] / LSOA['median_house_area']

# normalize to the 0-1 range (min-max scaling); min and max are computed once
ppf_min = LSOA['pop_per_floor'].min()
ppf_max = LSOA['pop_per_floor'].max()
LSOA['pop_per_floor_stand'] = (LSOA['pop_per_floor'] - ppf_min) / (ppf_max - ppf_min)

# export; the driver is named explicitly instead of being inferred from the extension
LSOA.to_file('./../Data/crowd_factor/crowd_LSOA.geojson', driver='GeoJSON')

# drop the LSOA intermediates before the next boundary level is processed
del LSOA
del epc_LSOA
del LSOA_census
del house_area_LSOA

In [18]:
# --- Crowding factor per Output Area (OA21CD) ---

# An Output Area is kept only if it has more than this many EPC records with a floor area.
min_epc_count_OA = 5
# Census column holding the number of people living in a household.
household_col = 'Residence type: Lives in a household; measures: Value'

# read the boundary data and put it in the same CRS as the EPC points
OA21 = gpd.read_file('./../Data/boundaries/Output_Areas/Output_Areas_Dec_2021_Boundaries_Generalised_Clipped_EW_BGC_2022_-5131276949491721609.geojson').to_crs('EPSG:4326')

# tag each EPC point with the Output Area it falls in
# (left join, so EPCs outside every Output Area are kept with a missing OA21CD)
epc_OA = data.sjoin(OA21.loc[:, ['OA21CD', 'geometry']], how='left')

# median floor area and number of non-null floor areas per Output Area
house_area_OA = (
    epc_OA.loc[:, ['OA21CD', 'TOTAL_FLOOR_AREA']]
    .groupby(['OA21CD'])
    .agg(median_house_area=('TOTAL_FLOOR_AREA', 'median'),
         count_houses=('TOTAL_FLOOR_AREA', 'count'))
    .reset_index()
)

# remove Output Areas where there is not enough EPCs
house_area_OA = house_area_OA[house_area_OA['count_houses'] > min_epc_count_OA]

# attach the EPC summary to the boundaries; the right join keeps only areas that passed the filter
OA21 = OA21.merge(house_area_OA, on='OA21CD', how='right')

# load census: pick the two needed columns by name rather than by position,
# so the selection does not depend on the column layout of the CSV
OA_census = pd.read_csv(
    './../Data/census/2021/census2021-ts001/census2021-ts001-oa.csv',
    usecols=['geography code', household_col],
).rename(columns={'geography code': 'OA21CD', household_col: 'household_pop'})

# merge to the data (areas without a census row get a missing household_pop)
OA21 = OA21.merge(OA_census, on='OA21CD', how='left')

# create crowd factor: household population divided by median floor area
OA21['pop_per_floor'] = OA21['household_pop'] / OA21['median_house_area']

# normalize to the 0-1 range (min-max scaling), matching the column that the
# LA and LSOA cells of this notebook write to their outputs
ppf_min = OA21['pop_per_floor'].min()
ppf_max = OA21['pop_per_floor'].max()
OA21['pop_per_floor_stand'] = (OA21['pop_per_floor'] - ppf_min) / (ppf_max - ppf_min)

# export; the driver is named explicitly instead of being inferred from the extension
OA21.to_file('./../Data/crowd_factor/crowd_OA.geojson', driver='GeoJSON')

In [31]:

# Drop the Output Area intermediates now that the export is done.
del OA21, epc_OA, OA_census, house_area_OA
