## Day 1: Points

Landfill sites in the UK

In [11]:
#setup
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
import os

In [82]:
#load geojson
gdf = gpd.read_file("Permitted_Waste_Sites_Authorised_Landfill_Site_Boundaries.geojson")

In [160]:
#load data sets
folder_path = os.getcwd()
excel_files = glob.glob(os.path.join(folder_path, '*.xlsx'))
years = np.arange(2015, 2025, 1)
rows_to_ignore = [ 17, 7, 7, 7, 7, 7, 9 , 8, 7, 7 ]
dataframes = []

#loop through each file, clean up and combine at the end 
for i, file in enumerate(excel_files):
    frame = pd.read_excel(file, skiprows=rows_to_ignore[i])
    frame["Year"] = years[i]
    if years[i] < 2023:
        frame["Environmental Permitting Reference (EPR)"] = None
        cols = frame.columns.tolist()
        new_order = [cols[-1]] + cols[:-1]
        frame = frame[new_order]
    frame.columns = ["epr", "legacy_permit", "operator_name", "facility_name", "facility_address", "ea_area",
                  "region", "sub_region", "local_authority", "site_type", "remaining_capacity", "year", ]
    dataframes.append(frame)

df = pd.concat(dataframes, ignore_index=True)

#fix incorrect legacy permit code
df.loc[(df["operator_name"]=="Cory Environmental (Gloucestershire)Limited") &
   (df["legacy_permit"]=="TP3735PA"), "legacy_permit"] = "NP3736DS"

df["postcodes"] = df["facility_address"].str.replace(",", "").str[-8:].str.strip()


df

Unnamed: 0,epr,legacy_permit,operator_name,facility_name,facility_address,ea_area,region,sub_region,local_authority,site_type,remaining_capacity,year,postcodes
0,,10264,HH AND DE Drew Ltd,Lower Farm Landfill,"Lower Farm, Lower Pennington, New Milton SO41 8DF",Solent and South Downs,South East,Hampshire,New Forest,Inert Landfill,Closed,2015,SO41 8DF
1,,10285,Westridge Developments Ltd,Lynn Pit Landfill,"Briddlesford Road, Down End PO30 2PD",Solent and South Downs,South East,Isle of Wight,Isle of Wight,Inert Landfill,Closed,2015,PO30 2PD
2,,19737,Inert Waste Recycling Limited,Boxgrove Landfill,"Tinwood Lane, Eartham, Chichester, West Sussex...",Solent & South Downs,South East,West Sussex,Chichester,Inert Landfill,640000,2015,PO18 0NB
3,,21785,Leese's Limited,Kenbury Wood Landfill,"Kenbury Wood, Exminster EX6 7XD",Devon & Cornwall,South West,Devon,Teignbridge,Inert Landfill,512022,2015,EX6 7XD
4,,23517,Harley Jack,Chitterne Waste Management Facility,"Valley Farm, Chitterne, Warminster, Wiltshire...",Wessex,South West,Wiltshire,Wiltshire,Inert Landfill,0,2015,BA12 0LT
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5257,ZP3533GG,ZP3533GG,Margetts Pit Limited,Margetts Pit SNRHW Landfill,"Margetts Lane, Burham, Rochester, Kent ME1 3RQ",Kent and South London,South East,Kent,Tonbridge and Malling,L02 - Non Hazardous Landfill With SNRHW cell,0,2024,ME1 3RQ
5258,ZP3606BG,ZP3606BG,British Steel Limited,Crosby North Landfill,"Daws Lane, Scunthorpe",East Midlands,Yorkshire & the Humber,Former Humberside,North Lincolnshire,L04 - Non Hazardous,1641301,2024,unthorpe
5259,ZP3832SQ,210109,Cemex U K Materials Ltd,Great Westwood Landfill,"Great Westwood Landfill, Fir Tree Hill, Chand...",Herts and North London,East of England,Hertfordshire,Three Rivers,L05 - Inert Landfill,0,2024,WD3 4LY
5260,ZP3835JD,406424,Escrick Environmental Services Limited,Escrick Soil Landfill Site,"Escrick Environmental Services Ltd, The Old B...",Yorkshire,Yorkshire & the Humber,North Yorkshire,North Yorkshire,L05 - Inert Landfill,428521,2024,YO19 6ED


## Exploration

In [91]:
df[df["epr"]=="BJ6003IF"]

Unnamed: 0,epr,legacy_permit,operator_name,facility_name,facility_address,ea_area,region,sub_region,local_authority,site_type,remaining_capacity,year
4213,BJ6003IF,BJ6003IF,Veolia ES Landfill Limited,New Albion Landfill Site,"Occupation Road, Spring Cottage, Albert Villag...",West Midlands,East Midlands,Leicestershire,North West Leicestershire,L04 - Non Hazardous,0,2023
4748,BJ6003IF,BJ6003IF,Veolia ES Landfill Limited,New Albion Landfill Site,"Occupation Road, Spring Cottage, Albert Villag...",West Midlands,East Midlands,Leicestershire,North West Leicestershire,L04 - Non Hazardous,0,2024


In [79]:
#Scope of data
permit_counts = df['legacy_permit'].value_counts().reset_index()
frequent_permits = permit_counts[permit_counts["count"] > 9]
print("N of providers in 10 files:", len(frequent_permits))

print("Unique epr codes:", df["epr"].nunique())
print("Unique legacy codes:", df["legacy_permit"].nunique())
print("Unique operator names:", df["operator_name"].nunique())
print("Unique facility names:", df["facility_name"].nunique())

N of providers in 10 files: 422
Unique epr codes: 540
Unique legacy codes: 629
Unique operator names: 534
Unique facility names: 741


In [159]:
uk_postcode_pattern = r'^[A-Z]{1,2}\d{1,2}[A-Z]?\s?\d[A-Z]{2}$'
invalid_df = df[~df['postcodes'].str.upper().str.match(uk_postcode_pattern, na=False)]
invalid_df["postcodes"].tolist()

['edbridge',
 'ow Essex',
 'r Surrey',
 'W 18 4JX',
 'Rutland',
 nan,
 'zard LU7',
 nan,
 'yldesley',
 'nemouth',
 'DN6  7EX',
 'y S9 1LH',
 'CW 15NG',
 'd S3 8AG',
 'd S9 1TR',
 'LS27 OJA',
 'unthorpe',
 'ersfield',
 'heshire',
 nan,
 'nemouth',
 'LS27 OJA',
 'unthorpe',
 'ow Essex',
 'edbridge',
 'DN6  7EX',
 'W 18 4JX',
 'y S9 1LH',
 'heshire',
 'ersfield',
 'r Surrey',
 'CW 15NG',
 'yldesley',
 'd S3 8AG',
 nan,
 'zard LU7',
 'd S9 1TR',
 'Rutland',
 'edbridge',
 'W 18 4JX',
 'Rutland',
 nan,
 'zard LU7',
 'ank Lydd',
 'yldesley',
 'nemouth',
 'DN6  7EX',
 'y S9 1LH',
 'CW 15NG',
 'd S3 8AG',
 'd S9 1TR',
 'unthorpe',
 'LS27 OJA',
 'ersfield',
 'heshire',
 'heshire',
 'unthorpe',
 'ank Lydd',
 'nemouth',
 'yldesley',
 nan,
 'Rutland',
 'd S9 1TR',
 'CW 15NG',
 nan,
 'LS27 OJA',
 'DN6  7EX',
 'edbridge',
 'W 18 4JX',
 'zard LU7',
 'y S9 1LH',
 'd S3 8AG',
 'ersfield',
 nan,
 'edbridge',
 'W 18 4JX',
 'Rutland',
 nan,
 'zard LU7',
 'ank Lydd',
 'yldesley',
 'nemouth',
 'DN6  7EX',
 '

['SO41 8DF',
 'PO30 2PD',
 'PO18 0NB',
 'EX6 7XD',
 'BA12 0LT',
 'BH21 3QZ',
 'BA2 8PU',
 'WS9 9PE',
 'WS9 0NF',
 'LE9 3LE',
 'DN10 6BP',
 'WS14 0BD',
 'TF1 5RY',
 'WA3 7BW',
 'WN8 7ND',
 'SR6 7NG',
 'DH5 8AJ',
 'BD13 5DE',
 'LS24 9LY',
 'DL17 9EB',
 'RM18 8PH',
 'LN4 2JA',
 'NN16 9UX',
 'NN14 4NJ',
 'Dn21 4JH',
 'NN7 4EJ',
 'NN6 8AA',
 'LE17 6JH',
 'PE9 3BZ',
 'MK17 8BU',
 'CB6 2AY',
 'MK19 1RN',
 'CB11 3SL',
 'MK17 8BU',
 'LU7 4RU',
 'MK17 8BU',
 'HP5 1UW',
 'edbridge',
 'ow Essex',
 'RM13 9EW',
 'SG9 0BU',
 'UB7 0JG',
 'AL4 0PG',
 'EN11 8LS',
 'r Surrey',
 'SL2 3SD',
 'RH3 7HB',
 'SN7 8HE',
 'OX10 6PJ',
 'SL7 3SB',
 'RG16 9DR',
 'WF6 2JD',
 'CB6 2PZ',
 'CB8 7QX',
 'LA15 8BG',
 'WN8 0QE',
 'HU15 2BE',
 'SO51 0NU',
 'HX4 0DY',
 'SL3 0LP',
 'B78 3DH',
 'OX5 3EL',
 'NN17 4AP',
 'RM13 9GF',
 'DN14 0RL',
 'LU7 0AP',
 'DH8 9HQ',
 'TQ4 7PD',
 'RG30 3XA',
 'HR8 1PB',
 'TW17 0NF',
 'MK18 4AJ',
 'CV33 9QJ',
 'B61 0RF',
 'SG17 5PJ',
 'CB6 2AY',
 'CM9 6TJ',
 'DY6 8WT',
 'DN9 3AN',
 'WV10 7LL',
 

In [107]:
df[~(df["remaining_capacity"].isin(["Closed", 0])) & (df["year"]==2024)]


Unnamed: 0,epr,legacy_permit,operator_name,facility_name,facility_address,ea_area,region,sub_region,local_authority,site_type,remaining_capacity,year
4721,AB3009HS,400345,Sewells Reservoir Construction Limited,Highwood Quarry Inert Landfill,"Highwood Quarry, Little Easton, Great Dunmow,...",Herts and North London,East of England,Essex,Uttlesford,L05 - Inert Landfill,724735,2024
4722,AB3100UQ,40059,J P E ( Holdings ) Limited,Shire Oak Quarry,"Shire Oak Quarry, Chester Road, Shire Oak, Wa...",West Midlands,West Midlands,Staffordshire,Lichfield,L05 - Inert Landfill,726575,2024
4723,AB3105HD,400391,Ingrebourne Valley Ltd,Denham Park Farm,"Denham Park Farm, Denham Green, Buckinghamshi...",Herts and North London,South East,Buckinghamshire,Buckinghamshire,L05 - Inert Landfill,130854,2024
4725,AB3305MU,400532,Armstrongs Aggregates Limited,Pilkington Quarry,"Pilkington Quarry, Makinson Lane, Horwich, Bo...",Gtr Mancs Mersey and Ches,North West,Greater Manchester,Bolton,L05 - Inert Landfill,648543,2024
4727,AB3402KQ,400588,Hills Quarry Products Limited,Upwood Quarry,"Upwood Quarry, Besselsleigh, Abingdon, Oxford...",Thames,South East,Oxfordshire,Vale of White Horse,L05 - Inert Landfill,210746,2024
...,...,...,...,...,...,...,...,...,...,...,...,...
5254,ZP3232SF,ZP3232SF,Biffa Waste Services Ltd,Hartlebury Landfill Site,"Whitlenge Lane, Hartlebury DY10 4HB",West Midlands,West Midlands,Worcestershire,Wychavon,L02 - Non Hazardous Landfill With SNRHW cell,475913,2024
5256,ZP3497EW,102911,Allen Newport Limited,Marston's Pit,Marstons Pit Cavenham Road Tuddenham Bury St. ...,East Anglia,East of England,Suffolk,West Suffolk,L05 - Inert Landfill,2641287,2024
5258,ZP3606BG,ZP3606BG,British Steel Limited,Crosby North Landfill,"Daws Lane, Scunthorpe",East Midlands,Yorkshire & the Humber,Former Humberside,North Lincolnshire,L04 - Non Hazardous,1641301,2024
5260,ZP3835JD,406424,Escrick Environmental Services Limited,Escrick Soil Landfill Site,"Escrick Environmental Services Ltd, The Old B...",Yorkshire,Yorkshire & the Humber,North Yorkshire,North Yorkshire,L05 - Inert Landfill,428521,2024
