# Planning application scraper - example implementation 

Simple example of scraping planning applications from London borough councils.

In [1]:
from datetime import datetime
from pathlib import Path

import pandas as pd

from planning_scraper import (
    get_postcode_page,
    scrape_app_details,
    process_address_dataframe,
    extract_postcode
)

## Define search parameters

In [2]:
# Council to query; must be one of the keys of `examples` below.
council = "newham"

# One sample postcode per example council.
examples = {
    'newham': 'E13 0AG',
    'southwark': 'SE5 8UH',
    'lambeth': 'SW9 6DE'
}

# Look up the sample postcode that belongs to the chosen council.
postcode = examples[council]

print(f"Searching {council.upper()}: {postcode}")

Searching NEWHAM: E13 0AG


## Search for applications

In [3]:
# Fetch the application URLs returned for the chosen council and postcode.
# NOTE(review): os_type is hard-coded to "mac"; presumably it selects a
# platform-specific browser driver — confirm before running on another OS.
urls = get_postcode_page(
    council,
    postcode,
    os_type="mac",
)

print(f"Found {len(urls)} applications")

# Preview at most the first five results, numbered from 1.
print(f"\nFirst 5 URLs:")
for i, url in enumerate(urls[:5], start=1):
    print(f"{i}. {url}")

Searched for postcode: E13 0AG
Collected 10 links (total: 10)
Collected 10 links (total: 20)
Collected 1 links (total: 21)
Found 21 applications

First 5 URLs:
1. https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=RUWLR0JY5W100&activeTab=summary
2. https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=PG2H3PJYFW400&activeTab=summary
3. https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=P5LC4HJY52R00&activeTab=summary
4. https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=O6WQ4PJY0AL00&activeTab=summary
5. https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=NKSHIJJYJX000&activeTab=summary


## Scrape application details

In [4]:
# Keep the demo short: only the first five URLs are scraped.
# Assign `urls` itself here to scrape every application found.
urls_to_scrape = urls[:5]

print(f"Scraping {len(urls_to_scrape)} applications...\n")

# Each URL is attempted up to `max_retries` times (see the "Attempt n/3" log lines).
data = scrape_app_details(
    urls_to_scrape,
    os_type="mac",
    max_retries=3,
)

print(f"\n Scraping complete!")

Scraping 5 applications...

Scraping URL 1 of 5: https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=RUWLR0JY5W100&activeTab=summary
  Attempt 1/3
  Scraped main page for 23/01098/FUL
  Scraped further info page for 23/01098/FUL
Scraping URL 2 of 5: https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=PG2H3PJYFW400&activeTab=summary
  Attempt 1/3
  Scraped main page for 18/02826/CLE
  Scraped further info page for 18/02826/CLE
Scraping URL 3 of 5: https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=P5LC4HJY52R00&activeTab=summary
  Attempt 1/3
  Scraped main page for 18/00696/LA3
  Scraped further info page for 18/00696/LA3
Scraping URL 4 of 5: https://pa.newham.gov.uk/online-applications/applicationDetails.do?keyVal=O6WQ4PJY0AL00&activeTab=summary
  Attempt 1/3
  Scraped main page for 16/01386/CLP
  Scraped further info page for 16/01386/CLP
Scraping URL 5 of 5: https://pa.newham.gov.uk/online-applications/applicationDet

## Create dataframe of the results 

In [5]:
# Load the scraped records into a DataFrame.
df = pd.DataFrame(data=data)

print(f"Created DataFrame with {len(df)} rows\n")

# Last expression of the cell: rich preview of the first five rows.
df.head(5)

Created DataFrame with 5 rows



Unnamed: 0,reference,url,date_validated,address,description,decision,decision_date,app_type,actual_decision_level,expected_decision_level
0,23/01098/FUL,https://pa.newham.gov.uk/online-applications/a...,Fri 16 Jun 2023,Curwen Primary School Atlas Road Plaistow Lond...,Proposed erection of a single storey timber ga...,Approve,Fri 25 Aug 2023,Full Planning Permission,Delegated decision,Not Available
1,18/02826/CLE,https://pa.newham.gov.uk/online-applications/a...,Thu 04 Oct 2018,11 Atlas Road Plaistow London E13 0AG,Certificate of lawfulness for the existing use...,Approve,Thu 29 Nov 2018,Certificate of Lawfulness Existing Use,Delegated decision,Not Available
2,18/00696/LA3,https://pa.newham.gov.uk/online-applications/a...,Wed 14 Mar 2018,Curwen Primary School Atlas Road Plaistow Lond...,Installation of external canopy to match canop...,Approve,Tue 17 Apr 2018,Local Authority Regulation 3 Application,Committee decision,Not Available
3,16/01386/CLP,https://pa.newham.gov.uk/online-applications/a...,Tue 24 May 2016,5 Atlas Road Plaistow London E13 0AG,Proposed single storey rear extension,Refuse,Tue 19 Jul 2016,Certificate of Lawfulness Proposed Use,Delegated decision,Not Available
4,15/00593/AOD,https://pa.newham.gov.uk/online-applications/a...,Mon 16 Mar 2015,Curwen Junior And Infants School Atlas Road Pl...,Approval of details pursuant to Condition 9 (D...,Approve,Fri 10 Jul 2015,Approval of Details,Approval of Details,Not Available


## Process addresses

In [6]:
# Run the address post-processing step; the preview below relies on it
# providing the 'postcode' and 'cleaned_address' columns.
df = process_address_dataframe(df)

# Preview only the address-related columns of the processed frame.
df.loc[:, ['reference', 'address', 'postcode', 'cleaned_address']].head()

Processing 5 addresses...
Extracted 5/5 valid postcodes


Unnamed: 0,reference,address,postcode,cleaned_address
0,23/01098/FUL,Curwen Primary School Atlas Road Plaistow Lond...,E13 0AG,Curwen Primary School Atlas Road Plaistow Lond...
1,18/02826/CLE,11 Atlas Road Plaistow London E13 0AG,E13 0AG,11 Atlas Road Plaistow London E13 0AG
2,18/00696/LA3,Curwen Primary School Atlas Road Plaistow Lond...,E13 0AG,Curwen Primary School Atlas Road Plaistow Lond...
3,16/01386/CLP,5 Atlas Road Plaistow London E13 0AG,E13 0AG,5 Atlas Road Plaistow London E13 0AG
4,15/00593/AOD,Curwen Junior And Infants School Atlas Road Pl...,E13 0AG,Curwen Junior And Infants School Atlas Road Pl...


## Summary statistics

In [7]:
# Header and separator rule for the plain-text report.
print("Summary Statistics")
print("="*50)

# Overall counts: number of rows, and rows whose postcode is not null.
print(f"Total applications: {len(df)}")
valid_postcodes = df['postcode'].notna().sum()
print(f"Valid postcodes: {valid_postcodes}")

# Frequency of each decision outcome.
print(f"\nDecision breakdown:")
decision_counts = df['decision'].value_counts()
print(decision_counts)

# Most frequent postcodes (value_counts sorts descending; head keeps the top five).
print(f"\nTop postcodes:")
top_postcodes = df['postcode'].value_counts().head()
print(top_postcodes)

Summary Statistics
Total applications: 5
Valid postcodes: 5

Decision breakdown:
decision
Approve    4
Refuse     1
Name: count, dtype: int64

Top postcodes:
postcode
E13 0AG    5
Name: count, dtype: int64


## Save results to a CSV file

In [8]:
# Save to CSV.
# The file name carries a timestamp so repeated runs never overwrite an
# earlier export. `datetime` and `Path` come from the notebook's import cell.
output_file = f"data/output/{council}_applications_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (no-op when it is already there).
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

df.to_csv(output_file, index=False)

print(f"Saved {len(df)} applications to: {output_file}")

Saved 5 applications to: data/output/newham_applications_20260219_164811.csv


## Example address extraction 

Using 'geolocator.py' to extract the postcode from a single address string.

In [9]:
# A made-up address used to demonstrate postcode extraction.
sample_address = "123 Main Street, Westminster, London SW1A 1AA"

# Pull the postcode out of the address string.
extracted = extract_postcode(sample_address)

# Show the input and the extracted postcode on separate lines.
print(
    f"Address: {sample_address}",
    f"Postcode: {extracted}",
    sep="\n",
)

Address: 123 Main Street, Westminster, London SW1A 1AA
Postcode: SW1A 1AA
