In [5]:
from enhance_ocod.get_data import download_latest_onspd, get_voa_file_list, VOARatingListDownloader

In [6]:
# List the VOA downloads whose name contains "listentries", then keep only the
# entries whose name also contains "baseline".
files = get_voa_file_list(name_contains="listentries")
baseline_files = [f for f in files if "baseline" in f['name']]

# Fail with an actionable message instead of a bare "list index out of range"
# when the filter matches nothing. IndexError is kept so the exception type is
# the same one the unguarded [-1] lookup would have raised.
if not baseline_files:
    raise IndexError("No file with 'baseline' in its name among the 'listentries' downloads")

# NOTE(review): taking the last element assumes get_voa_file_list returns the
# files in ascending epoch order — confirm against its implementation.
latest_baseline = baseline_files[-1]



Found 2745 files available for download
Filtered to 1426 files


In [7]:
# Display the metadata dict of the baseline file selected in the previous cell.
latest_baseline

{'name': 'uk-englandwales-ndr-2023-listentries-compiled-epoch-0015-baseline-csv.zip',
 'last_modified': 'Wed, 04 Jun 2025 05:10:37 GMT',
 'size': 104402628,
 'content_type': 'application/x-zip-compressed',
 'url': 'https://voaratinglists.blob.core.windows.net/downloads/uk-englandwales-ndr-2023-listentries-compiled-epoch-0015-baseline-csv.zip'}

In [None]:
# Fetch the selected baseline archive into VOA_DIR, matching on its exact name.
# confirm=False presumably skips an interactive prompt — verify against
# VOARatingListDownloader.download_files.
# NOTE(review): VOA_DIR is not defined in any cell shown here; it must be set
# before this cell runs.
downloader = VOARatingListDownloader()
downloader.download_files(
    str(VOA_DIR),
    name_contains=latest_baseline["name"],
    confirm=False,
)

In [1]:
# Test the direct download speed of the ONSPD archive by timing the first 10MB.
import requests
import time

url = "https://www.arcgis.com/sharing/rest/content/items/3be72478d8454b59bb86ba97b4ee325b/data"
SAMPLE_BYTES = 10 * 1024 * 1024  # stop measuring after the first 10MB

print("Testing direct download speed...")
# perf_counter is monotonic, so the measurement cannot be skewed by clock changes.
start_time = time.perf_counter()

# The context manager releases the connection even if the loop below raises,
# and the timeout keeps the cell from hanging forever on a stalled server.
with requests.get(url, stream=True, timeout=30) as response:
    # Do not measure the "speed" of an HTTP error page.
    response.raise_for_status()
    print(f"Headers received in {time.perf_counter() - start_time:.2f}s")
    print(f"Content-Length: {response.headers.get('content-length')} bytes")

    # Download first 10MB to test speed
    downloaded = 0
    chunk_start = time.perf_counter()
    for chunk in response.iter_content(chunk_size=8192):
        downloaded += len(chunk)
        if downloaded >= SAMPLE_BYTES:
            break

    elapsed = time.perf_counter() - chunk_start

# Guard against a zero interval (e.g. an empty body) to avoid ZeroDivisionError.
speed_mbps = (downloaded / (1024*1024)) / max(elapsed, 1e-9)
print(f"Speed for first 10MB: {speed_mbps:.2f} MB/s")

Testing direct download speed...
Headers received in 0.31s
Content-Length: 245482583 bytes
Speed for first 10MB: 31.68 MB/s


In [6]:
import requests
import json
from pprint import pprint

# Root URL of the ArcGIS REST services directory; read by explore_services().
base_endpoint = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/"

def explore_services():
    """Fetch the service directory at ``base_endpoint`` as JSON and pretty-print it.

    Prints ``Error: <status>`` instead when the HTTP status is not 200.
    Network exceptions are not caught here and propagate to the caller.
    """
    reply = requests.get(base_endpoint, params={'f': 'json'})

    # Guard clause: a non-200 reply is reported and nothing is parsed.
    if reply.status_code != 200:
        print(f"Error: {reply.status_code}")
        return

    listing = reply.json()
    print("Available services:")
    print("=" * 50)
    pprint(listing)

def search_for_onspd_general(timeout=30):
    """Run postcode-related keyword searches against two ArcGIS search URLs.

    For every URL / search-term pair, requests the five most recently modified
    matches as JSON and prints each match's title, ID, owner, type and (when a
    ``snippet`` key is present) the first 100 characters of the snippet.
    HTTP errors and exceptions are printed and the loop moves on to the next term.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot hang the notebook across the 12 requests.
    """

    # Approach 1: General ArcGIS search
    search_urls = [
        "https://www.arcgis.com/sharing/rest/search",
        "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/search"
    ]

    search_terms = [
        'ONSPD',
        'postcode',
        'directory',
        'ONSPD postcode',
        'postcode directory',
        'ONS postcode'
    ]

    for base_url in search_urls:
        print(f"\n{'='*60}")
        print(f"Searching: {base_url}")
        print(f"{'='*60}")

        for term in search_terms:
            print(f"\nSearching for: '{term}'")
            print("-" * 30)

            params = {
                'q': term,
                'f': 'json',
                'num': 5,
                'sortField': 'modified',
                'sortOrder': 'desc'
            }

            try:
                response = requests.get(base_url, params=params, timeout=timeout)
                if response.status_code == 200:
                    data = response.json()
                    results = data.get('results', [])

                    if results:
                        for position, item in enumerate(results, start=1):
                            print(f"{position}. {item.get('title', 'No title')}")
                            print(f"   ID: {item.get('id', 'No ID')}")
                            print(f"   Owner: {item.get('owner', 'No owner')}")
                            print(f"   Type: {item.get('type', 'No type')}")
                            if 'snippet' in item:
                                # A JSON null snippet decodes to None; slicing it
                                # would raise TypeError and abort the remaining
                                # results for this term, so treat it as empty.
                                snippet = item['snippet'] or ''
                                print(f"   Description: {snippet[:100]}...")
                            print()
                    else:
                        print("No results found")
                else:
                    print(f"Error: {response.status_code}")
            except Exception as e:
                # Best-effort exploration: report the failure and keep searching.
                print(f"Error: {e}")

def explore_specific_service():
    """List every service and folder published at the ArcGIS services root.

    Prints one line per service; services whose name contains 'onspd',
    'postcode' or 'directory' (case-insensitive) are flagged and passed to
    ``explore_service_details``. Each folder is then passed to
    ``explore_folder``. HTTP errors and exceptions are printed, not raised.
    """
    root_url = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services"
    keywords = ('onspd', 'postcode', 'directory')

    try:
        reply = requests.get(root_url, params={'f': 'json'})

        # Guard clause: report a non-200 status together with the raw body.
        if reply.status_code != 200:
            print(f"Error accessing services: {reply.status_code}")
            print(reply.text)
            return

        catalogue = reply.json()
        print("Services available:")
        print("=" * 50)

        service_entries = catalogue.get('services', [])
        folder_names = catalogue.get('folders', [])

        print(f"Found {len(service_entries)} services and {len(folder_names)} folders")
        print()

        for entry in service_entries:
            name = entry.get('name', 'Unknown')
            kind = entry.get('type', 'Unknown')
            print(f"Service: {name} (Type: {kind})")

            # Drill into anything whose name hints at postcode data.
            lowered = name.lower()
            if any(keyword in lowered for keyword in keywords):
                print("  *** Potentially relevant! ***")
                explore_service_details(name, kind)

        print("\nFolders:")
        for folder_name in folder_names:
            print(f"Folder: {folder_name}")
            explore_folder(folder_name)
    except Exception as e:
        print(f"Error: {e}")

def explore_service_details(service_name, service_type):
    """Print the description and layer list of one service.

    Args:
        service_name: Service name as listed in the services directory.
        service_type: Service type segment of the URL (e.g. the value of the
            directory entry's ``type`` field).

    HTTP errors and exceptions are printed (indented) rather than raised.
    """
    service_url = f"https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/{service_name}/{service_type}"

    try:
        reply = requests.get(service_url, params={'f': 'json'})
        if reply.status_code != 200:
            print(f"    Error accessing service details: {reply.status_code}")
            return

        details = reply.json()
        print(f"    Service details for {service_name}:")
        print(f"    Description: {details.get('description', 'No description')}")

        # One lookup serves both the count and the per-layer listing.
        layers = details.get('layers', [])
        print(f"    Layers: {len(layers)}")
        for layer in layers:
            print(f"      Layer: {layer.get('name', 'Unknown')} (ID: {layer.get('id', 'Unknown')})")
    except Exception as e:
        print(f"    Error exploring service: {e}")

def explore_folder(folder_name):
    """Print the services contained in one folder of the services directory.

    Args:
        folder_name: Folder name appended to the services root URL.

    Services whose name contains 'onspd', 'postcode' or 'directory'
    (case-insensitive) are flagged. HTTP errors and exceptions are printed,
    not raised.
    """
    folder_url = f"https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/{folder_name}"
    keywords = ('onspd', 'postcode', 'directory')

    try:
        reply = requests.get(folder_url, params={'f': 'json'})
        if reply.status_code != 200:
            print(f"  Error accessing folder: {reply.status_code}")
            return

        entries = reply.json().get('services', [])
        print(f"  Folder '{folder_name}' contains {len(entries)} services:")

        for entry in entries:
            name = entry.get('name', 'Unknown')
            kind = entry.get('type', 'Unknown')
            print(f"    {name} ({kind})")

            # Flag names that hint at postcode data.
            lowered = name.lower()
            if any(keyword in lowered for keyword in keywords):
                print("      *** Potentially relevant! ***")
    except Exception as e:
        print(f"  Error exploring folder: {e}")

def try_direct_onspd_search():
    """Search ArcGIS for several spelled-out names of the postcode directory.

    For each phrase, requests up to 10 matches as JSON from the ArcGIS search
    URL and prints the title, owner, type and ID of every match. HTTP errors
    and exceptions are printed and the next phrase is tried.
    """
    search_url = "https://www.arcgis.com/sharing/rest/search"

    # Let's try some ONS-specific searches
    ons_patterns = (
        "ONS Postcode Directory",
        "Ordnance Survey National Statistics Postcode Directory",
        "National Statistics Postcode Directory",
        "Postcode Directory",
    )

    print("Trying ONS-specific search patterns:")
    print("=" * 50)

    for phrase in ons_patterns:
        print(f"\nSearching for: '{phrase}'")
        query = {'q': phrase, 'f': 'json', 'num': 10}

        try:
            reply = requests.get(search_url, params=query)
            if reply.status_code != 200:
                print(f"  Error: {reply.status_code}")
                continue

            for hit in reply.json().get('results', []):
                title = hit.get('title', 'No title')
                owner = hit.get('owner', 'No owner')
                item_type = hit.get('type', 'No type')

                print(f"  Found: {title}")
                print(f"    Owner: {owner}")
                print(f"    Type: {item_type}")
                print(f"    ID: {hit.get('id', 'No ID')}")
                print()
        except Exception as e:
            print(f"  Error: {e}")


# Banner printed when this cell runs, after the helper functions are defined.
print("ONSPD Dataset Exploration")
print("=" * 60)
    


ONSPD Dataset Exploration


In [7]:
# Step 1: list the services and folders published at the services endpoint.
print("\n1. Exploring the specific services endpoint...")
explore_specific_service()
    


1. Exploring the specific services endpoint...
Services available:
Found 3666 services and 0 folders

Service: ACTY_DEC_1921_EW_BGC (Type: FeatureServer)
Service: ADCTY_JUN_1921_EW_BGC (Type: FeatureServer)
Service: ADCTYCB_JUN_1921_EW_BGC (Type: FeatureServer)
Service: Administrative_Counties_1971_Boundaries_EW (Type: FeatureServer)
Service: Administrative_Districts_1971_Boundaries_EW (Type: FeatureServer)
Service: Age_16_24_TTWA (Type: FeatureServer)
Service: Age_25_34_TTWA (Type: FeatureServer)
Service: Age_35_49_TTWA (Type: FeatureServer)
Service: Age_50_64_TTWA (Type: FeatureServer)
Service: Age_65_74_TTWA (Type: FeatureServer)
Service: Age_75_plus_TTWA (Type: FeatureServer)
Service: Broadband_coverage (Type: FeatureServer)
Service: BUA_2011_EW_BGG (Type: FeatureServer)
Service: BUA_2022_GB (Type: FeatureServer)
Service: BUA_APR_2024_EW_NC (Type: FeatureServer)
Service: BUA_DEC_2022_EW_NC (Type: FeatureServer)
Service: BUA_MAR_2011_EW_NC_0dfbd108e33640d290c55d9639fa6a26 (Type: Fe

In [8]:
# Step 2: run the keyword searches against both search URLs.
print("\n\n2. Trying general ONSPD searches...")
search_for_onspd_general()




2. Trying general ONSPD searches...

Searching: https://www.arcgis.com/sharing/rest/search

Searching for: 'ONSPD'
------------------------------
1. ONSPD Map Online V2
   ID: e911cc07e1a54b24b7cddcfa6fc7e249
   Owner: ONSGeography_data
   Type: Web Map
   Description: ...

2. ONSPD Online Latest Centroids
   ID: ed28dda8b75146288d7d08d56d2290a0
   Owner: ONSGeography_data
   Type: Feature Service
   Description: Postcode Latest Centroids...

3. Online ONS Postcode Directory (Live)
   ID: 2ced9a3a2462432a92c31226e3cd3aa5
   Owner: ONSGeography_data
   Type: Feature Service
   Description: Postcode Latest Centroids...

4. ONS Postcode Directory (May 2025) for the UK
   ID: 3be72478d8454b59bb86ba97b4ee325b
   Owner: ONSGeography_data
   Type: CSV Collection
   Description: Postcode Products...

5. ONSPD_Online_Latest_Centroids
   ID: bc07a97ee36c4587bdbefc7098df8bb1
   Owner: ONSGeography_data
   Type: Map Service
   Description: Postcode Latest Centroids...


Searching for: 'postcode'

In [9]:
    
# Step 3: search for the spelled-out names of the postcode directory.
print("\n\n3. Trying ONS-specific patterns...")
try_direct_onspd_search()



3. Trying ONS-specific patterns...
Trying ONS-specific search patterns:

Searching for: 'ONS Postcode Directory'
  Found: ONS Postcode Directory (February 2024) for the UK
    Owner: ONSGeography_data
    Type: CSV Collection
    ID: e14b1475ecf74b58804cf667b6740706

  Found: ONS Postcode Directory (November 2023) for the UK
    Owner: ONSGeography_data
    Type: CSV Collection
    ID: 3700342d3d184b0d92eae99a78d9c7a3

  Found: ONS Postcode Directory (November 2024) for the UK
    Owner: ONSGeography_data
    Type: CSV Collection
    ID: b54177d3d7264cd6ad89e74dd9c1391d

  Found: ONS Postcode Directory (August 2023) for the UK
    Owner: ONSGeography_data
    Type: CSV Collection
    ID: 487a5ba62c8b4da08f01eb3c08e304f6

  Found: ONS Postcode Directory (February 2023) for the UK (V2)
    Owner: ONSGeography_data
    Type: CSV Collection
    ID: a2f8c9c5778a452bbf640d98c166657c

  Found: ONS Postcode Directory (May 2024) for the UK
    Owner: ONSGeography_data
    Type: CSV Collection

In [10]:
import requests
import json
from pprint import pprint

def explore_onspd_item(item_id="3be72478d8454b59bb86ba97b4ee325b"):
    """Print the metadata that ArcGIS holds for one content item.

    Args:
        item_id: ArcGIS item ID to look up; defaults to the ID hard-coded in
            the signature.

    Prints a selection of fields followed by the full JSON payload.
    HTTP errors and exceptions are printed, not raised.
    """
    print(f"Exploring ONSPD item: {item_id}")
    print("=" * 60)

    item_url = f"https://www.arcgis.com/sharing/rest/content/items/{item_id}"

    try:
        reply = requests.get(item_url, params={'f': 'json'})

        # Guard clause: report a non-200 status together with the raw body.
        if reply.status_code != 200:
            print(f"Error getting item details: {reply.status_code}")
            print(reply.text)
            return

        item = reply.json()
        print("Item Details:")
        print("-" * 30)
        print(f"Title: {item.get('title', 'Unknown')}")
        print(f"Type: {item.get('type', 'Unknown')}")
        print(f"Owner: {item.get('owner', 'Unknown')}")
        # Created/Modified are printed exactly as returned, without conversion.
        print(f"Created: {item.get('created', 'Unknown')}")
        print(f"Modified: {item.get('modified', 'Unknown')}")
        print(f"Size: {item.get('size', 'Unknown')} bytes")
        print(f"Description: {item.get('description', 'No description')}")
        print(f"Snippet: {item.get('snippet', 'No snippet')}")
        print(f"Tags: {item.get('tags', [])}")

        # Licensing / attribution fields
        print(f"\nAccess Information: {item.get('accessInformation', 'None')}")
        print(f"License Info: {item.get('licenseInfo', 'None')}")

        print("\nFull item data:")
        pprint(item)
    except Exception as e:
        print(f"Error: {e}")

def try_download_onspd_item(item_id="3be72478d8454b59bb86ba97b4ee325b", timeout=30):
    """Probe candidate URLs for an ArcGIS item and report what each one returns.

    For each candidate URL, prints the HTTP status, content type and content
    length, then either the parsed JSON, the start of an unparseable body, the
    start of an error body, or the first bytes of a non-JSON payload.

    Args:
        item_id: ArcGIS item ID substituted into each candidate URL.
        timeout: Seconds to wait on each HTTP request before giving up.

    Failures are printed, not raised, so every URL is always attempted.
    """

    print(f"\nTrying to download ONSPD item: {item_id}")
    print("=" * 60)

    # Try the direct download URL pattern
    download_urls = [
        f"https://www.arcgis.com/sharing/rest/content/items/{item_id}/data",
        f"https://www.arcgis.com/sharing/rest/content/items/{item_id}/resources",
        f"https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/{item_id}",
    ]

    for url in download_urls:
        print(f"\nTrying: {url}")
        try:
            # stream=True defers the body download: only what is actually read
            # below is transferred, so probing the /data URL no longer pulls the
            # entire archive just to show its first bytes. The context manager
            # releases the connection afterwards.
            with requests.get(url, params={'f': 'json'}, stream=True, timeout=timeout) as response:
                content_type = response.headers.get('content-type', 'Unknown')
                print(f"Status: {response.status_code}")
                print(f"Content-Type: {content_type}")
                print(f"Content-Length: {response.headers.get('content-length', 'Unknown')}")

                if response.status_code != 200:
                    print(f"Error response: {response.text[:200]}")
                elif 'application/json' in content_type:
                    # If it's JSON, show the content
                    try:
                        data = response.json()
                    except ValueError:
                        # Body claimed to be JSON but did not parse; show it raw.
                        print("Response content (first 500 chars):")
                        print(response.text[:500])
                    else:
                        print("JSON Response:")
                        pprint(data)
                else:
                    print("Non-JSON response - might be the actual file!")
                    # Read a single chunk of at most 100 bytes from the stream.
                    head = next(response.iter_content(chunk_size=100), b'')
                    print(f"First 100 bytes: {head}")

        except Exception as e:
            # Best-effort probe: report and continue with the next URL.
            print(f"Error: {e}")

def explore_all_onspd_items(timeout=30):
    """Print size, modification time and description for a fixed list of ONSPD items.

    The list of (title, item ID, item type) tuples is hard-coded below. For
    each item the ArcGIS item metadata is fetched as JSON; HTTP errors and
    exceptions are printed and the next item is processed.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.
    """
    # Imported once here rather than on every loop iteration.
    from datetime import datetime

    onspd_items = [
        ("ONSPD Map Online V2", "e911cc07e1a54b24b7cddcfa6fc7e249", "Web Map"),
        ("ONSPD Online Latest Centroids", "ed28dda8b75146288d7d08d56d2290a0", "Feature Service"),
        ("Online ONS Postcode Directory (Live)", "2ced9a3a2462432a92c31226e3cd3aa5", "Feature Service"),
        ("ONS Postcode Directory (May 2025) for the UK", "3be72478d8454b59bb86ba97b4ee325b", "CSV Collection"),
        ("ONSPD_Online_Latest_Centroids", "bc07a97ee36c4587bdbefc7098df8bb1", "Map Service"),
    ]

    print("Exploring all ONSPD items:")
    print("=" * 60)

    for title, item_id, item_type in onspd_items:
        print(f"\n{title} ({item_type})")
        print(f"ID: {item_id}")
        print("-" * 40)

        # Get basic info
        url = f"https://www.arcgis.com/sharing/rest/content/items/{item_id}"
        params = {'f': 'json'}

        try:
            response = requests.get(url, params=params, timeout=timeout)
            if response.status_code == 200:
                data = response.json()

                size = data.get('size')
                print(f"Size: {data.get('size', 'Unknown')} bytes")
                if size:
                    size_mb = round(size / (1024*1024), 2)
                    print(f"Size (MB): {size_mb}")

                # 'modified' is divided by 1000 before conversion (treated as
                # epoch milliseconds); the result is in the kernel's local timezone.
                modified = data.get('modified')
                if modified:
                    mod_date = datetime.fromtimestamp(modified/1000)
                    print(f"Modified: {mod_date.strftime('%Y-%m-%d %H:%M')}")

                # A JSON null description decodes to None; slicing it would raise
                # TypeError and report the whole item as an error. An empty
                # string is kept as-is so existing output does not change.
                description = data.get('description', 'No description')
                if description is None:
                    description = 'No description'
                print(f"Description: {description[:100]}...")

                # Try to get download URL
                download_url = f"https://www.arcgis.com/sharing/rest/content/items/{item_id}/data"
                print(f"Download URL: {download_url}")

            else:
                print(f"Error: {response.status_code}")

        except Exception as e:
            # Best-effort listing: report and continue with the next item.
            print(f"Error: {e}")



In [11]:

# First, print a metadata summary for every ONSPD item hard-coded in
# explore_all_onspd_items, then a separator line.
explore_all_onspd_items()

print("\n" + "="*80)


Exploring all ONSPD items:

ONSPD Map Online V2 (Web Map)
ID: e911cc07e1a54b24b7cddcfa6fc7e249
----------------------------------------
Size: 9515 bytes
Size (MB): 0.01
Modified: 2025-07-17 11:58
Description: ...
Download URL: https://www.arcgis.com/sharing/rest/content/items/e911cc07e1a54b24b7cddcfa6fc7e249/data

ONSPD Online Latest Centroids (Feature Service)
ID: ed28dda8b75146288d7d08d56d2290a0
----------------------------------------
Size: 5155487744 bytes
Size (MB): 4916.66
Modified: 2025-07-17 10:47
Description: <p><span style='font-size:12.0pt; line-height:107%; font-family:&quot;Arial&quot;,sans-serif;'>This ...
Download URL: https://www.arcgis.com/sharing/rest/content/items/ed28dda8b75146288d7d08d56d2290a0/data

Online ONS Postcode Directory (Live) (Feature Service)
ID: 2ced9a3a2462432a92c31226e3cd3aa5
----------------------------------------
Size: 1409662976 bytes
Size (MB): 1344.36
Modified: 2025-07-17 10:45
Description: <p><span style='font-size:12.0pt; font-family:&quot;Ar

In [12]:

    # NOTE(review): every line of this cell carries a 4-space indent; consider
    # dedenting so the code also runs outside IPython.
    # Focus on the CSV Collection item (explore_onspd_item is called with its
    # default item_id).
    explore_onspd_item()

    print("\n" + "="*80)

    # Try to download it (again using the function's default item_id).
    try_download_onspd_item()

Exploring ONSPD item: 3be72478d8454b59bb86ba97b4ee325b
Item Details:
------------------------------
Title: ONS Postcode Directory (May 2025) for the UK
Type: CSV Collection
Owner: ONSGeography_data
Created: 1748593901000
Modified: 1750143647000
Size: 245482583 bytes
Description: <span style='font-family:&quot;Avenir Next W01&quot;, &quot;Avenir Next W00&quot;, &quot;Avenir Next&quot;, Avenir, &quot;Helvetica Neue&quot;, sans-serif; font-size:16px;'>This is the ONS Postcode Directory (ONSPD) for the United Kingdom as at May 2025 in Comma Separated Variable (CSV) and ASCII text (TXT) formats. This file contains the multi CSVs so that postcode areas can be opened in MS Excel. To download the zip file click the Download button. The ONSPD relates both current and terminated postcodes in the United Kingdom to a range of current statutory administrative, electoral, health and other area geographies. It also links postcodes to 2001 Census Output Areas (OA) and Super Output Areas (SOA) for Engl