## Imports

In [3]:
import geopandas as gpd
from shapely.geometry import Point
import random
import pandas as pd

## Urban Areas GeoJSON file Read

In [4]:
urban_areas = gpd.read_file("../data/Urban_Areas_National_Statistical_Boundaries_2022_Ungeneralised_View_6867301564302593317.geojson")

In [5]:
# Quick look at available columns
print(urban_areas.columns)

Index(['OBJECTID', 'URBAN_AREA_GUID', 'URBAN_AREA_CODE', 'URBAN_AREA_NAME',
       'COUNTY', 'Centroid_x', 'Centroid_y', 'geometry'],
      dtype='object')


In [8]:
urban_areas.head()

Unnamed: 0,OBJECTID,URBAN_AREA_GUID,URBAN_AREA_CODE,URBAN_AREA_NAME,COUNTY,Centroid_x,Centroid_y,geometry
0,1,0138fb4f-2ab8-4035-8287-7f13bb3b3de6,27295,Bearna,Galway,522593.204561,723181.979819,"POLYGON ((-9.17177 53.26583, -9.1716 53.26553,..."
1,2,0139a442-0f36-46dc-b383-2a34d4d3f6f7,10019,Ardee,Louth,696152.155347,790695.313556,"POLYGON ((-6.52932 53.86812, -6.52927 53.86794..."
2,3,020a2786-0521-445e-bf50-e3a24632bab1,28125,Ballinamore,Leitrim,613071.60711,811532.546151,"POLYGON ((-7.7949 54.05975, -7.79486 54.05971,..."
3,4,0261d090-30b7-406d-9f7a-e69a0c6e6239,11408,Ratoath,Meath,701909.103785,751650.842556,"POLYGON ((-6.45573 53.51625, -6.45546 53.51616..."
4,5,0265ada6-7078-45da-afb0-ac001ca276b1,16544,O'Briensbridge-Montpelier,Clare,566174.956058,666640.253476,"POLYGON ((-8.49436 52.75419, -8.49428 52.75418..."


In [11]:
urban_areas['URBAN_AREA_NAME']

0

## Considering Major Cities
> Grabbing **5 major cities** for now (Simplicity's sake)
- Dublin city and suburbs
- Cork city and suburbs
- Limerick city and suburbs
- Galway city and suburbs
- Waterford city and suburbs

In [12]:
major_cities = [
    'Dublin city and suburbs',
    'Cork city and suburbs',
    'Limerick city and suburbs',
    'Galway city and suburbs',
    'Waterford city and suburbs'
]

urban_areas = urban_areas[urban_areas['URBAN_AREA_NAME'].isin(major_cities)]

city_weights = {
    'Dublin city and suburbs': 0.40,
    'Cork city and suburbs': 0.15,
    'Limerick city and suburbs': 0.10,
    'Galway city and suburbs': 0.10,
    'Waterford city and suburbs': 0.05
}

urban_areas['weight'] = urban_areas['URBAN_AREA_NAME'].map(city_weights).fillna(0.01)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


## Grabbing random locations using population as weights

> Using population to dictate the number of locations to grab from each city

In [13]:
# Helper function to sample point inside polygon
def sample_point_within(polygon):
    minx, miny, maxx, maxy = polygon.bounds
    while True:
        p = Point(random.uniform(minx, maxx), random.uniform(miny, maxy))
        if polygon.contains(p):
            return p


In [16]:
# Generate delivery points
def generate_delivery_points(n=1000):
    samples = []
    weights = urban_areas['weight'].values
    normalized_weights = weights / weights.sum()

    for _ in range(n):
        region = urban_areas.sample(weights=normalized_weights).iloc[0]
        city = region['URBAN_AREA_NAME']
        polygon = region.geometry
        point = sample_point_within(polygon)
        samples.append({
            'city': city,
            'lat': point.y,
            'lon': point.x
        })
    return pd.DataFrame(samples)

In [17]:
# Generate 1000 delivery points
df_points = generate_delivery_points(1000)

# Save to CSV (optional)
df_points.to_csv("smart_delivery_points.csv", index=False)

# STEP 1: Randomly Pair Points (Origin ↔ Destination)

In [18]:
import pandas as pd

# Load your generated points
df_points = pd.read_csv("smart_delivery_points.csv")

# Randomly shuffled origins and destinations
df_origins = df_points.sample(frac=1).reset_index(drop=True)
df_destinations = df_points.sample(frac=1).reset_index(drop=True)

# Combine into trips
df_trips = pd.DataFrame({
    "origin_city": df_origins["city"],
    "origin_lat": df_origins["lat"],
    "origin_lon": df_origins["lon"],
    "dest_city": df_destinations["city"],
    "dest_lat": df_destinations["lat"],
    "dest_lon": df_destinations["lon"]
})

df_trips.head()

Unnamed: 0,origin_city,origin_lat,origin_lon,dest_city,dest_lat,dest_lon
0,Dublin city and suburbs,53.276699,-6.150429,Dublin city and suburbs,53.260513,-6.129931
1,Dublin city and suburbs,53.396304,-6.299107,Limerick city and suburbs,52.666091,-8.684511
2,Galway city and suburbs,53.261758,-9.100766,Cork city and suburbs,51.92754,-8.529792
3,Dublin city and suburbs,53.321108,-6.222912,Dublin city and suburbs,53.378718,-6.163126
4,Waterford city and suburbs,52.236297,-7.034926,Galway city and suburbs,53.257949,-9.092321


# Route Each Trip with OSRM

In [19]:
import requests
import time

def get_osrm_route(origin_lat, origin_lon, dest_lat, dest_lon):
    url = f"http://localhost:5000/route/v1/driving/{origin_lon},{origin_lat};{dest_lon},{dest_lat}?overview=false"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        route = data['routes'][0]
        return route['duration'], route['distance']
    except Exception as e:
        print(f"Routing failed: {e}")
        return None, None

# Add OSRM data to each trip
durations = []
distances = []

for i, row in df_trips.iterrows():
    dur, dist = get_osrm_route(
        row["origin_lat"], row["origin_lon"],
        row["dest_lat"], row["dest_lon"]
    )
    durations.append(dur)
    distances.append(dist)
    
    if i % 100 == 0:
        print(f"Routed {i} trips...")
        time.sleep(0.1)  # Prevent spamming OSRM

df_trips["duration_sec"] = durations
df_trips["distance_m"] = distances


Routed 0 trips...
Routed 100 trips...
Routed 200 trips...
Routed 300 trips...
Routed 400 trips...
Routed 500 trips...
Routed 600 trips...
Routed 700 trips...
Routed 800 trips...
Routed 900 trips...


## Let's check the routes and possible failures

In [20]:
df_trips.head()

Unnamed: 0,origin_city,origin_lat,origin_lon,dest_city,dest_lat,dest_lon,duration_sec,distance_m
0,Dublin city and suburbs,53.276699,-6.150429,Dublin city and suburbs,53.260513,-6.129931,256.2,2513.5
1,Dublin city and suburbs,53.396304,-6.299107,Limerick city and suburbs,52.666091,-8.684511,8623.8,210995.4
2,Galway city and suburbs,53.261758,-9.100766,Cork city and suburbs,51.92754,-8.529792,9313.3,195700.6
3,Dublin city and suburbs,53.321108,-6.222912,Dublin city and suburbs,53.378718,-6.163126,1195.7,11757.9
4,Waterford city and suburbs,52.236297,-7.034926,Galway city and suburbs,53.257949,-9.092321,12160.9,231812.6


In [21]:
print(df_trips["duration_sec"].isnull().sum())
print(df_trips["distance_m"].isnull().sum())


0
0
