# Generar Matriz de Distancias

### Instrucciones:

- **a. Origenes**  
  - Es una capa shapefile de polígonos con los campos que quiero calcular.  
  - Tiene que tener los nombres de los campos en una columna llamada `"idest"`.  
  - En general, esta capa es la capa de cultivos o una parte de ella.

- **b. Destinos**  
  - Es una capa shapefile de puntos con los destinos.  
  - Debe tener la columna `"Localidad"` con las localidades de los puertos, molinos o acopios de la capa.

In [1]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import datetime as datetime
import os
from pathlib import Path
import time
import requests

In [7]:
try:
  espcul_gdf = gpd.read_file('./inputs/origenes.shp')

except Exception as e:
  espcul_gdf = gpd.read_file('./inputs/origenes.gpkg')

espcul_gdf = espcul_gdf.loc[espcul_gdf.campania == '24/25']

espcul_gdf = espcul_gdf[['zona', 'idest', 'geometry']]

espcul_gdf = espcul_gdf.loc[espcul_gdf.zona.isin(['LA PAMPA'])]

espcul_gdf = gpd.GeoDataFrame(espcul_gdf, geometry='geometry').reset_index()
espcul_gdf['geometry'] = espcul_gdf.buffer(0.0001)
espcul_dissolve = espcul_gdf.dissolve(by='idest').reset_index()

print(f"espcul_dissolve.shape: {espcul_dissolve.shape}")
print(f"espcul_gdf.shape: {espcul_gdf.shape}")
espcul_dissolve.head()


  espcul_gdf['geometry'] = espcul_gdf.buffer(0.0001)


espcul_dissolve.shape: (5, 4)
espcul_gdf.shape: (112, 4)


Unnamed: 0,idest,geometry,index,zona
0,Atreuco,"MULTIPOLYGON (((-63.8251 -37.10477, -63.82509 ...",144,LA PAMPA
1,La Merced,"POLYGON ((-63.57368 -36.91905, -63.57369 -36.9...",93,LA PAMPA
2,Los Robles Lonquimay,"MULTIPOLYGON (((-63.8182 -36.57147, -63.8182 -...",112,LA PAMPA
3,San Francisco,"POLYGON ((-63.49687 -36.74051, -63.49687 -36.7...",929,LA PAMPA
4,Valle Argentino,"MULTIPOLYGON (((-63.90649 -37.21337, -63.90649...",145,LA PAMPA


In [31]:
# origen_zona = {"O1": "BA SDE S", "O2": "BA SDE S"}
origen_zona = {}
for i in range(len(espcul_gdf)):
    origen_zona[espcul_gdf['idest'][i]] = espcul_gdf['zona'][i]

In [8]:
zonas_list = espcul_gdf.zona.unique()
zonas_list

array(['LA PAMPA'], dtype=object)

In [29]:
espcul_dissolve['x']= espcul_dissolve['geometry'].centroid.x
espcul_dissolve['y']= espcul_dissolve['geometry'].centroid.y
espcul_dissolve['centres']= espcul_dissolve['geometry'].centroid
espcul_centroids = espcul_dissolve.loc[:,['idest','x','y','centres']].copy()
espcul_centroids = espcul_centroids.rename(columns = {'centres': 'geometry'})
espcul_centroids = espcul_centroids.to_crs('EPSG:4326')
print(f"espcul_centroids: {espcul_centroids.shape[0]}")
espcul_centroids.head()

espcul_centroids: 5



  espcul_dissolve['x']= espcul_dissolve['geometry'].centroid.x

  espcul_dissolve['y']= espcul_dissolve['geometry'].centroid.y

  espcul_dissolve['centres']= espcul_dissolve['geometry'].centroid


Unnamed: 0,idest,x,y,geometry
0,Atreuco,-63.815498,-37.088294,POINT (-63.8155 -37.08829)
1,La Merced,-63.553689,-36.897663,POINT (-63.55369 -36.89766)
2,Los Robles Lonquimay,-63.784198,-36.565025,POINT (-63.7842 -36.56502)
3,San Francisco,-63.483646,-36.749309,POINT (-63.48365 -36.74931)
4,Valle Argentino,-63.915926,-37.191708,POINT (-63.91593 -37.19171)


In [10]:
espcul_centroids.to_file(driver = "GPKG",filename='./inputs/centoids.gpkg', encoding='utf-8', index=False)

In [11]:
espcul_coord = {}

for i in range(0, len(espcul_centroids)):
    origin = espcul_centroids['idest'][i]
    x = espcul_centroids['geometry'][i].x
    y = espcul_centroids['geometry'][i].y
    coords = (y, x)
    espcul_coord[origin]=coords
# espcul_coord

In [12]:
try:
  destinos_gdf = gpd.read_file('./inputs/destinos.shp')

except Exception as e:
  destinos_gdf = gpd.read_file('./inputs/destinos.gpkg')


print(destinos_gdf.shape)
destinos_gdf.head()

destinos_coord = {}

for i in range(len(destinos_gdf)):
    destino = destinos_gdf['Localidad'][i]
    punto = destinos_gdf['geometry'][i].centroid
    coords = (punto.y, punto.x)
    destinos_coord[destino] = coords

list(destinos_coord.items())[:4]

(69, 12)


[('9 de Julio', (-35.44393528, -60.88462748)),
 ('Arrecifes', (-34.06751491, -60.10869159)),
 ('Azul', (-36.777447, -59.86344305)),
 ('Bahia Blanca', (-38.71760509, -62.26544693))]

The centroids may be away form actual street network. Use osmnx to find the closest node on OSM network for routing

In [13]:
arr = espcul_gdf.to_crs('EPSG:4326').total_bounds
tupla = tuple(arr)
# print(tupla)

In [14]:
G = ox.graph.graph_from_bbox(tupla, network_type='drive')
nodes = G.nodes()

In [15]:
espcul_coord_snapped = {}
for name, coords in espcul_coord.items():
    node = ox.distance.nearest_nodes(G, coords[1], coords[0])
    info = nodes[node]
    espcul_coord_snapped[name] = (info['y'], info['x'])

espcul_coord_snapped

{'Atreuco': (-37.0952312, -63.8351522),
 'La Merced': (-36.9190191, -63.5534875),
 'Los Robles Lonquimay': (-36.5374728, -63.7769719),
 'San Francisco': (-36.7629028, -63.497029),
 'Valle Argentino': (-37.1679942, -63.9040107)}

In [None]:
service = 'table'
version = 'v1'
profile = 'driving'

In [45]:
# Build list of all coordinates (espcul_coord_snapped first, then destinos_coord)
all_coords = []
# To keep track of index mapping
origin_keys = list(espcul_coord_snapped.keys())
dest_keys = list(destinos_coord.keys())

for key in origin_keys:
    o = espcul_coord_snapped[key]
    # OSRM expects lon,lat; our tuples are (lat, lon) so index [1] is lon, [0] is lat
    all_coords.append(f"{o[1]},{o[0]}")

for key in dest_keys:
    d = destinos_coord[key]
    # Same format: (lat, lon) so we use [1] for lon, [0] for lat
    all_coords.append(f"{d[1]},{d[0]}")

# Build the coordinate string
coords_str = ";".join(all_coords)

# Build sources indices: since espcul_coord_snapped are first in the list, their indices = 0..len(espcul_coord_snapped)-1
sources_indices = ";".join(str(i) for i in range(len(origin_keys)))

# Build destinos_coord indices: destinos_coord start at index len(espcul_coord_snapped)
# Build destination indices: destinos_coord start at index len(espcul_coord_snapped)
dest_start = len(origin_keys)
dest_indices = ";".join(str(dest_start + i) for i in range(len(dest_keys)))

# Build the request URL
profile = "driving"  # or "bike"/"foot" according to your case
port = 5001  # local OSRM server port
base_url = f"http://localhost:{port}"  # local OSRM server
url = (
    f"{base_url}/table/v1/{profile}/{coords_str}"
    f"?sources={sources_indices}&destinations={dest_indices}&annotations=distance"
)

# Send the request
resp = requests.get(url)
resp.raise_for_status()
data = resp.json()

# Check for successful code
if data.get("code") != "Ok":
    raise Exception(f"OSRM table error: {data.get('code')}")

# Extract distances matrix (in meters)
distances = data.get("distances")

# Map results to list of dicts with origen, destino, distancia keys
results = []
for i, orig_key in enumerate(origin_keys):
    for j, dest_key in enumerate(dest_keys):
        # Convert from meters to kilometers
        results.append({
            'origen': orig_key,
            'destino': dest_key,
            'distancia': distances[i][j] / 1000
        })

print(results)

[{'origen': 'Atreuco', 'destino': '9 de Julio', 'distancia': 378.688}, {'origen': 'Atreuco', 'destino': 'Arrecifes', 'distancia': 595.1990999999999}, {'origen': 'Atreuco', 'destino': 'Azul', 'distancia': 457.811}, {'origen': 'Atreuco', 'destino': 'Bahia Blanca', 'distancia': 298.5912}, {'origen': 'Atreuco', 'destino': 'Barker', 'distancia': 480.9814}, {'origen': 'Atreuco', 'destino': 'Benito Juarez', 'distancia': 431.5583}, {'origen': 'Atreuco', 'destino': 'Bragado', 'distancia': 432.414}, {'origen': 'Atreuco', 'destino': 'Brandsen', 'distancia': 629.1041}, {'origen': 'Atreuco', 'destino': 'Ramallo', 'distancia': 663.796}, {'origen': 'Atreuco', 'destino': 'Cañuelas', 'distancia': 565.5233000000001}, {'origen': 'Atreuco', 'destino': 'Capilla del Señor', 'distancia': 612.1795999999999}, {'origen': 'Atreuco', 'destino': 'Laboulaye', 'distancia': 472.241}, {'origen': 'Atreuco', 'destino': 'Los Cisnes', 'distancia': 585.207}, {'origen': 'Atreuco', 'destino': 'Rio Cuarto', 'distancia': 496.8

In [44]:
distances_dict = {}

for k, v in results.items():
    for k2, v2 in v.items():
        origen = k
        destino = k2
        distancia = v2
        distances_dict[(origen, destino)] = distancia

distances_dict

{('Atreuco', '9 de Julio'): 378.688,
 ('Atreuco', 'Arrecifes'): 595.1990999999999,
 ('Atreuco', 'Azul'): 457.811,
 ('Atreuco', 'Bahia Blanca'): 298.5912,
 ('Atreuco', 'Barker'): 480.9814,
 ('Atreuco', 'Benito Juarez'): 431.5583,
 ('Atreuco', 'Bragado'): 432.414,
 ('Atreuco', 'Brandsen'): 629.1041,
 ('Atreuco', 'Ramallo'): 663.796,
 ('Atreuco', 'Cañuelas'): 565.5233000000001,
 ('Atreuco', 'Capilla del Señor'): 612.1795999999999,
 ('Atreuco', 'Laboulaye'): 472.241,
 ('Atreuco', 'Los Cisnes'): 585.207,
 ('Atreuco', 'Rio Cuarto'): 496.84590000000003,
 ('Atreuco', 'Chacabuco'): 521.5346,
 ('Atreuco', 'Cofco Balcarce'): 646.0515,
 ('Atreuco', 'Concepción del Uruguay'): 839.7955999999999,
 ('Atreuco', 'Deheza'): 566.6472,
 ('Atreuco', 'Villa Paranacito'): 719.1871,
 ('Atreuco', 'Dovales (Tandil)'): 539.9815,
 ('Atreuco', 'Flor de Oro, Reconquista'): 1137.7476000000001,
 ('Atreuco', 'Gral. Levalle'): 451.62309999999997,
 ('Atreuco', 'H. Lagos '): 679.266,
 ('Atreuco', 'La Carlota'): 561.1493,


In [None]:
df = pd.DataFrame(results)
df.head()

Unnamed: 0,Atreuco,La Merced,Los Robles Lonquimay,San Francisco,Valle Argentino
9 de Julio,378.688,349.0135,302.6472,346.0523,386.3544
Arrecifes,595.1991,565.5248,519.1584,562.5636,602.8656
Azul,457.811,472.6816,429.3376,472.7428,465.4775
Bahia Blanca,298.5912,313.4618,362.4922,335.7028,306.2576
Barker,480.9814,495.852,537.2868,518.093,488.6478


In [None]:
# --- Inputs (examples) ---
# espcul_coord_snapped = {"O1": (-37.621628, -59.3359883), "O2": (-37.700000, -59.200000)}
# origen_zona = {"O1": "BA SDE S", "O2": "BA SDE S"}
# destinos_coord = {"Flor de Oro, Reconquista": (-29.14, -59.64), "Gral. Levalle": (-34.01, -63.92)}

def build_distance_table(
    espcul_coord_snapped: dict,
    origen_zona: dict,
    destinos_coord: dict,
    profile: str = "driving",
    base_url: str = "http://router.project-osrm.org",
    include_distance: bool = True,
) -> pd.DataFrame:
    """
    Build a long-format OD table using OSRM's Table service.
    Returns a DataFrame with columns:
    zona, idest, origen_x, origen_y, destino, destino_x, destino_y, distancia_m, duracion_s
    """

    # --- Keys and coordinate lists ---
    origin_keys = list(espcul_coord_snapped.keys())
    dest_keys   = list(destinos_coord.keys())

    all_coords = []
    # Append origins first (lon,lat)
    for ok in origin_keys:
        o = espcul_coord_snapped[ok]
        if not (isinstance(o, tuple) and len(o) == 2):
            raise ValueError(f"Origin '{ok}' has invalid coordinate format: {o}")
        lat, lon = o
        all_coords.append(f"{lon},{lat}")  # OSRM expects lon,lat

    # Then destinations (lon,lat)
    for dk in dest_keys:
        d = destinos_coord[dk]
        if not (isinstance(d, tuple) and len(d) == 2):
            raise ValueError(f"Destination '{dk}' has invalid coordinate format: {d}")
        lat, lon = d
        all_coords.append(f"{lon},{lat}")

    coords_str = ";".join(all_coords)

    # Indices for sources and destinations in the unified coordinate list
    sources_indices = ";".join(str(i) for i in range(len(origin_keys)))
    dest_start      = len(origin_keys)
    dest_indices    = ";".join(str(dest_start + i) for i in range(len(dest_keys)))

    # --- Request URL (ask for both durations and distances if needed) ---
    annotations = "duration,distance" if include_distance else "duration"
    url = (
        f"{base_url}/table/v1/{profile}/{coords_str}"
        f"?sources={sources_indices}&destinations={dest_indices}"
        f"&annotations={annotations}"
    )

    # --- Call OSRM ---
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    if data.get("code") != "Ok":
        raise RuntimeError(f"OSRM table error: {data.get('code')}")

    durations = data.get("durations")
    distances = data.get("distances") if include_distance else None
    if durations is None:
        raise RuntimeError("OSRM response missing 'durations'")

    # --- Flatten to long rows ---
    records = []
    for i, ok in enumerate(origin_keys):
        zona = origen_zona.get(ok, None)  # zone per origin id
        o_lat, o_lon = espcul_coord_snapped[ok]
        o_x, o_y = o_lon, o_lat  # x = lon, y = lat

        for j, dk in enumerate(dest_keys):
            d_lat, d_lon = destinos_coord[dk]
            d_x, d_y = d_lon, d_lat

            dur_s = durations[i][j] if durations and durations[i] else None
            dist_m = distances[i][j] if (distances and distances[i]) else None

            # Build row
            records.append({
                "zona": zona,
                "idest": ok,
                "origen_x": o_x,
                "origen_y": o_y,
                "destino": dk,
                "destino_x": d_x,
                "destino_y": d_y,
                "distancia_km": dist_m,
                "duracion_h": dur_s,
            })

    df = pd.DataFrame.from_records(records)

    # Optional: order columns to match your screenshot style
    cols = ["zona", "idest", "origen_x", "origen_y", "destino", "destino_x", "destino_y", "distancia_km", "duracion_h"]
    df = df[cols]
    df.distancia_km = df.distancia_km / 1000
    df.duracion_h = df.duracion_h / 3600

    return df


# --- Example usage ---
df_od = build_distance_table(espcul_coord_snapped, origen_zona, destinos_coord)
df_od.head()


In [None]:
path = "./outputs"

if not os.path.exists(path):
    os.makedirs(path)

# df.to_csv('matrix.csv', index=False)
df_od.to_excel('./outputs/matriz_de_distancias_table_service.xlsx', index=False)