In [1]:
import geopandas as gpd
import pandas as pd
from shapely import Point, LineString, MultiLineString, MultiPolygon
from shapely import distance, from_wkt, intersects, buffer, set_precision, shortest_line
from shapely.ops import split, snap, transform, split, nearest_points
from functools import partial
import pyproj
from itertools import chain

In [2]:
# Study-area settings: keep exactly one of the two groups below active
# (uncommented) and leave the other one commented out.

# Lima (EPSG:32718 is WGS 84 / UTM zone 18S)

# crs = 32718

# bus_route_path = "busability/bus_routes.geojson"
# stops_path = 'busability/stops.txt' # from GTFS files

# London (EPSG:32630 is WGS 84 / UTM zone 30N)

# EPSG code of the projected CRS used for the geometry work in this notebook
crs = 32630

# Bus route geometries (GeoJSON) and the GTFS stops table for the active city
bus_route_path = "data/bus_routes_london.geojson"
stops_path = 'data/stops_london.txt' # from GTFS files

In [3]:
def preprocess_line(sline):
    """Return the outline of a 1-unit buffer around ``sline`` as a LineString.

    Parameters
    ----------
    sline : shapely geometry
        Route geometry to outline. The buffer distance is 1 in the units of the
        geometry's coordinates.

    Returns
    -------
    LineString
        The exterior ring of the buffer polygon; interior rings (holes) are
        not included.
    """
    buffered_polygon = sline.buffer(1)

    if isinstance(buffered_polygon, MultiPolygon):
        # A multi-part buffer has no single exterior ring. The previous
        # zero-argument ``buffered_polygon.union()`` raised TypeError because
        # ``union`` requires a second geometry, and separate parts cannot be
        # merged into one polygon anyway. Fall back to the largest part.
        # NOTE(review): the smaller parts of the route are dropped here;
        # confirm that this is acceptable for gapped routes.
        buffered_polygon = max(buffered_polygon.geoms, key=lambda part: part.area)

    # Extract the outer ring as a LineString
    return LineString(buffered_polygon.exterior.coords)
    

def modify_string(value):
    """Prefix ``value`` with 'r' and shorten its direction marker.

    Every '_Ida' is replaced by 'A'. Only when no '_Ida' is present is every
    '_Vuelta' replaced by 'B'. Strings without either marker just get the
    'r' prefix.
    """
    prefixed = 'r' + value

    # The first marker found decides the substitution; the other is left alone.
    for marker, letter in (('_Ida', 'A'), ('_Vuelta', 'B')):
        if marker in prefixed:
            return prefixed.replace(marker, letter)

    return prefixed
    

def add_point(sline, bus_stop_1, bus_stop_2):
    """Insert two bus stops as vertices of ``sline``.

    The coordinates of ``sline`` and of both stops are collected and ordered
    by their position along ``sline`` (``sline.project``), then joined into a
    new LineString.

    Parameters
    ----------
    sline : LineString
        Line to receive the extra vertices.
    bus_stop_1, bus_stop_2 : Point
        Stops to insert.

    Returns
    -------
    LineString
        A new line through all original vertices plus the two stops.
    """
    all_points_coords = chain(sline.coords, bus_stop_1.coords, bus_stop_2.coords)
    all_points = map(Point, all_points_coords)
    # Order along the line that was passed in. The previous key used the
    # notebook-level variable ``line``, so the function depended on hidden
    # state and sorted the vertices along a different geometry.
    return LineString(sorted(all_points, key=sline.project))
    

def _linestring_parts(split_result):
    """Return the LineString members of a ``split`` result as a list."""
    # ``shapely.ops.split`` returns a GeometryCollection; plain sequences are
    # accepted as well.
    if isinstance(split_result, (list, tuple)):
        parts = split_result
    else:
        parts = split_result.geoms
    return [geom for geom in parts if isinstance(geom, LineString)]


def clip_busstops(start_point, end_point, slines):
    """Return the piece of ``slines`` that runs between two points.

    The line is split at ``start_point``, the resulting pieces are split again
    at ``end_point``, and the pieces that intersect both points are kept.

    Parameters
    ----------
    start_point, end_point : Point
        Split locations.
    slines : LineString
        Line to cut.

    Returns
    -------
    LineString or None
        The shortest piece that intersects both points, or ``None`` when no
        piece does.
    """
    segments = _linestring_parts(split(slines, start_point))
    if not segments:
        return None

    # Split the pieces again at the second stop. The pieces are wrapped in a
    # MultiLineString directly; no GeoDataFrame or CRS is needed for this.
    segments = _linestring_parts(split(MultiLineString(segments), end_point))

    intersecting_segments = [
        segment
        for segment in segments
        if intersects(segment, end_point) and intersects(segment, start_point)
    ]

    if intersecting_segments:
        # Find the segment with the shortest length among the intersecting ones
        return min(intersecting_segments, key=lambda s: s.length)

    return None

In [4]:
# Load the bus route geometries from the configured route file.
# NOTE(review): stop coordinates are projected to a metric CRS further down,
# while these geometries are used as read. Confirm the file is already stored
# in `crs`; otherwise it needs reprojecting.
gdf_lines = gpd.read_file(bus_route_path)

In [5]:
# Load the GTFS stops table from the path configured in the settings cell, so
# that switching city only requires editing the settings.
stops_df = pd.read_csv(stops_path)

# Drop GTFS columns that are not used below. These columns are optional in
# GTFS, so a feed that omits some of them must not fail here.
stops_df = stops_df.drop(
    columns=["wheelchair_boarding", "location_type", "parent_station", "platform_code"],
    errors="ignore",
)

In [6]:
import ast

# Read the per-route stop sequences. The file's own header row is skipped and
# replaced by the column names given here.
df = pd.read_csv(
    'busability/london_stops_seq.csv',
    delimiter=',',
    header=None,
    names=['id', 'route_id', 'stop_ids'],
    skiprows=1,
)

# Each 'stop_ids' cell holds a Python literal as text; parse it into an object.
df['stop_ids'] = df['stop_ids'].map(ast.literal_eval)

In [7]:
# One row per (sequence, stop): explode the stop lists, drop exact duplicate
# rows, and number the stops within each route in their current order.
df_expanded = (
    df.explode('stop_ids')
    .drop_duplicates()
    .assign(incremental_id=lambda frame: frame.groupby('route_id').cumcount())
    .rename(columns={'stop_ids': 'stop_id'})
)

# Attach stop coordinates (only stops present in the stops table survive the
# inner join) and keep the columns needed later.
result_df = (
    df_expanded
    .merge(stops_df, on='stop_id', how='inner')
    .loc[:, ['route_id', 'stop_id', 'stop_lon', 'stop_lat', 'incremental_id']]
    .dropna(subset=['route_id'])
    .assign(route_id=lambda frame: frame['route_id'].astype(int))
)

# Final table: unique rows ordered by route and position within the route.
df_no_duplicates = (
    result_df
    .drop_duplicates()
    .sort_values(by=['route_id', 'incremental_id'])
)
df = df_no_duplicates

In [8]:
# Show the ordered stop table (the notebook display abbreviates long frames).
df

Unnamed: 0,route_id,stop_id,stop_lon,stop_lat,incremental_id
0,89,490014051VC,-0.149191,51.492668,0
1,89,4900080168,-0.453272,51.471085,1
2,89,1600GL1272,-1.960760,51.716763,2
3,89,1600GLA756,-2.053024,51.876019,3
4,89,1600GL1082,-2.078006,51.899685,4
...,...,...,...,...,...
64578,102253,490013194S,-0.004387,51.551922,81
64579,102253,490020129S,-0.008078,51.547668,82
64580,102253,490002268ZZ,-0.008057,51.545822,83
64581,102253,490020130S,-0.009991,51.543159,84


In [None]:
# 10.11 13:22

import re
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import Transformer

# Project stop coordinates from WGS 84 lon/lat (EPSG:4326) into the CRS chosen
# in the settings cell. Using `crs` instead of a hard-coded EPSG code keeps the
# projection in step with the selected city. `always_xy=True` fixes the
# argument order to (lon, lat).
transformer = Transformer.from_crs("EPSG:4326", f"EPSG:{crs}", always_xy=True)

# Counts routes without geometry plus stop pairs that were already processed.
cnt = 0
# (stop_id_1, stop_id_2, route) keys that have been handled so far.
processed_pairs = set()
# Maps each pair key to its clipped segment (or None when none was found).
results = {}

def clean_and_convert(value):
    """Parse a coordinate value into a float, tolerating messy text.

    Numbers are returned as floats directly. Text is stripped, typographic
    minus/dash characters are replaced by '-', and the result is parsed. If
    that fails, every character other than digits, '.' and '-' is removed and
    parsing is retried.

    Parameters
    ----------
    value : object
        Number or text holding a number.

    Returns
    -------
    float or None
        The parsed finite value, or ``None`` (after printing a warning) when
        no number can be extracted.
    """
    import math
    import numbers

    # Already numeric: do not round-trip through str(). Small magnitudes print
    # in scientific notation (e.g. -5e-05), and the character filter below
    # would strip the 'e' and make the value unparseable.
    if isinstance(value, numbers.Real) and not isinstance(value, bool):
        number = float(value)
        if math.isfinite(number):
            return number
        print(f"Warning: Unable to convert value '{value}' to float.")
        return None

    text = re.sub(r'[−–—]', '-', str(value).strip())

    # Try the text as it is first, so exponent notation such as '1e-05' is
    # honoured instead of being reduced to '1-05' by the filter.
    try:
        number = float(text)
        if math.isfinite(number):
            return number
    except ValueError:
        pass

    try:
        return float(re.sub(r'[^0-9.-]', '', text))
    except ValueError:
        # Report the value as received, not the filtered remainder.
        print(f"Warning: Unable to convert value '{value}' to float.")
        return None

# Clip the route outline between every pair of consecutive stops, route by route.
for route in tqdm(df["route_id"].unique(), desc="Processing"):
    part_df = df[df["route_id"] == route]
    points_list = []

    try:
        line = gdf_lines[gdf_lines["route_id"] == route].geometry.iloc[0]
    except IndexError:
        # No geometry for this route in the routes layer: count it and move on.
        # Only the "no matching row" case is skipped; other errors surface.
        cnt += 1
        continue

    # Buffered outline of the route. It does not depend on the stop pair, so it
    # is built at most once per route, on first use.
    route_outline = None

    # Walk over consecutive stop pairs of this route
    for i in range(len(part_df) - 1):
        row1, row2 = part_df.iloc[i], part_df.iloc[i + 1]

        lon1 = clean_and_convert(row1['stop_lon'])
        lat1 = clean_and_convert(row1['stop_lat'])
        lon2 = clean_and_convert(row2['stop_lon'])
        lat2 = clean_and_convert(row2['stop_lat'])

        # Skip pairs with an unparseable coordinate
        if lon1 is None or lat1 is None or lon2 is None or lat2 is None:
            continue

        # Transform coordinates to the target CRS
        x1, y1 = transformer.transform(lon1, lat1)
        x2, y2 = transformer.transform(lon2, lat2)

        point1 = Point(x1, y1)
        point2 = Point(x2, y2)

        pair_id = (row1['stop_id'], row2['stop_id'], route)

        if pair_id not in processed_pairs:
            if route_outline is None:
                route_outline = preprocess_line(line)
            # Geometry work is done only for pairs that still need a result.
            processed_line = add_point(route_outline, point1, point2)
            results[pair_id] = clip_busstops(point1, point2, processed_line)
            processed_pairs.add(pair_id)
        else:
            cnt += 1

        points_list.append(point2)

    # gdf = gpd.GeoDataFrame(geometry=points_list).set_crs(32718)
    # gdf.to_file(f"punkte_{route}.geojson")


# One row per processed pair.
# NOTE(review): the route part of the key is not written out, so the same stop
# pair on two routes yields two rows that cannot be told apart. Confirm this is
# intended.
results_df = pd.DataFrame([
    {'id_1': pair[0], 'id_2': pair[1], 'result': result}
    for pair, result in results.items()
])

results_df.to_csv('processed_results_london.csv', index=False)

print(cnt)


  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(l



  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(l



  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(l



  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(l



  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(line, other)
  return lib.line_locate_point(l