## Nodes_filtered_trivial_nodes_links_by_links

### Import libraries

In [1]:
#import libraries
import pandas as pd
import numpy as np
import geopandas as gpd
from sqlalchemy import create_engine
from sqlalchemy import text
from shapely.geometry import LineString, MultiPoint, Point
import networkx as nx
from shapely.ops import unary_union

#### Create the connection with the DB

In [2]:
# Step 1: Create the connection with the DB
engine = create_engine("postgresql://urbaninfo:@cirrus.ita.chalmers.se/se_tuptp")
conn = engine.connect()

In [3]:
# Step 2: Create a new schema when it is necessary
#schema = '''CREATE SCHEMA name_schema'''
#conn.execute(schema)

### Nodes - Prepare the data

In [4]:
# Import Data
# Step 0: Import Data
nodes_pt_1_query = text('SELECT * FROM pt_1_mdirections_mstops_mroutes.nodes_pt_1')
nodes_pt_1 = gpd.read_postgis(nodes_pt_1_query, engine, geom_col='geometry')

In [6]:
# Preprocess the data
# Step 1: Copy the original DataFrame
nodes_pt_8_l1 = nodes_pt_1.copy()

# Step 2: Aggregate functions for node grouping
agg_func_node = {
    'route_id': list,
    'route_short_name': list,
    'stop_id': list,
    'direction_id': list,
    'mode': list,
    'osmid': list,
    'geometry': lambda g: Point(unary_union(g).centroid.x, unary_union(g).centroid.y)
}
nodes_pt_8_l2 = nodes_pt_8_l1.groupby(['stop_name', 'place_id']).agg(agg_func_node).reset_index()

# Step 3: Set Geometry
nodes_pt_8_l2 = gpd.GeoDataFrame(nodes_pt_8_l2, geometry='geometry', crs='EPSG:3006')

# Step 4: Display the results
nodes_pt_8_l2.head()

Unnamed: 0,stop_name,place_id,route_id,route_short_name,stop_id,direction_id,mode,osmid,geometry
0,7:e Villagatan,307955489,"[9011014200300000, 9011014200500000, 901101420...","[3, 5, 8, 1, 3, 5, 8, 1]","[9022014082785001, 9022014082785001, 902201408...","[0, 1, 0, 0, 1, 0, 1, 1]","[bus_service, bus_service, bus_service, bus_se...","[87793055.0, 87793055.0, 87793055.0, 87793055....",POINT (378291.977 6399668.440)
1,AGA Vattenfall,307980814,"[9011014633100000, 9011014633400000, 901101463...","[331, 334, 4, 932, 331, 334, 4]","[9022014015161001, 9022014015161001, 902201401...","[0, 0, 1, 1, 1, 1, 0]","[bus_service, bus_service, bus_service, bus_se...","[1021141732.0, 1021141732.0, 1021141732.0, 102...",POINT (313191.955 6442732.480)
2,Abrahamstorp,307959538,"[9011014366300000, 9011014658100000, 901101436...","[663, 581, 663]","[9022014046020001, 9022014046020001, 902201404...","[1, 1, 0]","[communal_taxi_service, bus_service, communal_...","[173516827.0, 173516827.0, 835571452.0]",POINT (371017.481 6448504.455)
3,Acklinga,307927983,"[9011014330300000, 9011014332700000, 901101438...","[303, 327, 327, 303, 327]","[9022014042120001, 9022014042120001, 902201404...","[1, 1, 0, 0, 0]","[bus_service, bus_service, communal_taxi_servi...","[19804825.0, 19804825.0, 19804825.0, 19804825....",POINT (434427.979 6451013.415)
4,Acklinga Solbacken,307927983,"[9011014384000000, 9011014332600000]","[326, 326]","[9022014042270001, 9022014042270002]","[1, 1]","[communal_taxi_service, bus_service]","[1008032531.0, 1008032531.0]",POINT (434327.469 6451981.944)


In [7]:
# Step 5: Create buffer zones
nodes_pt_8_l2 ['buffer'] = nodes_pt_8_l2.geometry.buffer(85)
nodes_pt_8_l2 = nodes_pt_8_l2.set_geometry('buffer', crs='epsg:3006')

In [8]:
# Step 6: Create unary union GeoDataFrame
nodes_pt_8_l2_unary_union_buffer = gpd.GeoDataFrame(
    geometry=[nodes_pt_8_l2.unary_union], crs='epsg:3006'
).explode(index_parts=True).reset_index(drop=True)

# Step 7: Assign buffer IDs
nodes_pt_8_l2_unary_union_buffer['buffer_id'] = nodes_pt_8_l2_unary_union_buffer.index

In [9]:
# Step 8: Perform spatial join
nodes_pt_8_l2_spatial_join = (
    gpd.sjoin(nodes_pt_8_l2_unary_union_buffer, nodes_pt_8_l2, how='inner', predicate='intersects')
    .reset_index(drop=True)
    .rename(columns={'geometry_left': 'geometry_buffer', 'geometry_right': 'geometry'})
)

In [10]:
# Step 9: Explode modes
nodes_pt_8_l2_exploded_modes = nodes_pt_8_l2_spatial_join.explode('mode').drop_duplicates(['stop_name', 'place_id', 'mode'])

# Step 10: Count modes by buffer and merge the mode count in the nodes table
nodes_pt_8_l2_mode_count = nodes_pt_8_l2_exploded_modes.groupby('buffer_id')['mode'].nunique().reset_index(name='mode_count')
nodes_pt_8_l2_mode_count_merged = pd.merge(nodes_pt_8_l2_exploded_modes, nodes_pt_8_l2_mode_count, on='buffer_id', how='inner')

In [11]:
# Step 11: Create tables with stops having more than one mode and with only one mode
nodes_pt_8_l2_multi_modes = nodes_pt_8_l2_mode_count_merged[nodes_pt_8_l2_mode_count_merged.mode_count > 1]
nodes_pt_8_l2_one_mode = nodes_pt_8_l2_mode_count_merged[nodes_pt_8_l2_mode_count_merged.mode_count == 1]

In [12]:
# Step 12: Rename buffers by most recurrent stop name
nodes_pt_8_l2_stop_areas_name = nodes_pt_8_l2_multi_modes.groupby('buffer_id').agg({'stop_name': lambda x: x.mode().iloc[0]}).reset_index()

In [13]:
# Step 13: Merge new names with multi-mode buffers
nodes_pt_8_l2_stop_areas_name_multi_modes = pd.merge(
    nodes_pt_8_l2_multi_modes[['buffer_id', 'stop_name', 'place_id', 'mode', 'geometry_buffer']],
    nodes_pt_8_l2_stop_areas_name, on='buffer_id', how='inner'
).rename(columns={'stop_name_x': 'stop_name', 'stop_name_y': 'stop_areas_name'})

In [14]:
# Step 14: Create centroids of buffers
nodes_pt_8_l2_stop_areas_centroids = gpd.GeoDataFrame(nodes_pt_8_l2_stop_areas_name_multi_modes, geometry='geometry_buffer', crs='EPSG:3006')
nodes_pt_8_l2_stop_areas_centroids ['geometry'] = nodes_pt_8_l2_stop_areas_centroids.geometry.centroid

In [16]:
# Step 15: Merge multi-mode buffers with one-mode table
nodes_pt_8_l2_one_mode.loc[:,'stop_areas_name'] = nodes_pt_8_l2_one_mode.loc[:,'stop_name']
nodes_pt_8_l3 = pd.merge(
    nodes_pt_8_l2_one_mode[['stop_areas_name', 'stop_name', 'place_id', 'mode', 'geometry']],
    nodes_pt_8_l2_stop_areas_centroids,
    on=['stop_areas_name', 'stop_name', 'place_id', 'mode', 'geometry'],
    how='outer'
)

In [17]:
# Step 16: Merge with nodes_pt_1 to get disaggregated information
nodes_pt_8_l4 = pd.merge(
    nodes_pt_8_l3, nodes_pt_1[['stop_id', 'route_id', 'route_short_name', 'direction_id', 'osmid', 'stop_name', 'place_id', 'mode']],
    on=['stop_name', 'place_id', 'mode'], how='inner'
)

# Step 17 (optional): Display the results
nodes_pt_8_l4.head()

Unnamed: 0,stop_areas_name,stop_name,place_id,mode,geometry,buffer_id,geometry_buffer,stop_id,route_id,route_short_name,direction_id,osmid
0,7:e Villagatan,7:e Villagatan,307955489,bus_service,POINT (378291.977 6399668.440),,,9022014082785001,9011014200300000,3,0,87793055.0
1,7:e Villagatan,7:e Villagatan,307955489,bus_service,POINT (378291.977 6399668.440),,,9022014082785001,9011014200500000,5,1,87793055.0
2,7:e Villagatan,7:e Villagatan,307955489,bus_service,POINT (378291.977 6399668.440),,,9022014082785001,9011014200800000,8,0,87793055.0
3,7:e Villagatan,7:e Villagatan,307955489,bus_service,POINT (378291.977 6399668.440),,,9022014082785001,9011014200100000,1,0,87793055.0
4,7:e Villagatan,7:e Villagatan,307955489,bus_service,POINT (378291.977 6399668.440),,,9022014082785002,9011014200300000,3,1,23859327.0


In [57]:
# Step 18: Create node_id column
nodes_pt_8_l4['node_id'] = nodes_pt_8_l4['stop_areas_name'].astype(str) + '_' + nodes_pt_8_l4['place_id'].astype(str)

# Step 18: Aggregate by node_id
agg_func_node = {
    'stop_areas_name': 'first',
    'stop_name': list,
    'place_id': 'first',
    'mode':list,
    'route_id': list,
    'route_short_name': list,
    'stop_id': list,
    'direction_id': list,
    'osmid': list,
    'geometry': 'first'
}

nodes_pt_8_l5 = nodes_pt_8_l4.groupby('node_id').agg(agg_func_node).reset_index()

# Step 20 (optional): Display the temporary results
nodes_pt_8_l5.head()

Unnamed: 0,node_id,stop_areas_name,stop_name,place_id,mode,route_id,route_short_name,stop_id,direction_id,osmid,geometry,mode_nunique
0,7:e Villagatan_307955489,7:e Villagatan,"[7:e Villagatan, 7:e Villagatan, 7:e Villagata...",307955489,"[bus_service, bus_service, bus_service, bus_se...","[9011014200300000, 9011014200500000, 901101420...","[3, 5, 8, 1, 3, 5, 8, 1]","[9022014082785001, 9022014082785001, 902201408...","[0, 1, 0, 0, 1, 0, 1, 1]","[87793055.0, 87793055.0, 87793055.0, 87793055....",POINT (378291.977 6399668.440),1
1,AGA Vattenfall_307980814,AGA Vattenfall,"[AGA Vattenfall, AGA Vattenfall, AGA Vattenfal...",307980814,"[bus_service, bus_service, bus_service, bus_se...","[9011014633100000, 9011014633400000, 901101463...","[331, 334, 4, 932, 331, 334, 4]","[9022014015161001, 9022014015161001, 902201401...","[0, 0, 1, 1, 1, 1, 0]","[1021141732.0, 1021141732.0, 1021141732.0, 102...",POINT (313191.955 6442732.480),1
2,Abrahamstorp_307959538,Abrahamstorp,"[Abrahamstorp, Abrahamstorp, Abrahamstorp]",307959538,"[bus_service, communal_taxi_service, communal_...","[9011014658100000, 9011014366300000, 901101436...","[581, 663, 663]","[9022014046020001, 9022014046020001, 902201404...","[1, 1, 0]","[173516827.0, 173516827.0, 835571452.0]",POINT (371017.481 6448504.455),2
3,Acklinga Solbacken_307927983,Acklinga Solbacken,"[Acklinga Solbacken, Acklinga Solbacken]",307927983,"[bus_service, communal_taxi_service]","[9011014332600000, 9011014384000000]","[326, 326]","[9022014042270002, 9022014042270001]","[1, 1]","[1008032531.0, 1008032531.0]",POINT (434327.469 6451981.944),2
4,Acklinga kyrka_307927983,Acklinga kyrka,"[Acklinga kyrka, Acklinga kyrka, Acklinga kyrk...",307927983,"[bus_service, bus_service, bus_service, bus_se...","[9011014330300000, 9011014332700000, 901101433...","[303, 327, 303, 327, 327]","[9022014042119001, 9022014042119001, 902201404...","[1, 1, 0, 0, 0]","[19804825.0, 19804825.0, 19804825.0, 19804825....",POINT (434071.965 6451013.922),2


In [133]:
# Filter and organize the final table
# Step 20: Select desired columns for the final DataFrame
nodes_pt_8_l6 = nodes_pt_8_l5 [['node_id', 'stop_areas_name', 'place_id', 'stop_name', 'route_id',
                            'route_short_name', 'stop_id', 'mode', 'direction_id', 'osmid', 'geometry']]

# Step 21: Display the final results
nodes_pt_8_l6.head()

Unnamed: 0,node_id,stop_areas_name,place_id,stop_name,route_id,route_short_name,stop_id,mode,direction_id,osmid,geometry
0,7:e Villagatan_307955489,7:e Villagatan,307955489,"[7:e Villagatan, 7:e Villagatan, 7:e Villagata...","[9011014200300000, 9011014200500000, 901101420...","[3, 5, 8, 1, 3, 5, 8, 1]","[9022014082785001, 9022014082785001, 902201408...","[bus_service, bus_service, bus_service, bus_se...","[0, 1, 0, 0, 1, 0, 1, 1]","[87793055.0, 87793055.0, 87793055.0, 87793055....",POINT (378291.977 6399668.440)
1,AGA Vattenfall_307980814,AGA Vattenfall,307980814,"[AGA Vattenfall, AGA Vattenfall, AGA Vattenfal...","[9011014633100000, 9011014633400000, 901101463...","[331, 334, 4, 932, 331, 334, 4]","[9022014015161001, 9022014015161001, 902201401...","[bus_service, bus_service, bus_service, bus_se...","[0, 0, 1, 1, 1, 1, 0]","[1021141732.0, 1021141732.0, 1021141732.0, 102...",POINT (313191.955 6442732.480)
2,Abrahamstorp_307959538,Abrahamstorp,307959538,"[Abrahamstorp, Abrahamstorp, Abrahamstorp]","[9011014658100000, 9011014366300000, 901101436...","[581, 663, 663]","[9022014046020001, 9022014046020001, 902201404...","[bus_service, communal_taxi_service, communal_...","[1, 1, 0]","[173516827.0, 173516827.0, 835571452.0]",POINT (371017.481 6448504.455)
3,Acklinga Solbacken_307927983,Acklinga Solbacken,307927983,"[Acklinga Solbacken, Acklinga Solbacken]","[9011014332600000, 9011014384000000]","[326, 326]","[9022014042270002, 9022014042270001]","[bus_service, communal_taxi_service]","[1, 1]","[1008032531.0, 1008032531.0]",POINT (434327.469 6451981.944)
4,Acklinga kyrka_307927983,Acklinga kyrka,307927983,"[Acklinga kyrka, Acklinga kyrka, Acklinga kyrk...","[9011014330300000, 9011014332700000, 901101433...","[303, 327, 303, 327, 327]","[9022014042119001, 9022014042119001, 902201404...","[bus_service, bus_service, bus_service, bus_se...","[1, 1, 0, 0, 0]","[19804825.0, 19804825.0, 19804825.0, 19804825....",POINT (434071.965 6451013.922)


### Links - Prepare the data

In [108]:
#Import data
#Step 0: Import Data
links_pt_1_query = text('SELECT * FROM pt_1_mdirections_mstops_mroutes.links_pt_1_wide')
links_pt_1 = gpd.read_postgis(links_pt_1_query, engine, geom_col='geometry')

In [110]:
# Step 1: Explode nodes table by stop name, clean data
nodes_pt_8_exploded_l1 = nodes_pt_8_l6.explode('stop_name').drop_duplicates(['stop_name', 'place_id'])

# Step 2: Display the temporary results
nodes_pt_8_exploded_l1.head()

Unnamed: 0,node_id,stop_areas_name,place_id,stop_name,route_id,route_short_name,stop_id,mode,direction_id,osmid,geometry
0,7:e Villagatan_307955489,7:e Villagatan,307955489,7:e Villagatan,"[9011014200300000, 9011014200500000, 901101420...","[3, 5, 8, 1, 3, 5, 8, 1]","[9022014082785001, 9022014082785001, 902201408...","[bus_service, bus_service, bus_service, bus_se...","[0, 1, 0, 0, 1, 0, 1, 1]","[87793055.0, 87793055.0, 87793055.0, 87793055....",POINT (378291.977 6399668.440)
1,AGA Vattenfall_307980814,AGA Vattenfall,307980814,AGA Vattenfall,"[9011014633100000, 9011014633400000, 901101463...","[331, 334, 4, 932, 331, 334, 4]","[9022014015161001, 9022014015161001, 902201401...","[bus_service, bus_service, bus_service, bus_se...","[0, 0, 1, 1, 1, 1, 0]","[1021141732.0, 1021141732.0, 1021141732.0, 102...",POINT (313191.955 6442732.480)
2,Abrahamstorp_307959538,Abrahamstorp,307959538,Abrahamstorp,"[9011014658100000, 9011014366300000, 901101436...","[581, 663, 663]","[9022014046020001, 9022014046020001, 902201404...","[bus_service, communal_taxi_service, communal_...","[1, 1, 0]","[173516827.0, 173516827.0, 835571452.0]",POINT (371017.481 6448504.455)
3,Acklinga Solbacken_307927983,Acklinga Solbacken,307927983,Acklinga Solbacken,"[9011014332600000, 9011014384000000]","[326, 326]","[9022014042270002, 9022014042270001]","[bus_service, communal_taxi_service]","[1, 1]","[1008032531.0, 1008032531.0]",POINT (434327.469 6451981.944)
4,Acklinga kyrka_307927983,Acklinga kyrka,307927983,Acklinga kyrka,"[9011014330300000, 9011014332700000, 901101433...","[303, 327, 303, 327, 327]","[9022014042119001, 9022014042119001, 902201404...","[bus_service, bus_service, bus_service, bus_se...","[1, 1, 0, 0, 0]","[19804825.0, 19804825.0, 19804825.0, 19804825....",POINT (434071.965 6451013.922)


In [192]:
# Preprocess the Data
# Step 1: Merge nodes and links in order to get the right combination of data
links_pt_8_source_merge_l1 = pd.merge(
    nodes_pt_8_exploded[['node_id', 'stop_areas_name', 'mode','stop_name',
                         'place_id', 'direction_id', 'geometry']],
    links_pt_1 [['route_id', 'route_short_name', 'stop_id_source',
                 'stop_id_target', 'stop_name_source', 'stop_name_target', 'time_distance',
                 'place_id_source', 'place_id_target'] + [col for col in links_pt_1.columns if col.startswith('freq_')]],
    left_on=['stop_name', 'place_id'],
    right_on=['stop_name_source', 'place_id_source'], how='inner'
)

links_pt_8_target_merge_l1  = pd.merge(
    nodes_pt_8_exploded[['node_id', 'stop_areas_name', 'stop_name',
                         'place_id', 'direction_id', 'geometry']],
    links_pt_8_source_merge_l1[['node_id', 'stop_areas_name', 'stop_name_source',
                             'stop_name_target', 'place_id_source', 'place_id_target',
                             'stop_id_source', 'stop_id_target', 'time_distance',
                             'route_id', 'route_short_name', 'mode',
                             'geometry'] + [col for col in links_pt_8_source_merge.columns if col.startswith('freq_')]],
    left_on=['stop_name', 'place_id'],
    right_on=['stop_name_target', 'place_id_target'],
    how='inner').rename(columns={
    'node_id_x':'node_id_target',
    'node_id_y':'node_id_source',
    'stop_areas_name_x': 'stop_areas_name_target',
    'stop_areas_name_y': 'stop_areas_name_source',
    'geometry_x':'geometry_target',
    'geometry_y':'geometry_source'})

# Step 2: Select specific columns and remove duplicates
links_pt_8_l1 = links_pt_8_target_merge_l1 [['node_id_source', 'node_id_target', 'time_distance', 'stop_areas_name_source',
                                          'stop_areas_name_target', 'stop_name_source', 'stop_name_target', 'place_id_source',
                                          'place_id_target', 'mode', 'stop_id_source', 'stop_id_target', 'route_id', 'route_short_name',
                                          'direction_id', 'geometry_source', 'geometry_target']
                                         + [col for col in links_pt_8_target_merge.columns if col.startswith('freq_')]]

# Step 2 (optional): Display the results
links_pt_8_l1.head()

Unnamed: 0,node_id_source,node_id_target,time_distance,stop_areas_name_source,stop_areas_name_target,stop_name_source,stop_name_target,place_id_source,place_id_target,mode,...,freq_friday_night(dawn),freq_saturday_night(dawn),freq_sunday_night(dawn),freq_week_day_morning,freq_week_day_peak_morning,freq_week_day_afternoon,freq_week_day_peak_evening,freq_week_day_evening,freq_week_day_night,freq_week_day_night(dawn)
0,Bäckängsskolan_307955489,7:e Villagatan_307955489,129.0,Bäckängsskolan,7:e Villagatan,Bäckängsskolan,7:e Villagatan,307955489,307955489,"[bus_service, bus_service, bus_service, bus_se...",...,0.0,0.0,0.0,2842.105263,1800.0,2571.428571,1800.0,2454.545455,4909.090909,0.0
1,Bäckängsskolan_307955489,7:e Villagatan_307955489,114.0,Bäckängsskolan,7:e Villagatan,Bäckängsskolan,7:e Villagatan,307955489,307955489,"[bus_service, bus_service, bus_service, bus_se...",...,0.0,0.0,0.0,9000.0,9671.641791,9000.0,9000.0,9671.641791,54000.0,0.0
2,Bäckängsskolan_307955489,7:e Villagatan_307955489,117.0,Bäckängsskolan,7:e Villagatan,Bäckängsskolan,7:e Villagatan,307955489,307955489,"[bus_service, bus_service, bus_service, bus_se...",...,0.0,0.0,0.0,8415.584416,5102.362205,8415.584416,4729.927007,6750.0,9000.0,0.0
3,Bäckängsskolan_307955489,7:e Villagatan_307955489,117.0,Bäckängsskolan,7:e Villagatan,Bäckängsskolan,7:e Villagatan,307955489,307955489,"[bus_service, bus_service, bus_service, bus_se...",...,0.0,0.0,0.0,6000.0,3410.526316,4984.615385,3100.478469,5634.782609,9000.0,0.0
4,Södra Älvsborgs sjukhus_307955489,7:e Villagatan_307955489,98.0,Södra Älvsborgs sjukhus,7:e Villagatan,Södra Älvsborgs sjukhus,7:e Villagatan,307955489,307955489,"[bus_service, bus_service, bus_service, bus_se...",...,0.0,0.0,0.0,2781.11588,1825.352113,2623.481781,1800.0,2347.826087,6000.0,0.0


In [249]:
# Create the links of the one direction model
# Step 4: Aggregate Data
links_pt_8_l1.loc[:, 'geometry'] = links_pt_8_l1.apply(lambda row: LineString([Point(row['geometry_source']), Point(row['geometry_target'])]), axis=1)
links_pt_8_l1.loc[:, 'symmetric_pairs'] = links_pt_8_l1[['node_id_source', 'node_id_target']].apply(lambda x: '-'.join(sorted(x)), axis=1)

# Step 5: Define aggregation functions for various columns
agg_func_link = {
    'node_id_source': 'first',
    'node_id_target': 'first',
    'stop_areas_name_source': 'first',
    'stop_areas_name_target': 'first',
    'route_id': list,
    'route_short_name': list,
    'stop_id_source': list,
    'stop_id_target': list,
    'stop_name_source': 'first',
    'stop_name_target': 'first',
    'place_id_source': 'first',
    'place_id_target': 'first',
    'mode':'first',
    'time_distance': 'min',
    'direction_id': 'first',
    'geometry': 'first'
}

freq_cols = [col for col in links_pt_8_l1.columns if col.startswith('freq_')]
agg_func_link.update({col: 'min' for col in freq_cols})

links_pt_8_l2 = links_pt_8_l1.groupby(['symmetric_pairs']).agg(agg_func_link).reset_index()

# Step 6 (optional): Display the results
links_pt_8_l2.head()

Unnamed: 0,symmetric_pairs,node_id_source,node_id_target,stop_areas_name_source,stop_areas_name_target,route_id,route_short_name,stop_id_source,stop_id_target,stop_name_source,...,freq_friday_night(dawn),freq_saturday_night(dawn),freq_sunday_night(dawn),freq_week_day_morning,freq_week_day_peak_morning,freq_week_day_afternoon,freq_week_day_peak_evening,freq_week_day_evening,freq_week_day_night,freq_week_day_night(dawn)
0,7:e Villagatan_307955489-Bäckängsskolan_307955489,Bäckängsskolan_307955489,7:e Villagatan_307955489,Bäckängsskolan,7:e Villagatan,"[9011014200100000, 9011014200300000, 901101420...","[1, 3, 5, 8, 1, 3, 5, 8]","[9022014082676002, 9022014082676002, 902201408...","[9022014082785002, 9022014082785002, 902201408...",Bäckängsskolan,...,0.0,0.0,0.0,2842.105263,1800.0,2571.428571,1800.0,2347.826087,0.0,0.0
1,7:e Villagatan_307955489-Södra Älvsborgs sjukh...,Södra Älvsborgs sjukhus_307955489,7:e Villagatan_307955489,Södra Älvsborgs sjukhus,7:e Villagatan,"[9011014200100000, 9011014200300000, 901101420...","[1, 3, 5, 8, 1, 3, 5, 8]","[9022014082726001, 9022014082726001, 902201408...","[9022014082785001, 9022014082785001, 902201408...",Södra Älvsborgs sjukhus,...,0.0,0.0,0.0,2757.446809,1825.352113,2623.481781,1800.0,2347.826087,0.0,0.0
2,AGA Vattenfall_307980814-Borealis kracker_3079...,Borealis kracker_307980814,AGA Vattenfall_307980814,Borealis kracker,AGA Vattenfall,"[9011014633100000, 9011014633400000, 901101463...","[331, 334, 4, 331, 334, 4, 932]","[9022014015013002, 9022014015013002, 902201401...","[9022014015161002, 9022014015161002, 902201401...",Borealis kracker,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,AGA Vattenfall_307980814-Jordhammar_307980814,Jordhammar_307980814,AGA Vattenfall_307980814,Jordhammar,AGA Vattenfall,"[9011014633100000, 9011014633400000, 901101463...","[331, 334, 4, 932, 331, 334, 4]","[9022014015110001, 9022014015110001, 902201401...","[9022014015161001, 9022014015161001, 902201401...",Jordhammar,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Abrahamstorp_307959538-Badet_307959538,Badet_307959538,Abrahamstorp_307959538,Badet,Abrahamstorp,"[9011014366300000, 9011014658100000, 901101436...","[663, 581, 663]","[9022014046021001, 9022014046021001, 902201404...","[9022014046020001, 9022014046020001, 902201404...",Badet,...,0.0,0.0,0.0,0.0,18000.0,0.0,0.0,0.0,0.0,0.0


In [250]:
# Filter and organize the final table
# Step 7: Rename columns for clarity
links_pt_8_l2.rename(
    columns={'node_id_source': 'source',
             'node_id_target': 'target'}, inplace=True
)

# Step 8: Select desired columns for the final DataFrame
links_pt_8_l2 = links_pt_8_l2[['source', 'target', 'time_distance', 'route_id', 'stop_name_source',
                            'place_id_source', 'stop_name_target', 'place_id_target',
                            'stop_id_source', 'stop_id_target', 'route_short_name', 'mode',
                            'direction_id','geometry']
                           + [col for col in links_pt_8_l2.columns if col.startswith('freq_')]]\
    .drop_duplicates(subset=['source', 'target'])

# Step 9: Filter out rows where 'source' is equal to 'target'
links_pt_8_l2  = links_pt_8_l2 [links_pt_8_l2 ['source'] != links_pt_8_l2 ['target']]

# Step 10 : Display the final results
links_pt_8_l2.head()

Unnamed: 0,source,target,time_distance,route_id,stop_name_source,place_id_source,stop_name_target,place_id_target,stop_id_source,stop_id_target,...,freq_friday_night(dawn),freq_saturday_night(dawn),freq_sunday_night(dawn),freq_week_day_morning,freq_week_day_peak_morning,freq_week_day_afternoon,freq_week_day_peak_evening,freq_week_day_evening,freq_week_day_night,freq_week_day_night(dawn)
0,Bäckängsskolan_307955489,7:e Villagatan_307955489,109.0,"[9011014200100000, 9011014200300000, 901101420...",Bäckängsskolan,307955489,7:e Villagatan,307955489,"[9022014082676002, 9022014082676002, 902201408...","[9022014082785002, 9022014082785002, 902201408...",...,0.0,0.0,0.0,2842.105263,1800.0,2571.428571,1800.0,2347.826087,0.0,0.0
1,Södra Älvsborgs sjukhus_307955489,7:e Villagatan_307955489,86.0,"[9011014200100000, 9011014200300000, 901101420...",Södra Älvsborgs sjukhus,307955489,7:e Villagatan,307955489,"[9022014082726001, 9022014082726001, 902201408...","[9022014082785001, 9022014082785001, 902201408...",...,0.0,0.0,0.0,2757.446809,1825.352113,2623.481781,1800.0,2347.826087,0.0,0.0
2,Borealis kracker_307980814,AGA Vattenfall_307980814,27.0,"[9011014633100000, 9011014633400000, 901101463...",Borealis kracker,307980814,AGA Vattenfall,307980814,"[9022014015013002, 9022014015013002, 902201401...","[9022014015161002, 9022014015161002, 902201401...",...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Jordhammar_307980814,AGA Vattenfall_307980814,97.0,"[9011014633100000, 9011014633400000, 901101463...",Jordhammar,307980814,AGA Vattenfall,307980814,"[9022014015110001, 9022014015110001, 902201401...","[9022014015161001, 9022014015161001, 902201401...",...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Badet_307959538,Abrahamstorp_307959538,40.0,"[9011014366300000, 9011014658100000, 901101436...",Badet,307959538,Abrahamstorp,307959538,"[9022014046021001, 9022014046021001, 902201404...","[9022014046020001, 9022014046020001, 902201404...",...,0.0,0.0,0.0,0.0,18000.0,0.0,0.0,0.0,0.0,0.0


In [251]:
links_pt_8_l2

Unnamed: 0,source,target,time_distance,route_id,stop_name_source,place_id_source,stop_name_target,place_id_target,stop_id_source,stop_id_target,...,freq_friday_night(dawn),freq_saturday_night(dawn),freq_sunday_night(dawn),freq_week_day_morning,freq_week_day_peak_morning,freq_week_day_afternoon,freq_week_day_peak_evening,freq_week_day_evening,freq_week_day_night,freq_week_day_night(dawn)
0,Bäckängsskolan_307955489,7:e Villagatan_307955489,109.0,"[9011014200100000, 9011014200300000, 901101420...",Bäckängsskolan,307955489,7:e Villagatan,307955489,"[9022014082676002, 9022014082676002, 902201408...","[9022014082785002, 9022014082785002, 902201408...",...,0.0,0.0,0.0,2842.105263,1800.000000,2571.428571,1800.000000,2347.826087,0.0,0.0
1,Södra Älvsborgs sjukhus_307955489,7:e Villagatan_307955489,86.0,"[9011014200100000, 9011014200300000, 901101420...",Södra Älvsborgs sjukhus,307955489,7:e Villagatan,307955489,"[9022014082726001, 9022014082726001, 902201408...","[9022014082785001, 9022014082785001, 902201408...",...,0.0,0.0,0.0,2757.446809,1825.352113,2623.481781,1800.000000,2347.826087,0.0,0.0
2,Borealis kracker_307980814,AGA Vattenfall_307980814,27.0,"[9011014633100000, 9011014633400000, 901101463...",Borealis kracker,307980814,AGA Vattenfall,307980814,"[9022014015013002, 9022014015013002, 902201401...","[9022014015161002, 9022014015161002, 902201401...",...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
3,Jordhammar_307980814,AGA Vattenfall_307980814,97.0,"[9011014633100000, 9011014633400000, 901101463...",Jordhammar,307980814,AGA Vattenfall,307980814,"[9022014015110001, 9022014015110001, 902201401...","[9022014015161001, 9022014015161001, 902201401...",...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
4,Badet_307959538,Abrahamstorp_307959538,40.0,"[9011014366300000, 9011014658100000, 901101436...",Badet,307959538,Abrahamstorp,307959538,"[9022014046021001, 9022014046021001, 902201404...","[9022014046020001, 9022014046020001, 902201404...",...,0.0,0.0,0.0,0.000000,18000.000000,0.000000,0.000000,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10996,Överlida kyrka_307891645,Överlida centrum_307891645,36.0,"[9011014236200000, 9011014230200000, 901101423...",Överlida kyrka,307891645,Överlida centrum,307891645,"[9022014065932002, 9022014065933001, 902201406...","[9022014065933001, 9022014065932001, 902201406...",...,0.0,0.0,0.0,0.000000,27000.000000,0.000000,0.000000,0.000000,0.0,0.0
10997,Överlida skola_307891645,Överlida centrum_307891645,120.0,"[9011014230200000, 9011014280200000]",Överlida skola,307891645,Överlida centrum,307891645,"[9022014065976001, 9022014065976001]","[9022014065933001, 9022014065933001]",...,0.0,0.0,0.0,0.000000,54000.000000,54000.000000,43200.000000,0.000000,0.0,0.0
10998,Överlida kyrka_307891645,Överlida skola_307891645,141.0,"[9011014230200000, 9011014280200000]",Överlida kyrka,307891645,Överlida skola,307891645,"[9022014065932002, 9022014065932002]","[9022014065976001, 9022014065976001]",...,0.0,0.0,0.0,0.000000,0.000000,54000.000000,43200.000000,0.000000,0.0,0.0
11000,Öxabäck_307997498,Öxabäck Torestorpsvägen_307997498,24.0,"[9011014237100000, 9011014237200000, 901101423...",Öxabäck,307997498,Öxabäck Torestorpsvägen,307997498,"[9022014063318001, 9022014063317001, 902201406...","[9022014063317001, 9022014063318002, 902201406...",...,0.0,0.0,0.0,0.000000,0.000000,0.000000,18873.786408,0.000000,0.0,0.0


### Nodes - Final Nodes

In [122]:
# Step 1: Create the graph
G = nx.from_pandas_edgelist(links_pt_8, 'source', 'target', edge_attr='time_distance', create_using=nx.Graph())

In [135]:
# Step 2:  Filter the nodes
sum_of_new_edges_time_distance = 0

while any(degree == 2 for node, degree in G.degree()):
    for node in list(G.nodes()):
        if G.degree(node) == 2:
            edges = list(G.edges(node))
            time_distance_sum = G[edges[0][0]][edges[0][1]].get('time_distance', 0) + G[edges[1][0]][edges[1][1]].get('time_distance', 0)
            G.add_edge(edges[0][1], edges[1][1])
            G[edges[0][1]][edges[1][1]]['time_distance'] = time_distance_sum
            sum_of_new_edges_time_distance += time_distance_sum
            G.remove_node(node)

degrees = dict(G.degree())

# Step 3: Create a Dataframe with the nodes
nodes_pt_8 = pd.DataFrame(list(degrees.items()), columns=['node_id', 'degree'])

In [178]:
# Step 4: Merge the dataframe with the the prepared dataframe to get the necessary informations
nodes_pt_8 = pd.merge (nodes_pt_8 [['node_id', 'degree']], nodes_pt_8_l6, on='node_id', how='inner')

# Step 5: Display the final result
nodes_pt_8.head()

Unnamed: 0,node_id,degree,stop_areas_name,place_id,stop_name,route_id,route_short_name,stop_id,mode,direction_id,osmid,geometry
0,Bäckängsskolan_307955489,4,Bäckängsskolan,307955489,"[Bäckängsskolan, Bäckängsskolan, Bäckängsskola...","[9011014200300000, 9011014200500000, 901101420...","[3, 5, 8, 1, 3, 5, 8, 1]","[9022014082676001, 9022014082676001, 902201408...","[bus_service, bus_service, bus_service, bus_se...","[0, 1, 0, 0, 1, 0, 1, 1]","[397156375.0, 397156375.0, 397156375.0, 397156...",POINT (377826.942 6399466.436)
1,Södra Älvsborgs sjukhus_307955489,5,Södra Älvsborgs sjukhus,307955489,"[Södra Älvsborgs sjukhus, Södra Älvsborgs sjuk...","[9011014200300000, 9011014200100000, 901101420...","[3, 1, 5, 8, 6, 200, 200, 102, 6, 5, 8, 3, 1, ...","[9022014082726001, 9022014082726001, 902201408...","[bus_service, bus_service, bus_service, bus_se...","[0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0]","[339828166.0, 339828166.0, 339828175.0, 339828...",POINT (378640.599 6399758.434)
2,Borealis kracker_307980814,3,Borealis kracker,307980814,"[Borealis kracker, Borealis kracker, Borealis ...","[9011014633100000, 9011014633400000, 901101463...","[331, 334, 4, 932, 331, 334, 4]","[9022014015013001, 9022014015013001, 902201401...","[bus_service, bus_service, bus_service, bus_se...","[0, 0, 1, 1, 1, 1, 0]","[1125337144.0, 1125337144.0, 1125337144.0, 112...",POINT (313106.978 6442246.408)
3,Kavlåsbruket_307927983,7,Kavlåsbruket,307927983,"[Bruket, Bruket, Kavlåsbruket, Kavlåsbruket, K...","[9011014330300000, 9011014330300000, 901101433...","[303, 303, 303, 303, 326, 326]","[9022014042121001, 9022014042121002, 902201404...","[bus_service, bus_service, bus_service, bus_se...","[1, 0, 1, 0, 1, 1]","[599242864.0, 599242864.0, 1027026785.0, 10270...",POINT (432853.974 6453340.197)
4,Acklinga Solbacken_307927983,1,Acklinga Solbacken,307927983,"[Acklinga Solbacken, Acklinga Solbacken]","[9011014332600000, 9011014384000000]","[326, 326]","[9022014042270002, 9022014042270001]","[bus_service, communal_taxi_service]","[1, 1]","[1008032531.0, 1008032531.0]",POINT (434327.469 6451981.944)


In [137]:
# Step 6: Set geometry and export to the DB
nodes_pt_8 = gpd.GeoDataFrame(nodes_pt_8, geometry='geometry', crs='3006')
nodes_pt_8.to_postgis('nodes_pt_8', engine, schema='pt_8_stopareassecondfilter_onemode', if_exists ='replace')

#### Links - Final Links

In [258]:
# Step 1: Convert the graph to a DataFrame
links_pt_8_l4 = pd.DataFrame(list(G.edges(data=True)), columns=['source', 'target', 'time_distance'])
links_pt_8_l4 ['time_distance'] = links_pt_8_l4 ['time_distance'].apply(lambda x: x['time_distance'] if 'time_distance' in x else None)

In [259]:
# Preprocess the Data
# Step 2: Merge nodes and links in order to get the right combination of data
links_pt_8_source = pd.merge(
    nodes_pt_8[['node_id','geometry']],
    links_pt_8_l4,
    left_on='node_id',
    right_on='source',
    how='inner'
)

links_pt_8_target = pd.merge(
    links_pt_8_source, nodes_pt_8[['node_id','geometry']],
    left_on='target',
    right_on='node_id',
    how='inner'
).rename(columns={
    'geometry_y':'geometry_source',
    'geometry_x':'geometry_target'
})

# Step 3: Select specific columns and remove duplicates
links_pt_8_l5 = links_pt_8_target[['source', 'target', 'time_distance', 'geometry_source', 'geometry_target']]

# Step 4 (optional): Display the results
links_pt_8_l5.head()

Unnamed: 0,source,target,time_distance,geometry_source,geometry_target
0,Bäckängsskolan_307955489,Allégatan_307955489,134.0,POINT (377468.217 6399358.673),POINT (377826.942 6399466.436)
1,Bäckängsskolan_307955489,Södra torget_307955489,91.0,POINT (377421.472 6399273.424),POINT (377826.942 6399466.436)
2,Bäckängsskolan_307955489,Södra Älvsborgs sjukhus_307955489,195.0,POINT (378640.599 6399758.434),POINT (377826.942 6399466.436)
3,Bäckängsskolan_307955489,Söderbro_307955489,214.0,POINT (377254.478 6399046.704),POINT (377826.942 6399466.436)
4,Södra Älvsborgs sjukhus_307955489,Borås central_307955489,480.0,POINT (376809.367 6399462.293),POINT (378640.599 6399758.434)


In [263]:
# Step 5: Create a geometry
links_pt_8_l5.loc[:, 'geometry'] = links_pt_8_l5.apply(lambda row: LineString([Point(row['geometry_source']), Point(row['geometry_target'])]), axis=1)

# Step 6: DOrganize the final table
links_pt_8 = links_pt_8_l5 [['source', 'target', 'time_distance', 'geometry']]

# Step 7: Display the final results
links_pt_8.head()

Unnamed: 0,source,target,time_distance,geometry
0,Bäckängsskolan_307955489,Allégatan_307955489,134.0,LINESTRING (377468.2169475646 6399358.67327003...
1,Bäckängsskolan_307955489,Södra torget_307955489,91.0,LINESTRING (377421.4718657576 6399273.42384159...
2,Bäckängsskolan_307955489,Södra Älvsborgs sjukhus_307955489,195.0,LINESTRING (378640.5991298174 6399758.43443690...
3,Bäckängsskolan_307955489,Söderbro_307955489,214.0,LINESTRING (377254.4775803655 6399046.70442958...
4,Södra Älvsborgs sjukhus_307955489,Borås central_307955489,480.0,LINESTRING (376809.3671732711 6399462.29262101...


In [265]:
#create the GeoDataFrame and export to the DB
links_pt_8 = gpd.GeoDataFrame(links_pt_8, geometry='geometry', crs='3006')
links_pt_8.to_postgis('links_pt_8', engine, schema='pt_8_stopareassecondfilter_onemode', if_exists ='replace')