## Frequency_tables_long_format

### Import libraries

In [50]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import geopandas as gpd
from sqlalchemy import create_engine
from sqlalchemy import text
from shapely.geometry import Point, LineString, shape

### Create the connection with the DB (optional)

In [51]:
# Step 1: Create the connection with the DB
engine = create_engine("postgresql://urbaninfo:@cirrus.ita.chalmers.se/se_tuptp")
conn = engine.connect()

In [52]:
# Step 2: Create a new schema when it is necessary
#schema = '''CREATE SCHEMA name_schema'''
#conn.execute(schema)

### Expanded Table

###### To create the frequency model, it is necessary first to compile a table containing all the trips that occur on the days covered by the GTFS.

In [5]:
# Step 1: Import data
# In this example, we are utilizing data from Västra Götaland, although these can be modified based on specific requirements.
# links_pt_0
links_pt_0_query = text('SELECT * FROM pt_0_baseline.links_pt_0')
links_pt_0 = gpd.read_postgis(links_pt_0_query, engine, geom_col='geometry')

# nodes_pt_0
nodes_pt_0_query = text('''SELECT * FROM pt_0_baseline.nodes_pt_0''')
nodes_pt_0 = gpd.read_postgis(nodes_pt_0_query, engine, geom_col='geometry')

#calendar_dates_gtfs
calendar_dates_gtfs_query = text('''SELECT * FROM p1_gtfs.regional_calendardates''')
calendar_dates_gtfs = pd.read_sql(calendar_dates_gtfs_query, engine)

In [7]:
# Preprocess the calendar_dates data
# Step 2: Adjust the date format
calendar_dates_gtfs['date'] = pd.to_datetime(calendar_dates_gtfs['date'], format='%Y%m%d')

# Step 3: Create a column with the day of the week
calendar_dates_gtfs['day_week'] = calendar_dates_gtfs['date'].dt.day_name()

# Step 4: Create a column with the day type
day_type_dict = {
    'Monday': 'week_day',
    'Tuesday': 'week_day',
    'Wednesday': 'week_day',
    'Thursday': 'week_day',
    'Friday': 'week_day',
    'Saturday': 'weekend',
    'Sunday': 'weekend'
}

calendar_dates_gtfs['day_type'] = calendar_dates_gtfs['day_week'].map(day_type_dict)

# Step 5: Create the week_start and week_number columns based on the date
calendar_dates_gtfs['week_start'] = calendar_dates_gtfs['date'].dt.to_period('W').apply(lambda r: r.start_time)
calendar_dates_gtfs['week_number'] = calendar_dates_gtfs['week_start'].dt.isocalendar().week

# Step 6 (optional): Display the temporary results
calendar_dates_gtfs.head()

Unnamed: 0,service_id,date,exception_type,day_week,day_type,week_start,week_number
0,1,2022-08-15,1,Monday,week_day,2022-08-15,33
1,1,2022-08-16,1,Tuesday,week_day,2022-08-15,33
2,1,2022-08-17,1,Wednesday,week_day,2022-08-15,33
3,1,2022-08-18,1,Thursday,week_day,2022-08-15,33
4,1,2022-08-19,1,Friday,week_day,2022-08-15,33


In [8]:
# Step 7: Merge the links_pt_0 with the preprocessed calendar_dates
all_trips_gtfs = pd.merge(links_pt_0, calendar_dates_gtfs[['service_id', 'date', 'day_week', 'day_type', 'week_start', 'week_number']],
                          on='service_id', how='inner')

# Step 8 (optional): Display the temporary results
all_trips_gtfs.head()

Unnamed: 0,source,target,route_short_name,mode,time_distance,time_period,route_id,service_id,trip_id,direction_id,...,stop_name_source,stop_name_target,place_id_source,place_id_target,geometry,date,day_week,day_type,week_start,week_number
0,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,Brunnsparken,Stenpiren,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-07-31,Sunday,weekend,2022-07-25,30
1,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,Brunnsparken,Stenpiren,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-08-06,Saturday,weekend,2022-08-01,31
2,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,Brunnsparken,Stenpiren,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-08-07,Sunday,weekend,2022-08-01,31
3,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,Brunnsparken,Stenpiren,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-08-13,Saturday,weekend,2022-08-08,32
4,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,Brunnsparken,Stenpiren,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-08-14,Sunday,weekend,2022-08-08,32


In [11]:
# Filter the data
# Step 9: Filter the data by selecting the period with the highest concentration of trips
filtered_trips_gtfs = all_trips_gtfs[all_trips_gtfs['date'].between('2022-09-01', '2022-12-10')]

# Step 10: Create the 'week_count' column to contain information about the number of weeks each service operates
filtered_trips_gtfs.loc[:, 'week_count'] = filtered_trips_gtfs.groupby(['route_id', 'service_id'])['week_number'].transform('nunique')

# Step 11: Filter the frequent services of each route
frequent_services_routes = (filtered_trips_gtfs.groupby(['route_id', 'week_count'], as_index=False)
                            .size()
                            .reset_index(name='route_id_count')
                            .sort_values(['route_id', 'route_id_count'], ascending=[True, False])
                            .groupby('route_id')
                            .apply(lambda x: x.head(2))
                            .reset_index(drop=True))

# Step 12: Merge the results
filtered_services_routes = pd.merge(filtered_trips_gtfs, frequent_services_routes,
                                    on=['route_id', 'week_count'], how='inner')

# Step 13 (optional): Display the temporary results
filtered_services_routes.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
  frequent_services_routes = (filtered_trips_gtfs.groupby(['route_id', 'week_count'])


Unnamed: 0,source,target,route_short_name,mode,time_distance,time_period,route_id,service_id,trip_id,direction_id,...,place_id_source,place_id_target,geometry,date,day_week,day_type,week_start,week_number,week_count,route_id_count
0,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-03,Saturday,weekend,2022-08-29,35,15,465390
1,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-04,Sunday,weekend,2022-08-29,35,15,465390
2,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-10,Saturday,weekend,2022-09-05,36,15,465390
3,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-11,Sunday,weekend,2022-09-05,36,15,465390
4,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,307987564,307987564,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-17,Saturday,weekend,2022-09-12,37,15,465390


### Nodes

In [13]:
# Preprocess the data to obtain information on each route passing through each stop
# Step 1: Filter columns from the DataFrame
nodes_pt_0 = nodes_pt_0[['stop_id', 'stop_name', 'place_id', 'osmid', 'geometry']]

# Step 2: Merge nodes_pt_0 with target and source stop_ids
nodes_pt_1 = pd.concat([
    nodes_pt_0.merge(filtered_services_routes [['route_id','target', 'route_short_name','mode', 'direction_id']], left_on='stop_id', right_on='target', how='inner'),
    nodes_pt_0.merge(filtered_services_routes [['route_id','source', 'route_short_name','mode', 'direction_id']], left_on='stop_id', right_on='source', how='inner')
])

# Step 3: Change NaN values to 0
nodes_pt_1['osmid'] = nodes_pt_1['osmid'].fillna(0)

# Step 4: Display the data
nodes_pt_1.head()

Unnamed: 0,stop_id,stop_name,place_id,osmid,geometry,route_id,target,route_short_name,mode,direction_id,source,node_id
0,9022014001004001,Amhult Resecentrum,307987564,290390139.0,POINT (308137.973 6400982.960),9011014521200000,9022014001004001,X1,bus_service,1,,9022014001004001
1,9022014001004001,Amhult Resecentrum,307987564,290390139.0,POINT (308137.973 6400982.960),9011014521200000,9022014001004001,X1,bus_service,1,,9022014001004001
2,9022014001004001,Amhult Resecentrum,307987564,290390139.0,POINT (308137.973 6400982.960),9011014521200000,9022014001004001,X1,bus_service,1,,9022014001004001
3,9022014001004001,Amhult Resecentrum,307987564,290390139.0,POINT (308137.973 6400982.960),9011014521200000,9022014001004001,X1,bus_service,1,,9022014001004001
4,9022014001004001,Amhult Resecentrum,307987564,290390139.0,POINT (308137.973 6400982.960),9011014521200000,9022014001004001,X1,bus_service,1,,9022014001004001


In [68]:
# Step 5: create node_id column
nodes_pt_1 ['node_id'] = nodes_pt_1 ['stop_id'].astype(str) + '_'+ nodes_pt_1 ['route_id'].astype(str)

#Step 6: combine duplicates and transform the Dataframe to a GeoDataFrame
nodes_pt_1 = gpd.GeoDataFrame(nodes_pt_1[['node_id','stop_id', 'route_id', 'route_short_name', 'stop_name',
                                          'place_id', 'mode', 'direction_id','osmid', 'geometry']]).drop_duplicates(['node_id', 'route_id'])

# Step 7: Display the results
nodes_pt_1.head()

Unnamed: 0,node_id,stop_id,route_id,route_short_name,stop_name,place_id,mode,direction_id,osmid,geometry
0,9022014001004001_9011014521200000,9022014001004001,9011014521200000,X1,Amhult Resecentrum,307987564,bus_service,1,290390139.0,POINT (308137.973 6400982.960)
9016,9022014001004003_9011014620400000,9022014001004003,9011014620400000,LILA,Amhult Resecentrum,307987564,bus_service,0,846315401.0,POINT (308179.982 6400938.925)
12400,9022014001004004_9011014521200000,9022014001004004,9011014521200000,X1,Amhult Resecentrum,307987564,bus_service,0,846315401.0,POINT (308152.980 6400960.949)
21243,9022014001004006_9011014503400000,9022014001004006,9011014503400000,34,Amhult Resecentrum,307987564,bus_service,0,868843476.0,POINT (308158.939 6400936.915)
27091,9022014001004007_9011014521100000,9022014001004007,9011014521100000,SVART,Amhult Resecentrum,307987564,bus_service,0,868843476.0,POINT (308164.962 6400912.990)


In [69]:
#Step 7 (optional): export to the DB
nodes_pt_1.to_postgis('nodes_pt_1', engine, schema='pt_1_mdirections_mstops_mroutes', if_exists ='replace')

### Links

In [17]:
# Preprocess the data
# Step1: Group by specified columns and count the links
frequency_by_date_groupby = filtered_services_routes.groupby(
    ['source', 'target', 'route_id', 'date', 'time_period']
).size().reset_index(name='links_count')

# Step 2: Merge the grouped data with the original dataframe
frequency_links = pd.merge(
    filtered_services_routes,
    frequency_by_date_groupby,
    on=['source', 'target', 'route_id', 'date', 'time_period']
)

# Step 3 (optional): Display the temporary results
frequency_links.head()

Unnamed: 0,source,target,route_short_name,mode,time_distance,time_period,route_id,service_id,trip_id,direction_id,...,geometry,date,day_week,day_type,week_start,week_number,week_count,route_id_count,links_count,frequency_date
0,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-03,Saturday,weekend,2022-08-29,35,15,465390,8,1350.0
1,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-04,Sunday,weekend,2022-08-29,35,15,465390,4,2700.0
2,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-10,Saturday,weekend,2022-09-05,36,15,465390,8,1350.0
3,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-11,Sunday,weekend,2022-09-05,36,15,465390,4,2700.0
4,9022014001760012,9022014006242004,50,bus_service,97.0,night_dawn,9011014505000000,5,141010001054458148,0,...,"LINESTRING (319300.968 6400118.943, 318713.953...",2022-09-17,Saturday,weekend,2022-09-12,37,15,465390,8,1350.0


In [19]:
# Calculate Frequencies
# Step 4: Calculate the number of links
frequency_links['frequency_date'] = 10800 / frequency_links['links_count']

# Step 5: Calculate mean frequency by day of the week
frequency_links['frequency_dayweek'] = frequency_links.groupby(
    ['source', 'target', 'route_id', 'day_week', 'time_period']
)['frequency_date'].transform('mean')

# Step 6: Calculate mean frequency by day type
frequency_links['frequency_daytype'] = frequency_links.groupby(
    ['source', 'target', 'route_id', 'day_type', 'time_period']
)['frequency_date'].transform('mean')

# Step 7: Filter the first link by day of the week and time period, and rename columns
frequency_links_l1 = frequency_links.groupby(
    ['source', 'target', 'route_id', 'date', 'time_period']
).first().reset_index().drop_duplicates(
    subset=['source', 'target', 'route_id', 'day_week', 'time_period']
).rename(columns={
    'source': 'stop_id_source',
    'target': 'stop_id_target'
})

# Step 8 (optional): Display the temporary results
frequency_links_l1.head()

Unnamed: 0,stop_id_source,stop_id_target,route_id,date,time_period,route_short_name,mode,time_distance,service_id,trip_id,...,day_week,day_type,week_start,week_number,week_count,route_id_count,links_count,frequency_date,frequency_dayweek,frequency_daytype
0,9022014001004001,9022014003960006,9011014502300000,2022-09-01,night_dawn,23,bus_service,85.0,1,141010001141073508,...,Thursday,week_day,2022-08-29,35,15,21960,1,10800.0,10800.0,10800.0
1,9022014001004001,9022014003960006,9011014502300000,2022-09-01,peak_evening,23,bus_service,102.0,1,141010001140608383,...,Thursday,week_day,2022-08-29,35,15,21960,1,10800.0,10800.0,10800.0
2,9022014001004001,9022014003960006,9011014502300000,2022-09-01,peak_morning,23,bus_service,85.0,1,141010001141078619,...,Thursday,week_day,2022-08-29,35,15,21960,1,10800.0,10800.0,10800.0
3,9022014001004001,9022014003960006,9011014502300000,2022-09-02,night_dawn,23,bus_service,85.0,1,141010001141073508,...,Friday,week_day,2022-08-29,35,15,21960,1,10800.0,10800.0,10800.0
4,9022014001004001,9022014003960006,9011014502300000,2022-09-02,peak_evening,23,bus_service,102.0,1,141010001140608383,...,Friday,week_day,2022-08-29,35,15,21960,1,10800.0,10800.0,10800.0


In [20]:
# Clean and organize the data
# Step 9: Add node_id columns
frequency_links_l1['node_id_source'] = frequency_links_l1['stop_id_source']
frequency_links_l1['node_id_target'] = frequency_links_l1['stop_id_target']

# Step 10: Concatenate node_id and route_id to create source and target columns
frequency_links_l1['source'] = frequency_links_l1['node_id_source'].astype(str) + '_' + frequency_links_l1['route_id'].astype(str)
frequency_links_l1['target'] = frequency_links_l1['node_id_target'].astype(str) + '_' + frequency_links_l1['route_id'].astype(str)

# Step 11: Select the desired columns
links_pt_1 = frequency_links_l1[[
    'source', 'target', 'time_distance', 'stop_id_source', 'stop_id_target',
    'node_id_source', 'node_id_target', 'route_id', 'route_short_name',
    'stop_name_source', 'stop_name_target', 'place_id_source', 'place_id_target',
    'mode', 'direction_id', 'stop_sequence', 'time_period', 'day_week', 'day_type',
    'frequency_dayweek', 'frequency_daytype', 'geometry'
]]

# Step 12: Display the first few rows with the results
links_pt_1.head()

Unnamed: 0,source,target,time_distance,stop_id_source,stop_id_target,node_id_source,node_id_target,route_id,route_short_name,stop_name_source,...,place_id_target,mode,direction_id,stop_sequence,time_period,day_week,day_type,frequency_dayweek,frequency_daytype,geometry
0,9022014001004001_9011014502300000,9022014003960006_9011014502300000,85.0,9022014001004001,9022014003960006,9022014001004001,9022014003960006,9011014502300000,23,Amhult Resecentrum,...,307987564,bus_service,0,1,night_dawn,Thursday,week_day,10800.0,10800.0,"LINESTRING (308137.973 6400982.960, 308364.962..."
1,9022014001004001_9011014502300000,9022014003960006_9011014502300000,102.0,9022014001004001,9022014003960006,9022014001004001,9022014003960006,9011014502300000,23,Amhult Resecentrum,...,307987564,bus_service,0,1,peak_evening,Thursday,week_day,10800.0,10800.0,"LINESTRING (308137.973 6400982.960, 308364.962..."
2,9022014001004001_9011014502300000,9022014003960006_9011014502300000,85.0,9022014001004001,9022014003960006,9022014001004001,9022014003960006,9011014502300000,23,Amhult Resecentrum,...,307987564,bus_service,0,1,peak_morning,Thursday,week_day,10800.0,10800.0,"LINESTRING (308137.973 6400982.960, 308364.962..."
3,9022014001004001_9011014502300000,9022014003960006_9011014502300000,85.0,9022014001004001,9022014003960006,9022014001004001,9022014003960006,9011014502300000,23,Amhult Resecentrum,...,307987564,bus_service,0,1,night_dawn,Friday,week_day,10800.0,10800.0,"LINESTRING (308137.973 6400982.960, 308364.962..."
4,9022014001004001_9011014502300000,9022014003960006_9011014502300000,102.0,9022014001004001,9022014003960006,9022014001004001,9022014003960006,9011014502300000,23,Amhult Resecentrum,...,307987564,bus_service,0,1,peak_evening,Friday,week_day,10800.0,10800.0,"LINESTRING (308137.973 6400982.960, 308364.962..."


In [24]:
# Step 13 (optional): Set Geometry and export the table to the DB
links_pt_1 = gpd.GeoDataFrame(links_pt_1, geometry='geometry', crs='3006')
links_pt_1.to_postgis('links_pt_1_long', engine, schema='pt_1_mdirections_mstops_mroutes', if_exists ='replace')

### Transfers


#### Same stop

In [83]:
# Preprocess the Data
# Step 1: Create a copy of the nodes table
nodes_transfer_same_stop = nodes_pt_1[['node_id', 'stop_id', 'place_id', 'geometry']].copy()

#Step 2 (optional): display the results
nodes_transfer_same_stop.head()

Unnamed: 0,node_id,stop_id,place_id,geometry
0,9022014001004001_9011014521200000,9022014001004001,307987564,POINT (308137.973 6400982.960)
9016,9022014001004003_9011014620400000,9022014001004003,307987564,POINT (308179.982 6400938.925)
12400,9022014001004004_9011014521200000,9022014001004004,307987564,POINT (308152.980 6400960.949)
21243,9022014001004006_9011014503400000,9022014001004006,307987564,POINT (308158.939 6400936.915)
27091,9022014001004007_9011014521100000,9022014001004007,307987564,POINT (308164.962 6400912.990)


In [84]:
# Step 3: Merge the stops in order to create the transfers
# Source
stop_routes_source = nodes_transfer_same_stop.rename(columns={'node_id': 'source', 'stop_id': 'target'}).drop_duplicates()

# Target
stop_routes_target = pd.merge(nodes_transfer_same_stop,
                              nodes_pt_1[['node_id']],
                              on='node_id',
                              how='inner').rename(columns={'stop_id': 'source', 'node_id': 'target'}).drop_duplicates()

# Step 4: Create the transfers same stop table
transfers_same_stop = pd.merge(stop_routes_source, stop_routes_target, how='outer')

#Step 5 (optional): Display the results
transfers_same_stop.head()

Unnamed: 0,source,target,place_id,geometry
0,9022014001004001,9022014001004001_9011014502300000,307987564,POINT (308137.973 6400982.960)
1,9022014001004001,9022014001004001_9011014521100000,307987564,POINT (308137.973 6400982.960)
2,9022014001004001,9022014001004001_9011014521200000,307987564,POINT (308137.973 6400982.960)
3,9022014001004001_9011014502300000,9022014001004001,307987564,POINT (308137.973 6400982.960)
4,9022014001004001_9011014521100000,9022014001004001,307987564,POINT (308137.973 6400982.960)


In [85]:
# Create the transfers
# Step 6: Merge the table with the frequency of the links table
transfers_same_stop = pd.merge(
    transfers_same_stop,
    links_pt_1[['target', 'day_week', 'day_type', 'frequency_dayweek', 'frequency_daytype']],
    on='target',
    how='outer'
).drop_duplicates(subset=['source', 'target'])

#Step 7 (optional): Display the results
transfers_same_stop.head()

Unnamed: 0,source,target,place_id,geometry,day_week,day_type,frequency_dayweek,frequency_daytype
0,9022014001004001_9011014502300000,9022014001004001,307987564,POINT (308137.973 6400982.960),,,,
1,9022014001004001_9011014521100000,9022014001004001,307987564,POINT (308137.973 6400982.960),,,,
2,9022014001004001_9011014521200000,9022014001004001,307987564,POINT (308137.973 6400982.960),,,,
3,9022014001004001,9022014001004001_9011014502300000,307987564,POINT (308137.973 6400982.960),,,,
4,9022014001004001,9022014001004001_9011014521100000,307987564,POINT (308137.973 6400982.960),,,,


In [86]:
# Step 5: Replace 0 with NaN and vice versa in frequency columns
freq_columns = [col for col in transfers_same_stop_merge.columns if col.startswith('frequency')]
transfers_same_stop_merge[freq_columns] = transfers_same_stop_merge[freq_columns].replace({0: np.nan, np.nan: 0})

In [87]:
# Step 6: Filter final columns for transfers_same_stop table
transfers_same_stop = transfers_same_stop_merge[[
    'source', 'target', 'day_week', 'day_type', 'frequency_dayweek', 'frequency_daytype', 'geometry'
]]
transfers_same_stop.head()

Unnamed: 0,source,target,day_week,day_type,frequency_dayweek,frequency_daytype,geometry
0,9022014019115003,9022014019115003_9011013061400000,Thursday,week_day,10800.0,10800.0,POINT (327675.964 6361431.902)
355,9022014019610001,9022014019610001_9011013061400000,Thursday,week_day,10800.0,10800.0,POINT (327108.979 6359778.001)
710,9022014019610002,9022014019610002_9011013061400000,Thursday,week_day,10800.0,10800.0,POINT (327161.986 6359701.973)
1065,9022014019611001,9022014019611001_9011013061400000,Thursday,week_day,10800.0,10800.0,POINT (326796.970 6360275.948)
1420,9022014019611002,9022014019611002_9011013061400000,Thursday,week_day,10800.0,10800.0,POINT (326818.972 6360170.911)


In [89]:
# Step 7 (optional): Set Geometry and export the table to the DB
transfers_same_stop = gpd.GeoDataFrame(transfers_same_stop, geometry='geometry', crs='3006')
transfers_same_stop.to_postgis('transfers_same_stop_pt_1_long', engine, schema='pt_1_mdirections_mstops_mroutes', if_exists ='replace')

#### Different stops

In [None]:
#Preprocess the Data
# Step 1: Create a copy of the nodes table
nodes_transfer_diff_stop = nodes_pt_1[['node_id', 'stop_id', 'geometry']].copy()

# Step 2 (optional): Display the results
nodes_transfer_diff_stop.head()

In [None]:
#Create the buffers
# Step 3: Create a buffer column and set it as geometry
nodes_transfer_diff_stop['buffer'] = nodes_transfer_diff_stop['geometry'].buffer(75)
nodes_transfer_diff_stop = nodes_transfer_diff_stop.set_geometry('buffer',
                                                                 crs='EPSG:3006')

# Step 4: found the intersections among the buffers
nodes_transfer_diff_stop = gpd.overlay(nodes_transfer_diff_stop,
                                       nodes_transfer_diff_stop,
                                       how='intersection').rename(columns={'node_id_1':'source',
                                                                           'node_id_2':'target',
                                                                           'stop_id_1':'stop_id_source',
                                                                           'stop_id_2':'stop_id_target',
                                                                           'geometry_1': 'geometry_source',
                                                                           'geometry_2': 'geometry_target'}).dropna(subset=['source'])
# Step 5 (optional): Display the results
nodes_transfer_diff_stop.head()

In [None]:
#Create the geometry
# Step 6: Create linstrings between the intersections
nodes_transfer_diff_stop['geometry'] = nodes_transfer_diff_stop.apply(lambda row: LineString([[row.geometry_source.x,
                                                                                               row.geometry_source.y],
                                                                                                [row.geometry_target.x,
                                                                                                 row.geometry_target.y]]) if row.geometry_target is not None else None,
                                                                      axis=1)
# Step 7: Set the length of the linestring
nodes_transfer_diff_stop['length'] = nodes_transfer_diff_stop['geometry'].length
nodes_transfer_diff_stop['time_distance'] = nodes_transfer_diff_stop['length'] / 1.2

# Step 8 (optional): Display the results
nodes_transfer_diff_stop.head()

In [None]:
#Filter and organize the tables
# Step 9: Filter the transfers between different stops
transfers_diff_stop = nodes_transfer_diff_stop[nodes_transfer_diff_stop['length'] > 0]

# Step 10: Drop the duplicates of symmetric pairs
transfers_diff_stop ['symmetric_pairs'] = transfers_diff_stop[['source',
                                                               'target']].apply(lambda x: '-'.join(sorted(x)),
                                                                                axis=1)
transfers_diff_stop = transfers_diff_stop.drop_duplicates(subset = ['symmetric_pairs'])

# Step 11: Filter the necessary columns
transfers_diff_stop = transfers_diff_stop [['stop_id_source',
                                            'stop_id_target',
                                            'geometry',
                                            'length',
                                            'time_distance']].rename(columns={'stop_id_source':'source',
                                                                              'stop_id_target':'target'}).drop_duplicates()
# Step 12 (optional): Display the results
transfers_diff_stop.head()

In [48]:
# Step 6 (optional): Set Geometry and export the table to the DB
transfers_diff_stop = gpd.GeoDataFrame(transfers_diff_stop, geometry='geometry', crs='3006')
transfers_diff_stop.to_postgis('transfers_diff_stop_pt_1_long', engine, schema='pt_1_mdirections_mstops_mroutes', if_exists ='replace')