In [1]:
import sys
import os
import pandas as pd
import geopandas as gpd

sys.path.append(os.path.abspath('../../scripts/2. modules'))
import processing

# Directory holding the PTV station shapefile folders in the landing layer.
_LANDING_SHAPEFILE_DIR = '../../data/1. landing/shapefile'

# Inputs: one shapefile directory per train network, both read with
# geopandas later in the notebook.
PTV_METRO_PATH = f'{_LANDING_SHAPEFILE_DIR}/metro_station'
PTV_REGIONAL_PATH = f'{_LANDING_SHAPEFILE_DIR}/regional_station'

# Outputs: target directory and file name passed to processing.to_csv.
PTV_FILE_PATH_OUT = '../../data/2. raw'
PTV_FILE_NAME = 'ptv.csv'

In [2]:
# Region table from the project helper.
# NOTE(review): the meaning of the argument `2` is defined inside
# processing.get_regions_df and is not visible here -- confirm and consider
# promoting it to a named constant in the configuration cell.
regions_df = processing.get_regions_df(2)

# Read the metro and regional station layers, in that order.
metro_trains, regional_trains = (
    gpd.read_file(shapefile_dir)
    for shapefile_dir in (PTV_METRO_PATH, PTV_REGIONAL_PATH)
)

In [6]:
# --- Combine the metro and regional station layers ---------------------------
# A STOP_NAME present in both layers would be counted twice, so the regional
# copy is dropped and the metro record is kept.
is_duplicate_station = regional_trains['STOP_NAME'].isin(metro_trains['STOP_NAME'])
duplicate_stations = set(regional_trains.loc[is_duplicate_station, 'STOP_NAME'])
regional_trains_no_duplicates = regional_trains[~is_duplicate_station]

all_stations = pd.concat([metro_trains, regional_trains_no_duplicates], ignore_index=True)
# Only the station geometry is needed for the spatial join.
all_stations = gpd.GeoDataFrame(geometry=all_stations['geometry'])

regions_df_trains = gpd.GeoDataFrame(regions_df, geometry=regions_df['geometry'])

# --- Bring both layers into one CRS -------------------------------------------
# Fallback CRSs are only applied when a layer carries none.
if regions_df_trains.crs is None:
    regions_df_trains = regions_df_trains.set_crs('EPSG:4326')
if all_stations.crs is None:
    # The result must be assigned back to all_stations; otherwise the layer
    # stays CRS-less and the to_crs call below raises on naive geometries.
    all_stations = all_stations.set_crs("EPSG:7844")

if regions_df_trains.crs != all_stations.crs:
    all_stations = all_stations.to_crs(regions_df_trains.crs)

# --- Count stations per suburb -------------------------------------------------
# Left join keeps every region; a region with no station yields a single row
# whose 'index_right' is null.
gdf_joined = gpd.sjoin(regions_df_trains, all_stations, how='left', predicate='intersects')

# count() ignores nulls, so regions without a station get 0 (size() would
# report 1 for the unmatched placeholder row).
point_counts = gdf_joined.groupby('suburbs')['index_right'].count()

# Map by suburb name rather than by position: groupby sorts its keys, so a
# positional assignment would attach counts to the wrong rows whenever
# regions_df is not already sorted by 'suburbs'.
regions_df_trains['trains'] = (
    regions_df_trains['suburbs'].map(point_counts).fillna(0).astype(int)
)

regions_df_trains = regions_df_trains[['suburbs', 'trains']]

In [7]:
# Hand the per-suburb station counts to the project's CSV writer.
# NOTE(review): presumably this writes PTV_FILE_NAME inside PTV_FILE_PATH_OUT;
# the helper is defined in the local `processing` module -- confirm there.
processing.to_csv(regions_df_trains, PTV_FILE_PATH_OUT, PTV_FILE_NAME)