In [None]:
import matplotlib.animation as animation
from matplotlib.animation import PillowWriter, FuncAnimation
import osmnx as ox
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import sqlite3
import warnings
from shapely.geometry import Point, LineString
from loguru import logger
from IPython.display import Image
from matplotlib.collections import LineCollection
from scipy.interpolate import splprep, splev

warnings.filterwarnings("ignore", category=UserWarning, module="osmnx")
pd.set_option("display.float_format", "{:.6f}".format)

In [None]:
# Stops data is taken directly from the ZTM website and is processed inside "get_stops_data.py".
# It has no use for now, because I decided to use a different source for the routes.

# Read the stops table; these four columns are read as strings.
stops_data = pd.read_csv(
    f"{data_parent_directory}/stops.csv",
    dtype=dict.fromkeys(["stop_ordinal_number", "street_id", "type_id", "stop_id"], str),
)

# Build point geometries from lon/lat, keep only the listed columns, drop repeated rows,
# then declare the coordinates as EPSG:4326 and reproject them to EPSG:2180.
stops_data = (
    gpd.GeoDataFrame(stops_data, geometry=gpd.points_from_xy(stops_data["lon"], stops_data["lat"]))
    [["line", "stop_ordinal_number", "complex_id", "type_name", "geometry"]]
    .drop_duplicates()
    .set_crs(epsg=4326)
    .to_crs(epsg=2180)
)

stops_data

In [None]:
# This data is taken from the "https://mkuran.pl/gtfs/" website and it links each line (route_id)
# with its brigade and shape_id

# Load the trips table (brigade read as a string), keep only the three linking columns
# and collapse repeated (route_id, shape_id, brigade) rows.
trips = (
    pd.read_csv(
        f"{data_parent_directory}/routes_detailed/trips.txt",
        dtype={"brigade": str},
    )
    .loc[:, ["route_id", "shape_id", "brigade"]]
    .drop_duplicates()
)
trips

In [None]:
# This data is taken from the "https://mkuran.pl/gtfs/" website and it contains the sequence of route points for every shape_id
# shape_id is taken from the previous table

# Load all shape points, then keep only the rows whose shape_id appears in `trips`.
shapes = (
    pd.read_csv(f"{data_parent_directory}/routes_detailed/shapes.txt")
    .loc[lambda points: points["shape_id"].isin(trips["shape_id"].unique())]
)
shapes

In [None]:
# Here I take points and make linestring out of them. Next I merge it with the information about line and brigade
# also here I change EPSG to 2180

def make_linestring(df):
    """Build a single LineString from the points of one shape.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with ``shape_pt_sequence``, ``shape_pt_lon`` and ``shape_pt_lat`` columns.

    Returns
    -------
    shapely.geometry.LineString
        Line through the points in ascending ``shape_pt_sequence`` order, with
        coordinates passed as (lon, lat) pairs.
    """
    ordered = df.sort_values("shape_pt_sequence")
    coords = [
        (lon, lat)
        for lon, lat in zip(ordered["shape_pt_lon"], ordered["shape_pt_lat"])
    ]
    return LineString(coords)

# Build one LineString per shape_id. The point columns are selected explicitly so the
# grouping key is not part of the frames handed to make_linestring; this gives the same
# result and avoids the pandas >= 2.2 deprecation warning about `apply` operating on
# the grouping columns.
lines = (
    shapes.groupby("shape_id")[["shape_pt_sequence", "shape_pt_lon", "shape_pt_lat"]]
    .apply(make_linestring)
    .reset_index(name="geometry")
)

# The line coordinates are (lon, lat) pairs, declared here as EPSG:4326.
routes_detailed = gpd.GeoDataFrame(lines, geometry="geometry", crs="EPSG:4326")

# Attach line (route_id) and brigade to every shape; `validate` asserts that
# shape_id is unique on the left side of the merge.
routes_detailed = routes_detailed.merge(
    trips,
    on=["shape_id"],
    how="left",
    validate="one_to_many",
)

# Reproject to EPSG:2180 (integer code, consistent with the stops cell).
routes_detailed = routes_detailed.to_crs(epsg=2180)
routes_detailed = routes_detailed.rename(columns={"route_id": "line"})

# Keep only the first row for every (line, brigade) pair and report what is dropped.
mask_duplicates = routes_detailed[["line", "brigade"]].duplicated()
print_removed(routes_detailed, mask_duplicates)

routes_detailed = routes_detailed[~mask_duplicates]

In [None]:
# Here I merge routes details with gps data:
# - merge by "line" and "brigade" first
# - if some data is still missing, I merge again on "line" only - it's better to have a route than none,
#   and there is a high probability that it will match

# NOT USED FOR NOW

# First pass: attach the route that matches both "line" and "brigade".
# `merge` already returns a new frame, so no explicit copy of gps_data_processed is needed.
gps_data_routes = gps_data_processed.merge(
    routes_detailed.rename(columns={"geometry": "route_shape"}),
    on=["line", "brigade"],
    how="left",
    validate="many_to_one",
)

# Second pass: attach one route per "line" regardless of brigade. Columns that collide
# with the first pass get the "_missing" suffix and are used as a fallback below.
gps_data_routes = gps_data_routes.merge(
    routes_detailed.drop(columns=["brigade"])
    .rename(columns={"geometry": "route_shape"})
    .drop_duplicates(subset=["line"]),
    on=["line"],
    how="left",
    validate="many_to_one",
    suffixes=("", "_missing"),
)

# Rows without a (line, brigade) match that do have a line-only match.
missing_from_first_merge = gps_data_routes["shape_id"].isna() & gps_data_routes["shape_id_missing"].notnull()

# Computed outside the f-string: reusing double quotes inside a double-quoted f-string
# is a SyntaxError before Python 3.12.
missing_line_brigades = gps_data_routes.loc[missing_from_first_merge, ["line", "brigade"]].drop_duplicates()
logger.info(
    f"There are {missing_from_first_merge.sum()} rows that have missing routes after first merege. "
    f"Here are some details: {missing_line_brigades}"
)

# Fill gaps from the line-only merge, then drop the helper columns.
for rep_col in ["shape_id", "route_shape"]:
    gps_data_routes[rep_col] = gps_data_routes[rep_col].fillna(gps_data_routes[f"{rep_col}_missing"])
    gps_data_routes = gps_data_routes.drop(columns=[f"{rep_col}_missing"])

gps_data_routes