# Generate fixtures

How to use it:

We have 3 steps:
1. load gtfs files
2. filter gtfs data
3. save fixtures
4. realtime api sample

1. Load GTFS files

In [3]:
from glob import glob
from typing import Dict, List

import pandas as pd

gtfs_folder = glob(f"../manage_db/fixtures/pontos/")[0]

gtfs = {
    "agency": pd.read_csv(f"{gtfs_folder}/agency.txt"),
    "stop_times": pd.read_csv(f"{gtfs_folder}/stop_times.txt"),
    "stops": pd.read_csv(f"{gtfs_folder}/stops.txt"),
    "trips": pd.read_csv(f"{gtfs_folder}/trips.txt"),
    "frequencies": pd.read_csv(f"{gtfs_folder}/frequencies.txt"),
    "routes": pd.read_csv(f"{gtfs_folder}/routes.txt"),
    "shapes": pd.read_csv(f"{gtfs_folder}/shapes.txt"),
    "calendar": pd.read_csv(f"{gtfs_folder}/calendar.txt"),
    "calendar_dates": pd.read_csv(f"{gtfs_folder}/calendar_dates.txt"),
}

gtfs_to_save: Dict[str, pd.DataFrame] = {}

2. Filter GTFS data

GTFS diagram for reference:
![GTFS diagram - research gate](https://www.researchgate.net/profile/Milos-Jovanovik/publication/263853949/figure/fig1/AS:296062309945345@1447598163690/The-GTFS-Schema-for-the-data-from-JSP-Skopje.png)
Source: [Open Public Transport Data in Macedonia - Research Gate](https://www.researchgate.net/figure/The-GTFS-Schema-for-the-data-from-JSP-Skopje_fig1_263853949)

In [180]:
# copy to filter
from datetime import datetime
import os

import requests


agency = gtfs['agency'].copy()
st = gtfs['stop_times'].copy()
stops = gtfs['stops'].copy()
trips = gtfs['trips'].copy()
frequencies = gtfs['frequencies'].copy()
routes = gtfs['routes'].copy()
shapes = gtfs['shapes'].copy()
calendar = gtfs['calendar'].copy()
cd = gtfs['calendar_dates'].copy()


# filter utils

def get_stop_and_its_platforms(stops: pd.DataFrame, codes: List[str], limit: int = -1):
    s = stops.copy()
    stop_code = s['stop_code'].isin(codes)
    response = s[(stop_code) | (
        s['parent_station'].isin(s[stop_code]['stop_id']))]
    if limit >= 0:
        response = response.head(limit)
    response = response.sort_values(by='stop_code')
    return response


def get_realtime_inputs():
    url = os.environ.get(
        "API_REALTIME", "https://dados.mobilidade.rio/gps/brt")
    _response = requests.get(url, timeout=5)
    data: List[dict] = _response.json()["veiculos"]
    response = []
    keys = []
    for i in data:
        if i['linha'] not in keys:
            response += [{
                'trip_short_name': i['linha'],
                'direction_id': 1 if i['sentido'] == "volta" else 0,
            }]
            keys += [i['linha']]
    return response


def filter_stops():
    global stops, trips, st
    # inputs = get_realtime_inputs()
    # tsn = [i['trip_short_name'] for i in inputs]
    # trips = trips[trips['trip_short_name'].isin(tsn)]
    # st = st[st['trip_id'].isin(trips['trip_id'])]
    # stops = stops[stops['stop_id'].isin(st['stop_id'])]
    # codes = stops[stops['stop_code'].notnull()]['stop_code'].to_list()[:1]
    # print('codes',codes)
    stops = get_stop_and_its_platforms(stops, ['TOTAL'], 2)


def filter_fk(limit: int = -1):
    """keep only rows with valid fk"""
    global st, stops, trips, frequencies, routes, shapes, calendar, cd

    if limit >= 0:
        routes = routes.head(limit)
        stops = stops.head(limit)
        calendar = calendar.head(limit)

    shapes_dup = gtfs['shapes'][gtfs['shapes'].duplicated(
        subset=['shape_id'], keep=False)]
    shapes = shapes_dup

    trips = trips[
        (trips['route_id'].isin(routes['route_id']))
        & (trips['service_id'].isin(calendar['service_id']))
        & (trips['shape_id'].isin(shapes['shape_id']))
    ]
    if limit >= 0:
        trips = trips.head(limit)

    st = st[
        (st['stop_id'].isin(stops['stop_id']))
        & (st['trip_id'].isin(trips['trip_id']))
    ]
    trips = trips[
        (trips['route_id'].isin(routes['route_id']))
        & (trips['service_id'].isin(calendar['service_id']))
        & (trips['shape_id'].isin(shapes['shape_id']))
    ]

    frequencies = frequencies[frequencies['trip_id'].isin(trips['trip_id'])]

    if limit >= 0:
        limit_results(limit)


def limit_results(max: int):
    global st, stops, trips, frequencies, routes, shapes, calendar, cd
    st = st.head(max)
    stops = stops.head(max)
    trips = trips.head(max)
    frequencies = frequencies.head(max)
    routes = routes.head(max)
    shapes = shapes.head(max)
    calendar = calendar.head(max)
    cd = cd.head(max)


def patch_col(dest: pd.DataFrame, src: pd.DataFrame, col: str):
    i = 0
    src = src.copy().reset_index(drop=True)
    dest = dest.copy().reset_index(drop=True)
    for _, _ in dest.iterrows():
        dest.at[i, col] = src.at[i % len(src), col]
        i += 1
    return dest


def patch_rows():
    """Modify values to make a suitable data mock"""
    global st, stops, trips, frequencies, routes, shapes, calendar, cd
    # fk
    st = patch_col(st, trips, 'trip_id')
    trips = patch_col(trips, routes, 'route_id')
    trips = patch_col(trips, calendar, 'service_id')
    trips = patch_col(trips, shapes, 'shape_id')
    frequencies = patch_col(frequencies, trips, 'trip_id')
    # trip_short_name
    tsn = ['0', '10', '11', '12', '13', '14', '17', '18', '19', '20', '22', '25',
           '29', '31', '35', '38', '40', '41', '42', '43', '46', '50', '51', '52', '53']
    for i, _ in trips.iterrows():
        trips.at[i, 'trip_short_name'] = tsn[i % len(tsn)]
    st = st.replace(0, 1)


# run filters
filter_stops()
filter_fk()

max = 10
st = st.head(max)
stops = stops.head(max)
trips = trips.head(max)
frequencies = frequencies.head(max)
routes = routes.head(max)
shapes = shapes.head(max)
calendar = calendar.head(max)
cd = cd.head(max)

patch_rows()

# save
gtfs_to_save = {
    'agency': {'table': agency, 'pk': "agency_id", 'model': "pontos.agency"},
    'calendar_dates': {'table': cd, 'pk': None, 'model': "pontos.calendardates"},
    'calendar': {'table': calendar, 'pk': "service_id", 'model': "pontos.calendar"},
    'frequencies': {'table': frequencies, 'pk': None, 'model': "pontos.frequencies"},
    'routes': {'table': routes, 'pk': "route_id", 'model': "pontos.routes"},
    'shapes': {'table': shapes, 'pk': None, 'model': "pontos.shapes"},
    'stop_times': {'table': st, 'pk': None, 'model': "pontos.stoptimes"},
    'stops': {'table': stops, 'pk': "stop_id", 'model': "pontos.stops"},
    'trips': {'table': trips, 'pk': "trip_id", 'model': "pontos.trips"},
}

# log
print("result:")
display({k: len(v['table']) for k, v in gtfs_to_save.items()})

result:


{'agency': 5,
 'calendar_dates': 10,
 'calendar': 10,
 'frequencies': 10,
 'routes': 10,
 'shapes': 10,
 'stop_times': 6,
 'stops': 2,
 'trips': 10}

3. Save as fixtures

In [181]:
import json
from os import path
from datetime import datetime as dt

import numpy as np

fixtures_folder = glob(
    f"../../../mobilidade_rio/mobilidade_rio/predictor/tests/fixtures")[0]

generate_start = dt.now()
for name, info in gtfs_to_save.items():
    fixtures: List[dict] = []
    table: pd.DataFrame = info['table']

    now = dt.now()
    print(f"Saving {name} ({len(table)}) ...", end='')

    # treat table
    table = table.replace({np.nan: None})
    date_cols = [i for i in table.columns.to_list(
    ) if i in ['date', 'start_date', 'end_date']]
    for col in date_cols:
        table[col] = table[col].astype(
            str).apply(lambda n: f"{n[:4]}-{n[4:6]}-{n[6:]}")

    # generate
    for index, row in table.iterrows():
        info_pk = info['pk']
        if info_pk is not None:
            pk = str(row[info_pk])
        else:
            pk = str(index + 1)
        treated_row = row.replace({np.nan: None})
        fixture = {
            'model': info['model'],
            'pk': pk,
            'fields': treated_row.to_dict(),
        }
        fixtures.append(fixture)

    # save
    save_filepath = path.join(fixtures_folder, f"{name}.json")
    with open(save_filepath, 'w') as json_file:
        json_file.write(json.dumps(fixtures, indent=4))
        print(f" \t{dt.now() - now}s")

print(f"\nDone ({dt.now()}) \t{dt.now() - generate_start}s")

Saving agency (5) ... 	0:00:00.003502s
Saving calendar_dates (10) ... 	0:00:00.003498s
Saving calendar (10) ... 	0:00:00.005023s
Saving frequencies (10) ... 	0:00:00.003003s
Saving routes (10) ... 	0:00:00.005038s
Saving shapes (10) ... 	0:00:00.003489s
Saving stop_times (6) ... 	0:00:00.003529s
Saving stops (2) ... 	0:00:00.002002s
Saving trips (10) ... 	0:00:00.003506s

Done (2024-01-13 03:32:55.206184) 	0:00:00.036614s


4. Realtime api sample

Get static sample of realtime api data

In [197]:
def get_realtime():
    url = os.environ.get(
        "API_REALTIME", "https://dados.mobilidade.rio/gps/brt")
    _response = requests.get(url, timeout=5)
    data: dict = _response.json()
    return data

data = get_realtime()
data['veiculos'] = data['veiculos'][:int(len(data['veiculos'])/2)]


# save
data_folder = glob(
    f"../../../mobilidade_rio/mobilidade_rio/predictor/tests/data")[0]
save_filepath = path.join(data_folder, f"api_realtime.json")
with open(save_filepath, 'w') as json_file:
    json_file.write(json.dumps(data))
    print(
        f"api_realtime.json ({len(data['veiculos'])} items) saved! \t{dt.now() - now}s")

api_realtime.json (249 items) saved! 	18:43:16.024812s
