# Bus Data Exploration Notebook

This notebook helps you load, parse, and explore the bus data files. Adjust and experiment as needed!

In [1]:
import json
from pathlib import Path
import pandas as pd

def load_bus_file(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return its decoded content."""
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)

def parse_posiciones(pos_str, record_length=None):
    """Split a ';'-separated position string into fixed-width records.

    Parameters
    ----------
    pos_str : str
        Raw string; leading/trailing ';' are dropped before splitting.
    record_length : int, optional
        Number of fields per record. When omitted, the smallest width from
        11 to 24 that evenly divides the field count is used, falling back
        to 11 when none does. Pass it explicitly when auto-detection is
        ambiguous (e.g. 132 fields divide evenly by both 11 and 12).

    Returns
    -------
    tuple
        ``(records, record_length)`` where ``records`` is a list of field
        lists. The last record may be shorter when the field count is not
        a multiple of the width.

    Raises
    ------
    ValueError
        If ``record_length`` is given and is less than 1.
    """
    fields = pos_str.strip(';').split(';')
    if record_length is None:
        # Auto-detect: first candidate width that divides the field count.
        record_length = next(
            (width for width in range(11, 25) if len(fields) % width == 0),
            11,  # fallback when no candidate width fits
        )
    elif record_length < 1:
        raise ValueError(f"record_length must be >= 1, got {record_length}")
    records = [
        fields[start:start + record_length]
        for start in range(0, len(fields), record_length)
    ]
    return records, record_length


In [3]:
# Load one JSON snapshot; `data` is reused by the preview cell below.
bus_json_path = 'raw_data/bus/2024-11-26/13_54.json'  # Update path as needed
data = load_bus_file(bus_json_path)
# data["posiciones"] holds the raw ';'-separated position strings.
print(f'Loaded {len(data["posiciones"])} position strings')

Loaded 12008 position strings


In [4]:
# Preview the first position string
first_pos = data['posiciones'][0]
# parse_posiciones returns the split records plus the record width it settled on.
records, rec_len = parse_posiciones(first_pos)
print(f'Record length detected: {rec_len}')
print(f'Total records: {len(records)}')
print('First record:')
# Fields are numbered from 1 in the printout.
for i, val in enumerate(records[0]):
    print(f'  Field {i+1}: {val}')

Record length detected: 12
Total records: 4
First record:
  Field 1: 20-12-2023 15:02:26
  Field 2: BJFC-73
  Field 3: -33.41663938
  Field 4: -70.6912058
  Field 5: 0.0
  Field 6: 7.0
  Field 7: 5.0
  Field 8: T5TS
  Field 9: I
  Field 10: T5TS 00I
  Field 11: T5TS 00I
  Field 12: 20-12-2023 15:02:31


In [None]:
# Preview the first position string
# NOTE(review): this cell repeats the same preview code as the earlier cell
# and was never executed; consider removing it.
first_pos = data['posiciones'][0]
records, rec_len = parse_posiciones(first_pos)
print(f'Record length detected: {rec_len}')
print(f'Total records: {len(records)}')
print('First record:')
# Fields are numbered from 1 in the printout.
for i, val in enumerate(records[0]):
    print(f'  Field {i+1}: {val}')

## Next Steps
- Assign column names based on your understanding.
- Explore statistics, plot routes, or filter by bus ID or time.
- Load and concatenate records from multiple position strings if needed.

Feel free to add more cells to experiment!

In [None]:
import json
from pathlib import Path

def load_bus_file(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return its decoded content."""
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)

def parse_posiciones(pos_str, record_length=None):
    """Split a ';'-separated position string into fixed-width records.

    Parameters
    ----------
    pos_str : str
        Raw string; leading/trailing ';' are dropped before splitting.
    record_length : int, optional
        Number of fields per record. When omitted, the smallest width from
        11 to 24 that evenly divides the field count is used, falling back
        to 11 when none does. Pass it explicitly when auto-detection is
        ambiguous (e.g. 132 fields divide evenly by both 11 and 12).

    Returns
    -------
    tuple
        ``(records, record_length)`` where ``records`` is a list of field
        lists. The last record may be shorter when the field count is not
        a multiple of the width.

    Raises
    ------
    ValueError
        If ``record_length`` is given and is less than 1.
    """
    fields = pos_str.strip(';').split(';')
    if record_length is None:
        # Auto-detect: first candidate width that divides the field count.
        record_length = next(
            (width for width in range(11, 25) if len(fields) % width == 0),
            11,  # fallback when no candidate width fits
        )
    elif record_length < 1:
        raise ValueError(f"record_length must be >= 1, got {record_length}")
    records = [
        fields[start:start + record_length]
        for start in range(0, len(fields), record_length)
    ]
    return records, record_length

# Bus to look for; it is compared against the second field of every record
bus_id = 'WB-1031'

# Snapshot files to scan, in order
file_paths = [
    'raw_data/bus/2024-11-26/13_54.json',
    'raw_data/bus/2024-11-26/13_56.json',
    'raw_data/bus/2024-11-26/13_57.json',
    'raw_data/bus/2024-11-26/13_58.json'
]

for path in file_paths:
    data = load_bus_file(path)
    found = False
    for pos_idx, pos_str in enumerate(data['posiciones']):
        records, rec_len = parse_posiciones(pos_str)
        for rec_idx, record in enumerate(records):
            # Skip records that are too short to carry an id or belong to another bus
            if len(record) <= 1 or record[1] != bus_id:
                continue
            print(f"Bus {bus_id} found in {Path(path).name}: posiciones[{pos_idx}], record[{rec_idx}]")
            found = True
    # Report the file explicitly when no record matched
    if not found:
        print(f"Bus {bus_id} NOT found in {Path(path).name}")

Bus WB-1031 found in 13_54.json: posiciones[1201], record[0]
Bus WB-1031 found in 13_54.json: posiciones[1201], record[1]
Bus WB-1031 found in 13_54.json: posiciones[1201], record[2]
Bus WB-1031 found in 13_54.json: posiciones[1201], record[3]
Bus WB-1031 found in 13_56.json: posiciones[1201], record[0]
Bus WB-1031 found in 13_56.json: posiciones[1201], record[1]
Bus WB-1031 found in 13_56.json: posiciones[1201], record[2]
Bus WB-1031 found in 13_56.json: posiciones[1201], record[3]
Bus WB-1031 found in 13_57.json: posiciones[1201], record[0]
Bus WB-1031 found in 13_57.json: posiciones[1201], record[1]
Bus WB-1031 found in 13_57.json: posiciones[1201], record[2]
Bus WB-1031 found in 13_57.json: posiciones[1201], record[3]
Bus WB-1031 found in 13_58.json: posiciones[1201], record[0]
Bus WB-1031 found in 13_58.json: posiciones[1201], record[1]
Bus WB-1031 found in 13_58.json: posiciones[1201], record[2]
Bus WB-1031 found in 13_58.json: posiciones[1201], record[3]


In [9]:
import json
from datetime import datetime, timedelta

def load_bus_file(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return its decoded content."""
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)

def parse_posiciones(pos_str, record_length=None):
    """Split a ';'-separated position string into fixed-width records.

    Parameters
    ----------
    pos_str : str
        Raw string; leading/trailing ';' are dropped before splitting.
    record_length : int, optional
        Number of fields per record. When omitted, the smallest width from
        11 to 24 that evenly divides the field count is used, falling back
        to 11 when none does. Pass it explicitly when auto-detection is
        ambiguous (e.g. 132 fields divide evenly by both 11 and 12).

    Returns
    -------
    tuple
        ``(records, record_length)`` where ``records`` is a list of field
        lists. The last record may be shorter when the field count is not
        a multiple of the width.

    Raises
    ------
    ValueError
        If ``record_length`` is given and is less than 1.
    """
    fields = pos_str.strip(';').split(';')
    if record_length is None:
        # Auto-detect: first candidate width that divides the field count.
        record_length = next(
            (width for width in range(11, 25) if len(fields) % width == 0),
            11,  # fallback when no candidate width fits
        )
    elif record_length < 1:
        raise ValueError(f"record_length must be >= 1, got {record_length}")
    records = [
        fields[start:start + record_length]
        for start in range(0, len(fields), record_length)
    ]
    return records, record_length

def parse_datetime(dt_str):
    """Parse a timestamp given as 'YYYYMMDDHHMMSS' or 'DD-MM-YYYY HH:MM:SS'.

    The compact form is tried first; if it does not match, the dashed form
    is tried and its ValueError propagates when that fails as well.
    """
    compact_format = "%Y%m%d%H%M%S"
    dashed_format = "%d-%m-%Y %H:%M:%S"
    try:
        parsed = datetime.strptime(dt_str, compact_format)
    except ValueError:
        parsed = datetime.strptime(dt_str, dashed_format)
    return parsed

# Snapshot to analyse
bus_json_path = 'raw_data/bus/2024-11-26/13_54.json'

data = load_bus_file(bus_json_path)
# Time of the query, used as the reference point for "recent"
request_time = parse_datetime(data['fecha_consulta'])

active_buses = set()

for pos_str in data['posiciones']:
    records, rec_len = parse_posiciones(pos_str)
    # Nothing usable unless the first record has at least a timestamp and an id
    if not records or len(records[0]) <= 1:
        continue
    # Assumes the first record is the most recent one for this bus — TODO confirm
    latest_time_str, bus_id = records[0][:2]
    latest_time = parse_datetime(latest_time_str)
    if request_time - latest_time <= timedelta(minutes=2):
        active_buses.add(bus_id)

print(f"Active buses (last 2 minutes): {active_buses}")
print(f"Number of active buses: {len(active_buses)}")

Active buses (last 2 minutes): {'FLXZ-47', 'PGBZ-14', 'BJFW-35', 'FLXS-69', 'SJTD-56', 'GCBB-31', 'THTG-65', 'SJTD-64', 'SPZX-69', 'PFVK-90', 'FLXG-34', 'FLXY-96', 'BJFC-37', 'TBFX-11', 'PFXB-83', 'THTG-96', 'FLXV-65', 'LZPT-77', 'BJFX-44', 'GCBD-77', 'SRVJ-82', 'FLXL-20', 'FLXG-31', 'CJRL-18', 'SKPP-24', 'FLXD-26', 'FLXL-94', 'SPBL-85', 'SVDD-90', 'BJFS-67', 'LZPS-15', 'FLXP-94', 'SDSW-79', 'PFXC-69', 'SLPC-43', 'FLXX-70', 'GCBD-62', 'SFPF-85', 'FLXT-55', 'XM21', 'SPRW-13', 'FLXL-46', 'BJFF-61', 'FDJX-84', 'PFBG-38', 'PFZK-80', 'SHXD-81', 'CJRY-73', 'FLXY-33', 'PGBY-65', 'BJFP-36', 'FLXZ-48', 'STHR-46', 'BJFX-10', 'FLXC-47', 'LCTG-31', 'BJFB-15', 'LCPW-44', 'FLXV-47', 'FLXF-20', 'SSXF-31', 'PFXC-43', 'TBFX-60', 'PGRZ-85', 'PFYS-22', 'SFPG-68', 'BJFZ-52', 'TBFV-24', 'FLXR-53', 'TBFP-98', 'FLXR-41', 'CJRX-45', 'CJRP-20', 'FLXW-65', 'FLXP-89', 'SFPG-30', 'LWTK-64', 'SVDC-92', 'SHCY-30', 'CJRX-42', 'TBFR-19', 'SRRY-53', 'LFGW-21', 'BJFX-59', 'SHXD-73', 'SVDD-50', 'BJFC-35', 'BJFD-73', 'TB