# Vitrasa Bus Line Data Extractor

This Python script fetches essential bus line data from the Vitrasa website. It extracts a JSON dataset embedded in one of the page's `<script>` tags, containing **bus line IDs, names, and colors**, and then organizes this information into a Python dictionary keyed by each line's **`idBusLine`**.

In [18]:
import requests
from bs4 import BeautifulSoup
import json
import re

url = "https://www.vitrasa.es/lineas-y-horarios/todas-las-lineas"

# --- Step 1: Fetch the HTML content ---
try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()
except requests.exceptions.RequestException as e:
    print(f"Error fetching HTML: {e}")
    # `raise SystemExit` instead of `exit()`: the latter is a helper injected
    # by the `site` module for interactive use. A non-zero status signals
    # the failure to whoever launched the script.
    raise SystemExit(1)
html_content = response.text

# --- Step 2: Parse the HTML ---
soup = BeautifulSoup(html_content, 'html.parser')

# --- Step 3: Find the script tag containing 'jsonLineas' and extract the JSON string ---
json_data_string = None

for script in soup.find_all('script'):
    # `script.string` is None for tags without a single text child, so it
    # must be checked before searching inside it.
    if script.string and 'const jsonLineas =' in script.string:
        # Non-greedy match up to the first "];" that closes the array literal.
        match = re.search(r'const jsonLineas = (\[.*?\]);', script.string, re.DOTALL)
        if match:
            json_data_string = match.group(1)
            break

if json_data_string is None:
    print("Could not find the 'jsonLineas' data in any script tag.")
    raise SystemExit(1)

# --- Step 4: Convert the extracted string to a Python object ---
try:
    lineas_data = json.loads(json_data_string)
except json.JSONDecodeError as e:
    print(f"Error decoding JSON from script content: {e}")
    # Stop here: continuing would reach Step 5 with `lineas_data` undefined
    # and fail with a NameError instead of this message.
    raise SystemExit(1)

# --- Step 5: Format the data into a Python dictionary using 'idBusLine' as key ---
if not lineas_data:
    print("No bus line data available to format into a dictionary.")
    raise SystemExit(1)

bus_lines_dict = {}
for line in lineas_data:
    if "idBusLine" in line:
        # Build a fresh entry without the key field rather than deleting it
        # from `line`, so the parsed records in `lineas_data` stay intact.
        bus_lines_dict[line["idBusLine"]] = {
            field: value for field, value in line.items() if field != "idBusLine"
        }
print(json.dumps(bus_lines_dict, indent=4, ensure_ascii=False))

{
    "1": {
        "idBusSAE": "C1",
        "descBusLine": "CIRCULAR CENTRO",
        "color": "#ED4713"
    },
    "3001": {
        "idBusSAE": "C3d",
        "descBusLine": "BOUZAS/COIA – ENCARNACIÓN (P.E.FADRIQUE)",
        "color": "#FFCC00"
    },
    "3002": {
        "idBusSAE": "C3i",
        "descBusLine": "BOUZAS/COIA – ENCARNACIÓN (PZA. ESPAÑA)",
        "color": "#FFCC00"
    },
    "4001": {
        "idBusSAE": "4A",
        "descBusLine": "ARAGÓN - COIA",
        "color": "#009900"
    },
    "4003": {
        "idBusSAE": "4C",
        "descBusLine": "G. ESPINO - COIA",
        "color": "#009900"
    },
    "5001": {
        "idBusSAE": "5A",
        "descBusLine": "NAVIA-TRV. DE VIGO ",
        "color": "#00B0F0"
    },
    "5004": {
        "idBusSAE": "5B",
        "descBusLine": "NAVIA-S. BADÍA ",
        "color": "#00B0F0"
    },
    "6": {
        "idBusSAE": "6",
        "descBusLine": "HOSP. ALVARO CUNQUEIRO - BEADE – PZA. ESPAÑA",
        "color": "#CC3399"
 

# Vitrasa Bus Line Data Request & Filtering
This Python script sends a *POST request* to the Vitrasa website to retrieve bus line data. It processes the received JSON and then *pretty-prints* the cleaned information. The script also includes robust error handling for network or JSON parsing issues.

## Data Request
This part of the script constructs and sends a *POST request* to the Vitrasa web service, passing the portlet query parameters and an `idBusLine` in the request body. It then verifies that the HTTP request succeeded.

In [16]:
import requests

base_url = "https://www.vitrasa.es/lineas-y-horarios/todas-las-lineas"

# Query-string parameters sent alongside the POST; they select the portlet
# instance that answers the request.
params = {
    "p_p_id": "mx_com_ado_all_lines_web_AllLinesPortlet_INSTANCE_zDp1QpJtQsKA",
    "p_p_lifecycle": "2",
    "p_p_state": "normal",
    "p_p_mode": "view",
    "p_p_cacheability": "cacheLevelPage"
}

# Form body: the bus line whose data is being requested.
data = {"idBusLine": "104"}

# Requests applies no timeout by default, so an unresponsive server would
# block this cell forever; bound the wait to 10 seconds.
response = requests.post(base_url, params=params, data=data, timeout=10)
# Raise requests.exceptions.HTTPError on 4xx/5xx responses.
response.raise_for_status()

## JSON Processing & Filtering
This section defines a helper function to *recursively remove `geometry` fields* for cleaner output. It then parses the server's response into JSON, applies this filtering, and *pretty-prints the resulting, human-readable data*. Robust error handling is included for invalid JSON responses.

In [17]:
import json

def remove_geometry_field(obj):
    """
    Return a copy of a JSON-like structure with every 'geometry' key dropped.

    Dictionaries and lists are rebuilt recursively, so the input is never
    modified; any other value (string, number, None, ...) is returned as is.
    """
    if isinstance(obj, list):
        return [remove_geometry_field(item) for item in obj]
    if not isinstance(obj, dict):
        # Leaf value: nothing to strip.
        return obj
    return {
        name: remove_geometry_field(child)
        for name, child in obj.items()
        if name != "geometry"
    }

# Decode the response body, strip the 'geometry' fields and pretty-print it.
try:
    raw_json_content = response.json()
except ValueError:
    # ValueError is the common base of the decode errors `Response.json()`
    # can raise (json.JSONDecodeError and the Requests/simplejson variants),
    # so this also works when the error is not the stdlib class.
    print("The server did not return valid JSON.")
else:
    # Kept outside the `try` so only the decoding step is guarded.
    filtered_json_content = remove_geometry_field(raw_json_content)
    print(json.dumps(filtered_json_content, indent=4, ensure_ascii=False))

{
    "idBusLine": "C3i",
    "idBusSAE": "C3i",
    "descBusLine": "BOUZAS/COIA – ENCARNACIÓN (PZA. ESPAÑA)",
    "color": "#FFCC00",
    "geographic_data_structure": {
        "initial_map_coordinates": [
            "-7.855431773723",
            "42.328299075595"
        ]
    },
    "scale": "13",
    "distance": "10",
    "outTrip": {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {
                    "idBusLine": "C3i",
                    "color": "#FFCC00",
                    "idBusStop": "3002"
                },
                "style": {
                    "fill": "#da291d",
                    "strokeWidth": "3",
                    "fillOpacity": "0.3"
                }
            },
            {
                "type": "Feature",
                "properties": {
                    "idBusLine": "C3i",
                    "color": "#FFCC00",
                    "idBusStop": "142

# Vitrasa Bus Stop ETA Retrieval & Processing
This Python script retrieves estimated arrival times (ETAs) for a specified Vitrasa bus stop by sending a *POST request* to the API. It then *processes the nested JSON response*, extracting the core data and printing it in a readable format, with error handling for invalid JSON.

## Requesting Live Stop Data
This section of the script *sends a POST request* to the Vitrasa API. It targets a specific bus stop to retrieve its estimated times of arrival (ETAs). The request includes various Liferay portlet parameters and a command to `getETAS`, ensuring the correct data is requested.

In [21]:
import requests

base_url = "https://www.vitrasa.es/detalleparada"

# Query-string parameters sent alongside the POST; the last one carries the
# `getETAS` command for the bus-stop portlet.
params = {
    "p_p_id": "com_ado_portlet_parada_AdoParadaPortlet_INSTANCE_e3K3ns9GxruP",
    "p_p_lifecycle": "2",
    "p_p_state": "normal",
    "p_p_mode": "view",
    "p_p_cacheability": "cacheLevelPage",
    "_com_ado_portlet_parada_AdoParadaPortlet_INSTANCE_e3K3ns9GxruP_cmd": "getETAS"
}

# Form body: the bus stop whose arrival estimates are being requested.
data = {"_com_ado_portlet_parada_AdoParadaPortlet_INSTANCE_e3K3ns9GxruP_busStopID": "14901"}

# Requests applies no timeout by default, so an unresponsive server would
# block this cell forever; bound the wait to 10 seconds.
response = requests.post(base_url, params=params, data=data, timeout=10)
# Raise requests.exceptions.HTTPError on 4xx/5xx responses.
response.raise_for_status()

## Processing Nested JSON Response
This part handles the *parsing and cleaning of the received JSON data*. It defines a function designed to interpret the API's unusual response format, where the main data is a JSON string nested inside the outer JSON object under the field `jsontraffics2`. The parsed, readable data is then *pretty-printed* to the console, with error handling for any JSON decoding issues.

In [22]:
import json

def parse_nested_vitrasa_json(json_response: dict) -> list | None:
    """
    Parses a JSON response from Vitrasa that contains a nested JSON string
    under the 'jsontraffics2' key.
    """
    json_string_data = json_response.get("jsontraffics2")

    if json_string_data:
        try:
            normal_dict_list = json.loads(json_string_data)
            return normal_dict_list
        except json.JSONDecodeError as e:
            print(f"Error decoding the nested JSON string from 'jsontraffics2': {e}")
            return None
    else:
        print("'jsontraffics2' key not found in the provided JSON response.")
        return None

# Decode the response body, unwrap the nested 'jsontraffics2' document and
# pretty-print the result.
try:
    raw_json_content = response.json()
except ValueError:
    # ValueError is the common base of the decode errors `Response.json()`
    # can raise (json.JSONDecodeError and the Requests/simplejson variants),
    # so this also works when the error is not the stdlib class.
    print("The server did not return valid JSON.")
else:
    # Kept outside the `try` so only the decoding step is guarded.
    filtered_json_content = parse_nested_vitrasa_json(raw_json_content)
    print(json.dumps(filtered_json_content, indent=4, ensure_ascii=False))

[
    {
        "idBusLine": "3002",
        "idBusSAE": "C3i",
        "desBusLine": "BOUZAS/COIA – ENCARNACIÓN (PZA. ESPAÑA)",
        "desLocalCompany": "VITRASA",
        "desDepartureBusStop": "Rúa do Porriño, 9",
        "desArrivalBusStop": "Avda. de Vigo, 6",
        "arrivalTime": "",
        "minutesArrive": 1
    },
    {
        "idBusLine": "1501",
        "idBusSAE": "15A",
        "desBusLine": "CABRAL - SAMIL",
        "desLocalCompany": "VITRASA",
        "desDepartureBusStop": "Avda. de Samil (frente Verbum)",
        "desArrivalBusStop": "Avda. da Ponte, 31",
        "arrivalTime": "",
        "minutesArrive": 4
    },
    {
        "idBusLine": "3002",
        "idBusSAE": "C3i",
        "desBusLine": "BOUZAS/COIA – ENCARNACIÓN (PZA. ESPAÑA)",
        "desLocalCompany": "VITRASA",
        "desDepartureBusStop": "Rúa do Porriño, 9",
        "desArrivalBusStop": "Avda. de Vigo, 6",
        "arrivalTime": "",
        "minutesArrive": 17
    },
    {
        "idBusLine":