In [None]:
import requests
from bs4 import BeautifulSoup
import time
import json

# Site root; relative hrefs scraped from pages are appended to this.
BASE_URL = "https://www.alta.ru"
# Index page that lists the railways.
RAILWAY_LIST_URL = BASE_URL + "/railway/"

# Headers passed to every requests.get call in this script.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_russian_railway_links():
    """Return absolute URLs of the railways listed under the "Россия" heading.

    Fetches RAILWAY_LIST_URL, locates the ``<div class="h3">`` heading whose
    text is "Россия", then walks its following siblings up to the next
    ``h3`` heading, collecting every ``a.pRailway_item`` link whose href
    starts with ``/railway/``.

    Returns:
        A list of absolute railway page URLs, in document order.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        Exception: If the "Россия" heading is not present in the page.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(RAILWAY_LIST_URL, headers=HEADERS, timeout=30)
    # Surface 4xx/5xx responses directly instead of parsing an error page
    # and then failing with a misleading "section not found" message.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    # find(..., string="Россия") only matches when the div's sole child is
    # exactly that string; surrounding whitespace or nested markup makes it
    # miss. Comparing the stripped text is tolerant of both.
    russia_div = next(
        (
            div
            for div in soup.find_all("div", class_="h3")
            if div.get_text(strip=True) == "Россия"
        ),
        None,
    )
    if russia_div is None:
        raise Exception("Couldn't find Россия section.")

    railway_links = []

    # Walk siblings until the next <div class="h3"> heading (or the end).
    current = russia_div.find_next_sibling()
    while current is not None and "h3" not in current.get("class", []):
        # Collect every matching link in the sibling, not just the first;
        # href=True skips anchors without an href instead of raising KeyError.
        for a_tag in current.find_all("a", class_="pRailway_item", href=True):
            if a_tag["href"].startswith("/railway/"):
                railway_links.append(BASE_URL + a_tag["href"])
        current = current.find_next_sibling()

    return railway_links

def get_station_links(railway_url):
    """Return absolute URLs of the station pages linked from a railway page.

    Args:
        railway_url: Absolute URL of a railway page.

    Returns:
        A list of absolute URLs for anchors that carry both the
        ``pRailway_item`` and ``mFastSearch_key`` classes and whose href
        starts with ``/railway/station/``.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(railway_url, headers=HEADERS, timeout=30)
    # An error page would otherwise be reported as "0 stations".
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    # Passing "pRailway_item mFastSearch_key" as class_ matches only that
    # exact attribute string; it breaks if the classes are reordered or
    # another class is added. Match one class and test the other explicitly.
    return [
        BASE_URL + a["href"]
        for a in soup.find_all("a", class_="pRailway_item", href=True)
        if "mFastSearch_key" in a.get("class", [])
        and a["href"].startswith("/railway/station/")
    ]

def _text_after_br(div):
    """Return the first non-empty text directly following a <br> in div, or None."""
    for br in div.find_all("br", recursive=False):
        sibling = br.next_sibling
        # Text nodes are str subclasses; a Tag (or no sibling at all) is
        # skipped instead of crashing on a missing .strip().
        if isinstance(sibling, str):
            value = sibling.strip()
            if value:
                return value
    return None


def get_station_coords(station_url):
    """Fetch a station page and extract its latitude and longitude.

    Looks inside every ``div`` carrying the ``pRailway_column-right`` class
    for ``div.dib`` entries whose ``<strong>`` label is "Широта:" or
    "Долгота:", and takes the text that follows the ``<br>`` in that entry.

    Args:
        station_url: Absolute URL of a station page.

    Returns:
        ``{"latitude": str, "longitude": str}`` with the values as they
        appear in the page (whitespace-stripped), or ``None`` when either
        value is missing.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(station_url, headers=HEADERS, timeout=30)
    # Report HTTP errors as errors rather than as "no coords found".
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    # Page label -> output key.
    labels = {"Широта:": "latitude", "Долгота:": "longitude"}
    coords = {}

    # Match on the single distinguishing class: a two-class string passed to
    # class_ only matches that exact attribute value (order and extra
    # classes would break it). There may be several right-side columns.
    for block in soup.find_all("div", class_="pRailway_column-right"):
        for div in block.find_all("div", class_="dib"):
            label = div.find("strong")
            if label is None:
                continue
            key = labels.get(label.text.strip())
            if key is None:
                continue
            value = _text_after_br(div)
            if value is not None:
                coords[key] = value

    return coords if 'latitude' in coords and 'longitude' in coords else None


def main():
    """Scrape coordinates for all stations and write them to station_coords.json.

    For each railway returned by get_russian_railway_links(), fetches its
    station links and then each station's coordinates, keyed by the last
    path segment of the station URL. Progress and per-page errors are
    printed. Whatever has been collected is written to the JSON file even
    if the run is interrupted part-way through.
    """
    output = {}
    railway_links = get_russian_railway_links()
    print(f"Found {len(railway_links)} Russian railways")

    try:
        for railway_url in railway_links:
            print(f"\nProcessing railway: {railway_url}")
            # One unreachable railway page should not abort the whole run.
            try:
                station_links = get_station_links(railway_url)
            except Exception as e:
                print(f"  Error fetching {railway_url}: {e}")
                continue
            print(f"  Found {len(station_links)} stations")

            for station_url in station_links:
                station_id = station_url.rstrip("/").split("/")[-1]
                try:
                    coords = get_station_coords(station_url)
                except Exception as e:
                    print(f"    Error fetching {station_url}: {e}")
                else:
                    if coords:
                        output[station_id] = coords
                        print(f"    {station_id}: {coords}")
                    else:
                        print(f"No coords found for {station_url}")
                finally:
                    # Throttle after every request, including failed ones,
                    # so repeated errors do not turn into a burst of
                    # back-to-back requests.
                    time.sleep(0.5)
    finally:
        # Persist partial results on a crash or Ctrl-C instead of losing
        # everything collected so far.
        with open("station_coords.json", "w", encoding="utf-8") as f:
            json.dump(output, f, ensure_ascii=False, indent=2)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Exception: Couldn't find Россия section.