In [None]:
import pywikibot
from pywikibot import pagegenerators
from tqdm import tqdm
import re
import requests

# Handle to the English-language site of the 'hitchwiki' family; every later
# cell reads pages from and saves pages to this site object.
lang_wiki = pywikibot.Site(code='en', fam='hitchwiki')
# Log in only when the site object reports no current user.
if not lang_wiki.user():
    lang_wiki.login()

In [None]:
# Page title -> per-page record; later cells add further keys to each record.
places = dict()

In [None]:
# Materialise the full page listing once so it can be iterated with a progress
# bar. content=True asks the generator to load each page's wikitext together
# with the listing, so reading page.text afterwards does not have to fetch
# every page individually.
pages = list(pagegenerators.AllpagesPageGenerator(site=lang_wiki, content=True))

In [None]:
# Keep only the pages whose wikitext contains an infobox template opener and
# remember their text under the page title.
for page in tqdm(pages):
    try:
        wikitext = page.text
        if "{{Infobox" in wikitext:
            places[page.title()] = {"text": wikitext}
    except Exception as e:
        # Best effort: report the failure and move on to the next page.
        print(f"Error processing page: {e}")
        continue

In [None]:
# Number of entries collected in `places`.
len(places)

In [None]:
# Display the whole `places` mapping (every record, including its full text).
places

In [None]:
# For every collected page, pull the http(s) URLs out of its wikitext and keep
# those whose address contains one of the map-related keywords.
for place, items in tqdm(places.items()):
    try:
        items["urls"] = []
        page_text = items["text"]
        if not page_text:
            continue
        candidates = re.findall(r'http[s]?://[^\s\]\)]+', page_text)
        items["urls"] = [
            link
            for link in candidates
            if any(hint in link for hint in ['map', 'lat', 'lon', 'osm', 'wego'])
        ]
    except Exception as e:
        # Best effort: the record keeps its empty "urls" list on failure.
        print(f"Error processing page {place}: {e}")
        continue



In [None]:
# A decimal "first,second" pair with at least four fractional digits each,
# separated by ',', '/' or an encoded comma.
LAT_LON_PATTERN = re.compile(r"(-?\d+\.\d{4,})(?:[,/]|%2C){1}(-?\d+\.\d{4,})")
# Substrings of a resolved URL that indicate a collection of points, not one place.
MULTI_POINT_MARKERS = ("viewer", "directions", "route", "path")


def _extract_single_coordinate(resolved_url):
    """Return the one (lat, lon) pair embedded in ``resolved_url``, or None.

    The pair is returned as the matched strings so the digits are written to
    the wiki exactly as they appear in the URL. None is returned when the URL
    looks like a multi-point view, when it contains zero or several pairs, or
    when the pair is outside the valid latitude/longitude ranges (which also
    rejects pairs given in lon/lat order with an impossible latitude).
    """
    if any(marker in resolved_url for marker in MULTI_POINT_MARKERS):
        return None
    pairs = LAT_LON_PATTERN.findall(resolved_url)
    if len(pairs) != 1:
        return None
    lat, lon = pairs[0]
    if not (-90 <= float(lat) <= 90 and -180 <= float(lon) <= 180):
        return None
    return lat, lon


# Follow each candidate URL through its redirects and record the coordinates
# found in the final address as (original url, resolved url, (lat, lon)).
# One session is shared so connections to the same host are reused.
with requests.Session() as session:
    for place, items in tqdm(places.items()):
        try:
            coords = []
            items["coords"] = coords
            # Drop repeated URLs (order preserved) so each is requested once.
            unique_urls = list(dict.fromkeys(items.get("urls") or []))
            for url in unique_urls:
                try:
                    response = session.head(url, allow_redirects=True, timeout=5)
                except (requests.RequestException, ValueError) as e:
                    # A single dead or malformed link must not discard the
                    # remaining URLs of this page.
                    print(f"Error resolving {url} on page {place}: {e}")
                    continue
                if response.status_code >= 400:
                    continue
                pair = _extract_single_coordinate(response.url)
                if pair is not None:
                    coords.append((url, response.url, pair))
            if unique_urls:
                print(f"{len(coords)}/{len(unique_urls)} urls found")
        except Exception as e:
            # Best effort: keep whatever was collected for this page so far.
            print(f"Error processing page {place}: {e}")
            continue

In [None]:
def _replace_url_with_coords(text, url, lat, lon):
    """Return ``text`` with every occurrence of ``url`` replaced by a Coords template.

    Three forms are handled, in this order:
      * ``[url label]``  -> ``''label'' {{Coords|lat|lon|1}}`` (any amount of
        whitespace between URL and label),
      * ``[url]``        -> ``{{Coords|lat|lon}}`` (brackets removed as well),
      * bare ``url``     -> ``{{Coords|lat|lon}}``.
    Callable replacements are used so that backslashes in labels are never
    interpreted as regex escapes.
    """
    escaped_url = re.escape(url)
    plain_coords = f"{{{{Coords|{lat}|{lon}}}}}"

    def _labelled(match):
        return f"''{match.group(1)}'' {{{{Coords|{lat}|{lon}|1}}}}"

    text = re.sub(rf'\[{escaped_url}\s+([^\]]+)\]', _labelled, text)
    text = re.sub(rf'\[{escaped_url}\s*\]', lambda _m: plain_coords, text)
    # Only replace where the URL really ends (whitespace, ']', ')' or end of
    # text follow), so a longer URL sharing this prefix is left intact.
    text = re.sub(rf'{escaped_url}(?![^\s\]\)])', lambda _m: plain_coords, text)
    return text


# Build the replacement wikitext for every page that has resolved coordinates.
# "new_text" is only stored when the text actually changed, and only after all
# URLs of the page were processed, so a failure never leaves a partial result.
for place, items in tqdm(places.items()):
    try:
        items.pop("new_text", None)
        updated_text = items["text"]
        for url, _real_url, (lat, lon) in items.get("coords") or []:
            updated_text = _replace_url_with_coords(updated_text, url, lat, lon)
        if updated_text != items["text"]:
            items["new_text"] = updated_text
    except Exception as e:
        print(f"Error updating page {place}: {e}")
        continue

In [None]:
# Select the records for which replacement text was produced, then show how
# many there are.
edits = {}
for title, record in places.items():
    if "new_text" in record:
        edits[title] = record
len(edits)

In [None]:
# Write the rewritten wikitext back to the wiki, one page at a time.
# The new text was derived from the snapshot in items["text"]; if the live
# page no longer matches that snapshot, someone edited it in the meantime and
# saving would silently revert their change, so such pages are skipped.
for place, items in tqdm(list(edits.items())):
    try:
        page = pywikibot.Page(lang_wiki, place)
        live_text = page.text
        if live_text == items["new_text"]:
            print(f"No changes for {place}")
        elif live_text != items["text"]:
            print(f"Skipped {place}: page changed since its text was read")
        else:
            page.text = items["new_text"]
            page.save(summary="Adding coordinates from map URLs", minor=False)
            print(f"Updated: {place}")
    except Exception as e:
        # Best effort: report the failure and continue with the next page.
        print(f"Error saving page {place}: {e}")
        continue

In [None]:
# Display the whole `edits` mapping for inspection.
edits