In [None]:
import requests
from bs4 import BeautifulSoup

URL = "https://sites.google.com/view/pharmaciedegarde-lome/tour-de-garde/"

html = requests.get(URL, timeout=15).text
soup = BeautifulSoup(html, "html.parser")

pharmacies_raw = []

for a in soup.find_all("a"):
    href = a.get("href", "")
    text = a.get_text(strip=True).lower()
    

    if "maps" in href or "itin" in text:
        pharmacies_raw.append({
            "lien_itineraire": href,
            "context": a.find_parent().get_text(separator="\n")
        })


In [21]:
type(pharmacies_raw)

list

In [22]:
import re
from urllib.parse import unquote

def extract_place_info(google_maps_url: str) -> dict:
    """
    Extrait le nom du lieu et les coordonnées GPS depuis une URL Google Maps
    """

    # 1️ Extraction du nom du lieu
    name_match = re.search(r"/dir//([^/]+),", google_maps_url)
    place_name = unquote(name_match.group(1)).replace("+", " ") if name_match else "Nom inconnu"

    # 2️ Extraction des coordonnées GPS 
    lat, lon = None, None

    # Format fiable 
    marker_match = re.search(r"!2d([0-9\.\-]+)!2d([0-9\.\-]+)", google_maps_url)
    if marker_match:
        lon = float(marker_match.group(1))
        lat = float(marker_match.group(2))
    else:
        # Fallback : @LAT,LON
        center_match = re.search(r"@([0-9\.\-]+),([0-9\.\-]+)", google_maps_url)
        if center_match:
            lat = float(center_match.group(1))
            lon = float(center_match.group(2))

    return {
        "name": place_name,
        "latitude": lat,
        "longitude": lon, 
        "lien_itinineraire": google_maps_url
    }


# ------------------ TEST ------------------

if __name__ == "__main__":

    url = pharmacies_raw[7]['lien_itineraire']
    result = extract_place_info(url)
    print(result)


{'name': '578M M7H Pharmacie LE PROGRES', 'latitude': 6.1666913, 'longitude': 1.2806713, 'lien_itinineraire': 'https://www.google.com/maps/dir//578M%2BM7H+Pharmacie+LE+PROGRES,+Lom%C3%A9/@6.1666913,1.2806713,17z/data=!4m8!4m7!1m0!1m5!1m1!1s0x1023e30d1ef042ed:0x77606b3541157b07!2m2!1d1.2832432!2d6.1666854?entry=ttu'}


In [17]:
len(pharmacies_raw)

29

In [28]:
def extract_pharmacies(pharmacies_raw: list) -> dict:
    """
    Prend la liste brute issue du scraping
    et retourne un dictionnaire structuré de toutes les pharmacies
    """

    pharmacies = []

    for item in pharmacies_raw:
        url = item.get("lien_itineraire", "")

        # --- Extraction du nom ---
        name_match = re.search(r"/dir//([^/]+),", url)
        name = (
            unquote(name_match.group(1)).replace("+", " ")
            if name_match else "Nom inconnu"
        )

        # --- Extraction coordonnées GPS ---
        lat, lon = None, None

        marker_match = re.search(r"!2d([0-9\.\-]+)!2d([0-9\.\-]+)", url)
        if marker_match:
            lon = float(marker_match.group(1))
            lat = float(marker_match.group(2))
        else:
            center_match = re.search(r"@([0-9\.\-]+),([0-9\.\-]+)", url)
            if center_match:
                lat = float(center_match.group(1))
                lon = float(center_match.group(2))

        pharmacies.append({
            "name": name,
            "latitude": lat,
            "longitude": lon,
            "lien_itineraire": url,
        })

    return {
        "total": len(pharmacies),
        "pharmacies": pharmacies
    }
if __name__ == "__main__":
    raw = pharmacies_raw
    data = extract_pharmacies(raw)

    print(f"{data['total']} pharmacies extraites")
    print(data["pharmacies"][10])

29 pharmacies extraites
{'name': 'Pharmacie Campus', 'latitude': 6.1629236, 'longitude': 1.2107827, 'lien_itineraire': 'https://www.google.com/maps/dir//Pharmacie+Campus,+Lom%C3%A9/@6.1629236,1.2107827,17z/data=!4m8!4m7!1m0!1m5!1m1!1s0x1023e2086ca9a241:0x8336285d5349e394!2m2!1d1.2132861!2d6.1629072?entry=ttu'}
