In [None]:
# IPython magics: load the `autoreload` extension and select mode 2.
# NOTE(review): per the IPython docs, mode 2 re-imports all modules before
# each execution -- confirm this is the intended mode.
%load_ext autoreload
%autoreload 2

In [None]:
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from typing import List


@dataclass
class Location:
    """A named point taken from a KML Placemark."""

    name: str
    latitude: float  # second field of the KML coordinate tuple
    longitude: float  # first field of the KML coordinate tuple


def extract_locations_from_kml(file_path: str) -> List[Location]:
    """Return one Location per usable Placemark in the KML file at *file_path*.

    A Placemark is used when it has a direct ``<name>`` child and a
    ``<coordinates>`` descendant holding at least one ``lon,lat[,alt]`` tuple.
    For lines and polygons only the first tuple is taken. Placemarks with
    empty coordinates are skipped; tuples whose numbers cannot be parsed are
    skipped with a printed warning. An empty ``<name/>`` yields ``name == ""``.

    Args:
        file_path: Path of the KML document to parse.

    Returns:
        Locations in document order.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(file_path).getroot()
    ns = {"kml": "http://www.opengis.net/kml/2.2"}
    locations = []

    for placemark in root.findall(".//kml:Placemark", ns):
        name_element = placemark.find("kml:name", ns)
        coords_element = placemark.find(".//kml:coordinates", ns)
        if name_element is None or coords_element is None:
            continue

        # Element.text is None for empty tags such as <name/> or
        # <coordinates/>, so fall back to "" before using str methods.
        name = (name_element.text or "").strip()
        coord_tuples = (coords_element.text or "").split()
        if not coord_tuples:
            continue  # nothing to parse

        # Some coordinate strings hold several tuples (lines or polygons);
        # only the first one is used.
        coord_parts = coord_tuples[0].strip("> \n\t").split(",")
        if len(coord_parts) < 2:
            continue

        try:
            lon = float(coord_parts[0])
            lat = float(coord_parts[1])
        except ValueError:
            print(
                f"Warning: Could not parse coordinates for {name}: {coord_parts}"
            )
            continue

        locations.append(Location(name=name, latitude=lat, longitude=lon))

    return locations


# Example usage: when this code runs as the main module, parse the
# hard-coded "archaeogeodesy.kml" (a relative path) and keep the result in
# the module-level name `locations`.
if __name__ == "__main__":
    locations = extract_locations_from_kml("archaeogeodesy.kml")

In [None]:
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Location:
    """A Placemark position plus up to six names collected on the way to it."""

    latitude: float
    longitude: float
    # The six slots below are filled, in this order, from the names gathered
    # along the path to the placemark (see parse_kml_element); unused slots
    # stay None.
    type: Optional[str] = None
    subtype: Optional[str] = None
    continent: Optional[str] = None
    country: Optional[str] = None
    region: Optional[str] = None
    subregion: Optional[str] = None


def parse_kml_element(elem, ns, parent_names=None) -> List[Location]:
    """Recursively collect a Location for every Placemark at or below *elem*.

    While descending, the text of each element's direct ``<name>`` child is
    appended to the list of names inherited from its ancestors. For a
    Placemark, the first collected name is dropped and the next six fill
    ``type`` .. ``subregion``. The Placemark's own name is part of that list,
    so it occupies a slot whenever fewer than six ancestor names precede it.

    Placemarks with empty or unparsable coordinates are skipped silently.
    For multi-tuple coordinates only the first ``lon,lat[,alt]`` is used.

    Args:
        elem: ElementTree element to walk.
        ns: Prefix-to-URI mapping; must contain the key ``"kml"``.
        parent_names: Names collected from ancestors, outermost first. The
            caller's list is never mutated.

    Returns:
        Locations in document order.
    """
    current_names = list(parent_names) if parent_names else []

    name_elem = elem.find("kml:name", ns)
    if name_elem is not None:
        # Element.text is None for an empty <name/>; keep the hierarchy depth
        # aligned by recording an empty string instead of crashing.
        current_names.append((name_elem.text or "").strip())

    locations = []

    if elem.tag == f"{{{ns['kml']}}}Placemark":
        coords_elem = elem.find(".//kml:coordinates", ns)
        # Missing element, empty tag, or whitespace-only text all give [].
        coord_tuples = (
            (coords_elem.text or "").split() if coords_elem is not None else []
        )
        if coord_tuples:
            coord_parts = coord_tuples[0].strip("> \n\t").split(",")
            if len(coord_parts) >= 2:
                try:
                    lon = float(coord_parts[0])
                    lat = float(coord_parts[1])
                except ValueError:
                    pass  # Invalid coordinates: deliberate best-effort skip
                else:
                    # Exactly six slots: skip the outermost name, pad with None.
                    hierarchy = current_names[1:7]
                    hierarchy += [None] * (6 - len(hierarchy))
                    locations.append(
                        Location(
                            latitude=lat,
                            longitude=lon,
                            type=hierarchy[0],
                            subtype=hierarchy[1],
                            continent=hierarchy[2],
                            country=hierarchy[3],
                            region=hierarchy[4],
                            subregion=hierarchy[5],
                        )
                    )

    for child in elem:
        locations.extend(parse_kml_element(child, ns, current_names))

    return locations


def extract_locations_from_kml(file_path: str) -> List[Location]:
    """Parse the KML file at *file_path* and return every Location found in it.

    The document root is handed to ``parse_kml_element`` together with the
    KML 2.2 namespace mapping.
    """
    kml_namespaces = {"kml": "http://www.opengis.net/kml/2.2"}
    document_root = ET.parse(file_path).getroot()
    return parse_kml_element(document_root, kml_namespaces)


# Example usage without printing: when this code runs as the main module,
# parse the hard-coded "archaeogeodesy.kml" (a relative path).
if __name__ == "__main__":
    locations = extract_locations_from_kml("archaeogeodesy.kml")
    # `locations` now holds the list returned by extract_locations_from_kml,
    # available at module level for further processing.

In [None]:
# Bare expression: evaluates `locations` (presumably to display it as the
# cell's output -- it has no other effect).
locations

In [None]:
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from typing import List, Optional
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import contextily as ctx


@dataclass
class Location:
    """A Placemark position plus up to six names collected on the way to it."""

    latitude: float
    longitude: float
    # The six slots below are filled, in this order, from the names gathered
    # along the path to the placemark (see parse_kml_element); unused slots
    # stay None.
    type: Optional[str] = None
    subtype: Optional[str] = None
    continent: Optional[str] = None
    country: Optional[str] = None
    region: Optional[str] = None
    subregion: Optional[str] = None


def parse_kml_element(elem, ns, parent_names=None) -> List[Location]:
    """Recursively collect a Location for every Placemark at or below *elem*.

    While descending, the text of each element's direct ``<name>`` child is
    appended to the list of names inherited from its ancestors. For a
    Placemark, the first collected name is dropped and the next six fill
    ``type`` .. ``subregion``. The Placemark's own name is part of that list,
    so it occupies a slot whenever fewer than six ancestor names precede it.

    Placemarks with empty or unparsable coordinates are skipped silently.
    For multi-tuple coordinates only the first ``lon,lat[,alt]`` is used.

    Args:
        elem: ElementTree element to walk.
        ns: Prefix-to-URI mapping; must contain the key ``"kml"``.
        parent_names: Names collected from ancestors, outermost first. The
            caller's list is never mutated.

    Returns:
        Locations in document order.
    """
    current_names = list(parent_names) if parent_names else []

    name_elem = elem.find("kml:name", ns)
    if name_elem is not None:
        # Element.text is None for an empty <name/>; keep the hierarchy depth
        # aligned by recording an empty string instead of crashing.
        current_names.append((name_elem.text or "").strip())

    locations = []

    if elem.tag == f"{{{ns['kml']}}}Placemark":
        coords_elem = elem.find(".//kml:coordinates", ns)
        # Missing element, empty tag, or whitespace-only text all give [].
        coord_tuples = (
            (coords_elem.text or "").split() if coords_elem is not None else []
        )
        if coord_tuples:
            coord_parts = coord_tuples[0].strip("> \n\t").split(",")
            if len(coord_parts) >= 2:
                try:
                    lon = float(coord_parts[0])
                    lat = float(coord_parts[1])
                except ValueError:
                    pass  # Invalid coordinates: deliberate best-effort skip
                else:
                    # Exactly six slots: skip the outermost name, pad with None.
                    hierarchy = current_names[1:7]
                    hierarchy += [None] * (6 - len(hierarchy))
                    locations.append(
                        Location(
                            latitude=lat,
                            longitude=lon,
                            type=hierarchy[0],
                            subtype=hierarchy[1],
                            continent=hierarchy[2],
                            country=hierarchy[3],
                            region=hierarchy[4],
                            subregion=hierarchy[5],
                        )
                    )

    for child in elem:
        locations.extend(parse_kml_element(child, ns, current_names))

    return locations


def extract_locations_from_kml(file_path: str) -> List[Location]:
    """Load the KML document at *file_path* and return all Locations in it.

    Delegates the tree walk to ``parse_kml_element``, starting at the
    document root with the KML 2.2 namespace bound to the ``kml`` prefix.
    """
    namespace_map = {"kml": "http://www.opengis.net/kml/2.2"}
    kml_root = ET.parse(file_path).getroot()
    return parse_kml_element(kml_root, namespace_map)


def plot_locations(locations: List[Location]):
    """Plot *locations* as red points on an OpenStreetMap basemap.

    Each point is labelled with its ``subtype`` when that is a non-empty
    string. The figure is displayed with ``plt.show()``.

    Args:
        locations: Records to draw. ``longitude``/``latitude`` are passed to
            the frame as EPSG:4326 coordinates. An empty list draws nothing.

    Returns:
        None.
    """
    if not locations:
        # A frame built from no records has no "subtype" column, so the
        # labelling loop below would raise KeyError.
        print("No locations to plot.")
        return

    gdf = gpd.GeoDataFrame(
        locations,
        geometry=[Point(loc.longitude, loc.latitude) for loc in locations],
        crs="EPSG:4326",  # WGS84
    )

    # Convert to Web Mercator for OSM basemap
    gdf = gdf.to_crs(epsg=3857)

    ax = gdf.plot(figsize=(15, 10), alpha=0.7, edgecolor="k", color="red")

    # Add labels (subtype). Require a real string so that missing-value
    # markers such as NaN (which is truthy) are never drawn as text.
    for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf["subtype"]):
        if isinstance(label, str) and label:
            ax.text(x, y, label, fontsize=9, ha="center", va="bottom")

    ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik)
    # Address the axes explicitly rather than relying on pyplot's notion of
    # the "current" axes.
    ax.set_title("Locations by Subtype on OpenStreetMap")
    ax.set_axis_off()
    plt.show()


# Main: when this code runs as the main module, parse the hard-coded
# "archaeogeodesy.kml" (a relative path) and plot the resulting locations.
if __name__ == "__main__":
    locations = extract_locations_from_kml("archaeogeodesy.kml")
    plot_locations(locations)