In [14]:
from lxml import etree
from shapely.geometry import Point, Polygon
import pandas as pd

#  Extract Building Node IDs from OSM data
def nodes4building(path):
    """
    Collect the node references of every building way in an OSM XML file.

    A way is treated as a building when it has a child <tag> element whose
    "k" attribute equals "building". The references are gathered way by way
    and no de-duplication is performed, so a reference that occurs in several
    <nd> elements is returned once per occurrence.

    Args:
        path (str): Path to the OSM XML file.

    Returns:
        list: The "ref" attribute values of the <nd> elements found under
            the building ways.
    """
    # recover=True asks lxml to keep parsing through malformed markup.
    lenient_parser = etree.XMLParser(recover=True)
    document = etree.parse(path, parser=lenient_parser)

    return [
        ref
        for building_way in document.xpath('//way[tag[@k="building"]]')
        for ref in building_way.xpath('.//nd/@ref')
    ]

#  Get the Polygon Coordinates from an Excel file
def get_f_column_as_string(excel_path, column_index=5):
    """
    Read an Excel file and join the non-empty cells of one column into a string.

    The first row of the sheet is read as the column headers, so it is never
    part of the result. By default the 6th column (column F, position 5) is
    used, which is where the coordinates are stored in the author's workbook;
    pass a different ``column_index`` if the layout changes.

    Args:
        excel_path (str): Path to the Excel file.
        column_index (int): Zero-based position of the column to read.
            Negative values count back from the last column. Defaults to 5.

    Returns:
        str: The column's non-NaN values, each converted with ``str`` and
            joined with commas.

    Raises:
        ValueError: If the sheet has no column at ``column_index``.
    """
    df = pd.read_excel(excel_path, header=0)  # header=0: first row holds the column names

    column_count = len(df.columns)
    if not -column_count <= column_index < column_count:
        if column_index == 5:
            # Keep the historical wording for the default column.
            raise ValueError("The Excel file does not have a 6th column (column F).")
        raise ValueError(
            f"The Excel file has {column_count} column(s); "
            f"column index {column_index} is out of range."
        )

    selected_column = df.iloc[:, column_index]  # positional, so header names do not matter

    # Empty cells are dropped before joining so they do not produce stray commas.
    return ",".join(selected_column.dropna().astype(str).tolist())

#  Parse the Coordinate String into a Polygon
def parse_polygon(coord_string):
    """
    Build a shapely Polygon from a comma-separated string of coordinates.

    The string is split on commas and blank entries are ignored. The
    remaining values are consumed in pairs; for each pair the second value
    becomes the vertex's first component and the first value its second
    component (i.e. the pair is swapped). The ring is closed automatically
    when the last vertex differs from the first.

    NOTE(review): the code names the first value of a pair "lon" and the
    second "lat" but stores the vertex as (lat, lon), while the callers in
    this file test Point(lon, lat) against the result. That only lines up
    if the input is actually ordered lat,lon -- confirm against the data
    before changing the order here.

    Args:
        coord_string (str): Comma-separated numeric values forming pairs.

    Returns:
        Polygon: The polygon built from the parsed vertices.

    Raises:
        ValueError: If the number of values is odd, a value is not numeric,
            or fewer than three pairs are supplied (including empty input).
    """
    # Remove empty strings and extra spaces
    coord_list = [value.strip() for value in coord_string.split(',') if value.strip()]

    # Values are consumed two at a time, so the count must be even
    if len(coord_list) % 2 != 0:
        raise ValueError("The coordinate string must contain an even number of values (lon/lat pairs).")

    # Convert coordinates to float tuples
    coords = []
    for i in range(0, len(coord_list), 2):
        try:
            lon = float(coord_list[i])
            lat = float(coord_list[i + 1])
        except ValueError as exc:
            raise ValueError(
                f"Invalid coordinate value at index {i}: {coord_list[i]}, {coord_list[i + 1]}"
            ) from exc
        coords.append((lat, lon))

    # Empty input used to fail with IndexError on coords[0]; report it (and
    # other too-short inputs) as the same ValueError callers already handle.
    if len(coords) < 3:
        raise ValueError(
            f"At least three coordinate pairs are required to build a polygon, got {len(coords)}."
        )

    # Ensure the polygon is closed (first point == last point)
    if coords[0] != coords[-1]:
        coords.append(coords[0])

    return Polygon(coords)

#  Find Nodes within the Polygon
def nodes_in_polygon(coord_string, xml_path):
    """
    Return the nodes of an XML file that lie inside a polygon.

    The file is parsed with a recovering parser, the polygon is built by
    parse_polygon(coord_string), and each <node> element is tested as
    Point(lon, lat) with polygon.contains. Nodes whose "lat" or "lon"
    attribute is missing, empty, or not convertible to float are skipped.

    Args:
        coord_string (str): Comma-separated polygon coordinates, handed to
            parse_polygon unchanged.
        xml_path (str): Path to the XML file containing node data.

    Returns:
        list: One dict per matching node with the keys 'id', 'lat' and 'lon'.

    Raises:
        ValueError: Wraps any ValueError raised while processing the inputs
            and any etree.XMLSyntaxError raised while parsing the file.
    """
    try:
        # A recovering parser tolerates malformed markup.
        lenient_parser = etree.XMLParser(recover=True)
        document = etree.parse(xml_path, parser=lenient_parser)

        area = parse_polygon(coord_string)

        matches = []
        for element in document.xpath('//node'):
            node_id = element.attrib.get('id')
            raw_lat = element.attrib.get('lat', '')
            raw_lon = element.attrib.get('lon', '')

            # Nothing to test when either coordinate is absent or empty.
            if not raw_lat or not raw_lon:
                continue

            try:
                lat = float(raw_lat)
                lon = float(raw_lon)
                if area.contains(Point(lon, lat)):
                    matches.append({'id': node_id, 'lat': lat, 'lon': lon})
            except ValueError:
                # Unparseable coordinates: ignore this node and move on.
                continue

        return matches
    except ValueError as e:
        raise ValueError(f"Error processing the inputs: {e}")
    except etree.XMLSyntaxError as e:
        raise ValueError(f"Error parsing XML file: {e}")




In [15]:
from lxml import etree
from shapely.geometry import Point, Polygon
import pandas as pd
from datetime import datetime

#  Extract all Node Data and Store It
def store_nodes(path):
    """
    Load every <node> element with usable coordinates from an OSM XML file.

    Nodes whose "lat" or "lon" attribute is missing, empty, or not
    convertible to float are skipped (the same policy nodes_in_polygon
    applies), so a single malformed node does not abort the whole run.

    Args:
        path (str): Path to the OSM XML file.

    Returns:
        list: One dict per kept node with the keys 'id' (the raw attribute
            value, or None when absent), 'lat' (float) and 'lon' (float).
    """
    # recover=True asks lxml to keep parsing through malformed markup.
    tree = etree.parse(path, parser=etree.XMLParser(recover=True))

    node_data = []
    for node in tree.xpath('//node'):
        lat = node.attrib.get('lat')
        lon = node.attrib.get('lon')

        if not (lat and lon):
            continue  # missing or empty coordinate

        try:
            record = {
                'id': node.attrib.get('id'),
                'lat': float(lat),
                'lon': float(lon)
            }
        except ValueError:
            continue  # non-numeric coordinate

        node_data.append(record)

    return node_data

#  Filter Nodes Based on Polygon
def filter_nodes_by_polygon(nodes, coord_string):
    """
    Return the IDs of the nodes that fall inside a polygon.

    The polygon is built by parse_polygon(coord_string) and each node is
    tested as Point(node['lon'], node['lat']) with polygon.contains.

    Args:
        nodes (iterable): Mappings providing the keys 'id', 'lat' and 'lon'.
        coord_string (str): Comma-separated polygon coordinates, handed to
            parse_polygon unchanged.

    Returns:
        set: The 'id' values of the nodes contained in the polygon.
    """
    area = parse_polygon(coord_string)

    return {
        entry['id']
        for entry in nodes
        if area.contains(Point(entry['lon'], entry['lat']))
    }

#  Count Buildings with Timestamp Before Input Date
def count_buildings_with_nodes_in_polygon(osm_path, excel_path, input_date):
    """
    Count building ways that touch a polygon and were stamped before a date.

    A way is counted when all of the following hold:
      * it has a child <tag> whose "k" attribute is "building";
      * it has a non-empty "timestamp" attribute that, parsed with the
        format "%Y-%m-%dT%H:%M:%SZ", is strictly earlier than input_date
        (parsed with "%Y-%m-%d");
      * at least one of its <nd> references is the ID of a node inside the
        polygon.
    Ways without a timestamp are ignored.

    Args:
        osm_path (str): Path to the OSM XML file.
        excel_path (str): Path to the Excel file holding the polygon
            coordinates (read through get_f_column_as_string).
        input_date (str): Cut-off date in the format 'YYYY-MM-DD'.

    Returns:
        int: Number of building ways satisfying the conditions above.
    """
    # Parse the cut-off first so a malformed date fails before any file is read.
    cutoff = datetime.strptime(input_date, "%Y-%m-%d")

    # Nodes from the OSM file -> polygon text from Excel -> IDs inside the polygon.
    all_nodes = store_nodes(osm_path)
    polygon_text = get_f_column_as_string(excel_path)
    inside_ids = filter_nodes_by_polygon(all_nodes, polygon_text)

    document = etree.parse(osm_path, parser=etree.XMLParser(recover=True))

    matching_buildings = 0
    for building in document.xpath('//way[tag[@k="building"]]'):
        stamp = building.attrib.get('timestamp')
        if not stamp:
            continue

        if not datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ") < cutoff:
            continue

        # One referenced node inside the polygon is enough to count the way.
        if not inside_ids.isdisjoint(building.xpath('.//nd/@ref')):
            matching_buildings += 1

    return matching_buildings


# Inputs for this run: the OSM XML export, the workbook holding the polygon
# coordinates, and the cut-off date in 'YYYY-MM-DD' form.
# NOTE(review): the "/content/..." locations look like a hosted-notebook upload
# directory -- adjust them when running elsewhere.
osm_path = "/content/map (2)"
excel_path = "/content/poligon.xlsx"
input_date = "2018-01-01"

# Run the count and report the result.
building_count = count_buildings_with_nodes_in_polygon(osm_path, excel_path, input_date)
print(f"Number of buildings with at least one node inside the polygon and timestamp before {input_date}: {building_count}")


Number of buildings with at least one node inside the polygon and timestamp before 2018-01-01: 999
