In [6]:
import xml.etree.ElementTree as ET
import pandas as pd

def parse_kml_to_df(kml_file):
    """Convert the Placemarks of a KML document into a pandas DataFrame.

    Every ``Placemark`` element yields one row. The row contains one column
    per ``SimpleData`` entry of the placemark's first ``SchemaData`` element
    (keyed by the entry's ``name`` attribute) plus a ``coordinates`` column
    holding the stripped text of the first ``<coordinates>`` element found
    anywhere inside the placemark.

    Parameters
    ----------
    kml_file : str, path-like or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.

    Returns
    -------
    pandas.DataFrame
        One row per placemark. ``coordinates`` is ``None`` when the placemark
        has no ``<coordinates>`` element or when that element is empty.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If the document is not well-formed XML.
    """
    # Namespace mapping so the XPath-like queries can use the ``kml:`` prefix
    ns = {'kml': 'http://www.opengis.net/kml/2.2'}

    root = ET.parse(kml_file).getroot()

    data = []
    for placemark in root.findall('.//kml:Placemark', ns):
        row = {}

        # Attribute values stored under ExtendedData / SchemaData
        schema_data = placemark.find('.//kml:SchemaData', ns)
        if schema_data is not None:
            for simpledata in schema_data.findall('kml:SimpleData', ns):
                row[simpledata.attrib.get('name')] = simpledata.text

        # Only the first <coordinates> element of the placemark is used.
        # An empty element (<coordinates/>) has ``text is None``; treat it the
        # same as a missing element instead of crashing on ``None.strip()``.
        coordinates_elem = placemark.find('.//kml:coordinates', ns)
        coords_text = coordinates_elem.text if coordinates_elem is not None else None
        row['coordinates'] = coords_text.strip() if coords_text is not None else None

        data.append(row)

    # Build the frame once from the list of row dicts
    return pd.DataFrame(data)

if __name__ == '__main__':
    # Location of the source KML document; point this at your own copy
    kml_file_path = '/Users/alexyi/Desktop/school/vs_code/dat_eng_200/EY-Data-Chal/Building_Footprint.kml'

    # Build the DataFrame from the KML placemarks and echo it as plain text
    df = parse_kml_to_df(kml_file_path)
    print(df)

    # Persist the unprocessed table, leaving the index out of the file
    df.to_csv("Original_Building_Footprints.csv", index=False)


     tessellate extrude visibility                id      fid           layer  \
0            -1       0         -1     cugir009034.3     7624      clip_Bronx   
1            -1       0         -1     cugir009034.4     7625      clip_Bronx   
2            -1       0         -1     cugir009034.5     7626      clip_Bronx   
3            -1       0         -1     cugir009034.6     7627      clip_Bronx   
4            -1       0         -1   cugir009034.142     7829      clip_Bronx   
...         ...     ...        ...               ...      ...             ...   
9431         -1       0         -1  cugir009062.5858  4806054  clip_Manhattan   
9432         -1       0         -1  cugir009062.5859  4806055  clip_Manhattan   
9433         -1       0         -1  cugir009062.5860  4806056  clip_Manhattan   
9434         -1       0         -1  cugir009062.5861  4806068  clip_Manhattan   
9435         -1       0         -1  cugir009062.5862  4806069  clip_Manhattan   

                           

In [7]:
# Bare expression as the last line of the cell: renders the DataFrame as the cell output
df

Unnamed: 0,tessellate,extrude,visibility,id,fid,layer,path,coordinates
0,-1,0,-1,cugir009034.3,7624,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"-73.919029,40.848199 -73.919329,40.847953 -73...."
1,-1,0,-1,cugir009034.4,7625,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"-73.921952,40.849629 -73.921911,40.849679 -73...."
2,-1,0,-1,cugir009034.5,7626,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"-73.920502,40.850113 -73.920453,40.850167 -73...."
3,-1,0,-1,cugir009034.6,7627,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"-73.92056,40.851403 -73.920531,40.851445 -73.9..."
4,-1,0,-1,cugir009034.142,7829,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"-73.912343,40.852184 -73.912472,40.852098 -73...."
...,...,...,...,...,...,...,...,...
9431,-1,0,-1,cugir009062.5858,4806054,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"-73.952668,40.779232 -73.95254,40.779428 -73.9..."
9432,-1,0,-1,cugir009062.5859,4806055,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"-73.949638,40.776133 -73.949312,40.776674 -73...."
9433,-1,0,-1,cugir009062.5860,4806056,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"-73.952102,40.768802 -73.951738,40.769309 -73...."
9434,-1,0,-1,cugir009062.5861,4806068,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"-73.952297,40.75904 -73.952463,40.759083 -73.9..."


In [9]:
def process_coordinates(coord_string):
    """Turn a whitespace-separated coordinate string into a list of float tuples.

    Each whitespace-separated token is split on commas and every component is
    converted to ``float``; e.g. ``"-73.9,40.8 -73.8,40.7"`` becomes
    ``[(-73.9, 40.8), (-73.8, 40.7)]``.

    Parameters
    ----------
    coord_string : str or None
        Raw text of a coordinates field. ``None`` and NaN are accepted and
        treated as "no geometry".

    Returns
    -------
    list of tuple of float
        One tuple per token, in input order. Empty for an empty string,
        ``None`` or NaN.

    Raises
    ------
    ValueError
        If a component of a token cannot be converted to ``float``.
    """
    # Rows without geometry carry None (or NaN, pandas' missing-value marker);
    # ``x != x`` is true only for NaN. Return an empty list instead of failing
    # on ``.split()`` so a single missing value does not abort ``.apply``.
    if coord_string is None or coord_string != coord_string:
        return []

    return [
        tuple(map(float, coord.split(',')))
        for coord in coord_string.split()
    ]

# Replace each raw coordinate string with its parsed list of float tuples
df['coordinates'] = df['coordinates'].apply(process_coordinates)

# Keep the output file name in one place so the file written and the message
# printed cannot drift apart. This also avoids reusing double quotes inside the
# f-string replacement field, which is a SyntaxError before Python 3.12.
processed_csv_path = "Processed_Building_Footprints.csv"
df.to_csv(processed_csv_path, index=False)
print(f"Processed CSV saved as: {processed_csv_path}")

Processed CSV saved as: Processed_Building_Footprints.csv


In [10]:
# Render the DataFrame again; the 'coordinates' column was reassigned by the preceding code cell
df

Unnamed: 0,tessellate,extrude,visibility,id,fid,layer,path,coordinates
0,-1,0,-1,cugir009034.3,7624,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"[(-73.919029, 40.848199), (-73.919329, 40.8479..."
1,-1,0,-1,cugir009034.4,7625,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"[(-73.921952, 40.849629), (-73.921911, 40.8496..."
2,-1,0,-1,cugir009034.5,7626,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"[(-73.920502, 40.850113), (-73.920453, 40.8501..."
3,-1,0,-1,cugir009034.6,7627,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"[(-73.92056, 40.851403), (-73.920531, 40.85144..."
4,-1,0,-1,cugir009034.142,7829,clip_Bronx,/Users/killo/Desktop/Clip_Bronx.kml|layername=...,"[(-73.912343, 40.852184), (-73.912472, 40.8520..."
...,...,...,...,...,...,...,...,...
9431,-1,0,-1,cugir009062.5858,4806054,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"[(-73.952668, 40.779232), (-73.95254, 40.77942..."
9432,-1,0,-1,cugir009062.5859,4806055,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"[(-73.949638, 40.776133), (-73.949312, 40.7766..."
9433,-1,0,-1,cugir009062.5860,4806056,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"[(-73.952102, 40.768802), (-73.951738, 40.7693..."
9434,-1,0,-1,cugir009062.5861,4806068,clip_Manhattan,/Users/killo/Desktop/Clip_Manhattan.kml|layern...,"[(-73.952297, 40.75904), (-73.952463, 40.75908..."
