In [2]:
import zipfile
from lxml import etree
import pandas as pd
import os
from fastkml import kml

# Input locations, relative to the project root selected below.
kmz_file = "Preprocesamiento/KMZ/todalared.kmz"
kml_file = "doc.kml"

# Project root. Set the TRABAJO_TESIS_DIR environment variable to run the
# notebook on another machine instead of editing this cell; when it is unset
# the original default path is used.
project_dir = os.environ.get(
    "TRABAJO_TESIS_DIR", 'c:/Users/admin/OneDrive/Documents/TrabajoTesis'
)
os.chdir(project_dir)

In [3]:
# Open the KMZ (a zip archive) once: print the names of the entries it
# contains, then unpack all of them into the KMZ folder.
with zipfile.ZipFile(kmz_file, mode='r') as kmz:
    print(kmz.namelist())
    kmz.extractall(path="Preprocesamiento/KMZ")

['doc.kml']


In [4]:
# Read the KML document as raw bytes.
with open("Preprocesamiento/KMZ/doc.kml", "rb") as f:
    kml_content = f.read()

# In fastkml >= 1.0, from_string is a classmethod that RETURNS the parsed
# document; ignoring its result leaves `k` as an empty KML object. Older
# releases populate the instance in place and return None, in which case the
# instance itself is kept.
k = kml.KML()
parsed = k.from_string(kml_content)
if parsed is not None:
    k = parsed

def print_features(features, level=0):
    """Print each feature in ``features`` and, recursively, its children.

    Every feature yields one line with its ``name`` and class name, indented
    two spaces per nesting ``level``. If the feature has a truthy ``geometry``
    attribute, a further line reports its ``geom_type``. If it has a truthy
    ``features`` attribute, that collection is printed one level deeper.

    Args:
        features: iterable of objects exposing a ``name`` attribute.
        level: current nesting depth, used only for indentation.
    """
    pad = "  " * level
    for item in features:
        print(f"{pad}Feature name: {item.name}, type: {item.__class__.__name__}")

        shape = getattr(item, 'geometry', None)
        if shape:
            print(f"{pad}  Geometry type: {shape.geom_type}")

        children = getattr(item, 'features', None)
        if children:
            print_features(children, level + 1)

# Print the feature hierarchy starting from the features held by `k`.
print_features(k.features)





In [5]:
# Read the bytes of the first .kml entry directly from the KMZ archive,
# without unpacking anything to disk.
with zipfile.ZipFile(kmz_file, mode='r') as kmz:
    kml_names = list(filter(lambda entry: entry.endswith('.kml'), kmz.namelist()))
    kml_content = kmz.read(kml_names[0])


In [6]:
# Step 1: load the first .kml entry of the KMZ archive into memory.
with zipfile.ZipFile(kmz_file, mode='r') as kmz:
    kml_names = [entry for entry in kmz.namelist() if entry.endswith('.kml')]
    kml_content = kmz.read(kml_names[0])

# Step 2: parse the XML bytes with lxml.
tree = etree.fromstring(kml_content)

# Step 3: prefix -> namespace URI map for namespaced element lookups.
ns = dict(
    kml="http://www.opengis.net/kml/2.2",
    gx="http://www.google.com/kml/ext/2.2",
)

# Step 4: build one record per coordinate tuple found under each Placemark.
# NOTE(review): every <coordinates> element below a Placemark is read, so a
# placemark holding a line or polygon would contribute one row per vertex —
# confirm that is intended, since later cells treat rows as stations.
stations = []

for placemark in tree.findall(".//kml:Placemark", namespaces=ns):
    # A Placemark without a <name> child is recorded as "Unknown".
    name_elem = placemark.find("kml:name", namespaces=ns)
    name = name_elem.text if name_elem is not None else "Unknown"

    for coord_elem in placemark.findall(".//kml:coordinates", namespaces=ns):
        # An empty <coordinates/> element has text None; treat it as holding
        # no tuples instead of failing on None.strip().
        coords_text = (coord_elem.text or "").strip()

        # Tuples are separated by whitespace; fields inside a tuple by commas.
        for line in coords_text.split():
            parts = line.strip().replace(",", " ").split()
            if len(parts) >= 2:
                # Only the first two fields are used: longitude, then latitude.
                lon, lat = map(float, parts[:2])
                stations.append({
                    "name": name,
                    "longitude": lon,
                    "latitude": lat
                })

# Fix the column set explicitly so the frame still has the expected columns
# when no coordinates were found (an empty list would otherwise produce a
# frame with no columns at all).
df_stations = pd.DataFrame(stations, columns=["name", "longitude", "latitude"])





In [7]:
# Keep only the rows that are stations: the name must contain "Est" but
# neither "Ex-Est" nor "Variante a Estación de Transferencia". The search
# strings are literals, so regex interpretation is switched off; rows with a
# missing name are excluded (na=False).
# NOTE(review): "Est" matches anywhere in the name (case-sensitive), not only
# as a leading prefix — confirm no non-station names contain it.
station_names = df_stations['name']
is_station = (
    station_names.str.contains("Est", case=True, na=False, regex=False)
    & ~station_names.str.contains("Ex-Est", case=True, na=False, regex=False)
    & ~station_names.str.contains(
        "Variante a Estación de Transferencia", case=True, na=False, regex=False
    )
)

# Filter, drop rows repeated on name + coordinates, and renumber the index.
# Chained calls return new frames, so re-running the cell is idempotent.
df_estaciones_clean = (
    df_stations[is_station]
    .drop_duplicates(subset=['name', 'longitude', 'latitude'])
    .reset_index(drop=True)
)

# Save the cleaned table; create the output folder first so to_csv does not
# fail when it is missing.
os.makedirs("Bases", exist_ok=True)
df_estaciones_clean.to_csv("Bases/estaciones.csv", index=False)

print(df_estaciones_clean)


                         name  longitude   latitude
0       Est ALAMEDA (Central) -70.679052 -33.451823
1            Est YUNGAY NORTE -70.681263 -33.428082
2                   Est RENCA -70.690631 -33.400383
3               Est QUILICURA -70.705348 -33.364199
4                  Est COLINA -70.769486 -33.291047
..                        ...        ...        ...
737             Est LLAU-LLAU -73.767505 -42.462248
738                Est CASTRO -73.759805 -42.481223
739  Est ARICA (FFCC a Tacna) -70.317542 -18.473507
740            Est CHACALLUTA -70.325011 -18.318546
741             Est ALPATACAL -67.248522 -33.554839

[742 rows x 3 columns]
