In [8]:
import geopandas as gpd
import pandas as pd
import os
import fiona
from pathlib import Path
os.chdir('C:/Users/admin/OneDrive/Documents/TrabajoTesis')

In [9]:
SHAPEFILE_DIR = 'data/electricas' 

# The list of files to process, based on your image
FILE_NAMES = [
    "Almacenamiento_de_Energía.shp",
    "Bioenergía.shp",
    "Eólicas.shp",
    "Geotérmicas.shp",
    "Hidroeléctricas.shp",
    "Línea_de_Transmisión.shp",
    "Solares.shp",
    "Subestaciones.shp",
    "Termoeléctricas.shp"
]
all_extracted_data = {}
print(f"Attempting to read files from: {Path(SHAPEFILE_DIR).resolve()}")


Attempting to read files from: C:\Users\admin\OneDrive\Documents\TrabajoTesis\data\Electricas


In [10]:



# 2. Iterate and Process Files
for filename in FILE_NAMES:
    # Use os.path.join for correct path construction
    file_path = os.path.join(SHAPEFILE_DIR, filename)
    
    # Check if the main .shp file exists
    if not os.path.exists(file_path):
        print(f"Skipping: Main file '{filename}' not found at {file_path}")
        continue

    print(f"\n--- Processing {filename} ---")
    
    try:
        # **CRITICAL FIX:** Apply SHAPE_RESTORE_SHX='YES' using fiona.Env
        # This forces the recreation of the .shx index if it's somehow not being read.
        with fiona.Env(SHAPE_RESTORE_SHX='YES'):
            # Load the shapefile into a GeoDataFrame
            gdf = gpd.read_file(file_path)

            # Transform to WGS84 (EPSG:4326) for standard Lon/Lat degrees
            gdf_wgs84 = gdf.to_crs(epsg=4326)

            # Calculate Longitude (X) and Latitude (Y) from the geometry's centroid
            # The centroid is used because some files (like 'Línea_de_Transmisión') are lines/polygons.
            gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x
            gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y
            
            # Create a clean output DataFrame with coordinates
            # This includes all original attribute columns plus the new coordinates.
            output_cols = [col for col in gdf_wgs84.columns if col not in ['geometry', 'Longitude', 'Latitude']]
            output_cols.extend(['Longitude', 'Latitude'])
            
            df_output = pd.DataFrame(gdf_wgs84[output_cols])
            all_extracted_data[filename] = df_output

            print(f"✅ Extraction successful. Total features: {len(df_output)}")
            print("First 3 rows of data:")
            print(df_output.head(3))
            
    except Exception as e:
        print(f"❌ An error occurred while processing {filename}: {e}")




--- Processing Almacenamiento_de_Energía.shp ---
✅ Extraction successful. Total features: 6
First 3 rows of data:
                              NOMBRE       PROPIEDAD F_OPERACIO  \
0                 CA S/E ANDES 23 KV  AES ANDES S.A. 2009-07-11   
1  CA S/E COCHRANE D12 (BESS) 6.9 KV  AES ANDES S.A. 2016-10-13   
2  CA S/E ANGAMOS CT1 (BESS) 13.8 KV  AES ANDES S.A. 2011-10-05   

      COORD_ESTE    COORD_NORT  HUSO   DATUM REGION PROVINCIA COMUNA  \
0  542318.797021  7.343896e+06  19 S  WGS 84     02       021  02101   
1  360276.935599  7.448520e+06  19 S  WGS 84     02       021  02102   
2  359797.493446  7.448249e+06  19 S  WGS 84     02       021  02102   

  FUENTE_BAS  FECH_CREA   FECH_ACT TIPO_EQUIP              NOMBRE_SE  \
0        CEN 2023-04-11 2023-04-11       BESS  S/E ANDES (AES ANDES)   
1        CEN 2023-04-11 2023-04-11       BESS  S/E GIS COCHRANE BESS   
2        CEN 2023-04-11 2023-04-11       BESS            S/E ANGAMOS   

   Longitude   Latitude  
0 -68.583881


  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y

  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y

  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y


✅ Extraction successful. Total features: 66
First 3 rows of data:
   ID_GE_EOLI                NOMBRE                      PROPIEDAD    TIPO  \
0           1                 ALENA                   AR ALENA SPA  EOLICO   
1          44    EOLICA LOS CURUROS  PARQUE EOLICO LOS CURUROS SPA  EOLICO   
2         124  VALLE DE LOS VIENTOS    ENEL GREEN POWER CHILE S.A.  EOLICO   

  MEDIO_GENE COMBUSTIBL  POTENCIAMW  UNIDADES       RCA SIST_ELECT  ...  \
0       None  NO APLICA       83.92        18  314/2013        SEN  ...   
1       None  NO APLICA      107.70        57  213/2009        SEN  ...   
2       None  NO APLICA       88.90        45  138/2010        SEN  ...   

     HUSO     DATUM  REGION  PROVINCIA COMUNA FUENTE_BAS  FECH_CREA  \
0  19 SUR  WGS 1984      08        083  08301        CNE 2015-08-03   
1  19 SUR  WGS 1984      04        043  04301        CNE 2015-03-18   
2  19 SUR  WGS 1984      02        022  02201        CNE 2014-12-10   

    FECH_ACT  Longitude   Latitude 


  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y

  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y

  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y


✅ Extraction successful. Total features: 1052
First 3 rows of data:
   ID_LIN_TRA  SUBTIPO                                  NOMBRE CIRCUITO  \
0         865      103  LOS HIERROS II - TAP LOS HIERROS 110KV       C1   
1        1312      101         PUNTA SIERRA - LAS PALMAS 220KV  C1 - C2   
2        1157      104                  LAGUNA SECA - 418 69KV       C1   

       TIPO F_OPERACIO    LONG_KM                      TRAMO  \
0  DEDICADO 2013-03-01   0.363654        TRAVESIA - CARDONES   
1  NACIONAL 1980-01-01  17.157957  PUNTA SIERRA - LAS PALMAS   
2  DEDICADO 2002-08-01  11.252329          LAGUNA SECA - 418   

                          PROPIEDAD  TENSION_KV  ...        ESTADO  \
0  EMPRESA ELÉCTRICA PORTEZUELO SPA       110.0  ...  EN OPERACION   
1                    TRANSELEC S.A.       220.0  ...  EN OPERACION   
2            MINERA ESCONDIDA LTDA.        66.0  ...  EN OPERACION   

        REGION      PROVINCIA         COMUNA FUENTE_BAS  FECH_CREA   FECH_ACT  \
0        MAU


  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y

  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y

  gdf_wgs84['Longitude'] = gdf_wgs84.geometry.centroid.x

  gdf_wgs84['Latitude'] = gdf_wgs84.geometry.centroid.y


In [11]:

# 3. Output Results (Save to CSVs)
print("\n--- Saving extracted data to CSV files ---")
# Create a folder for the output within the shapefile directory
OUTPUT_DIR = os.path.join(SHAPEFILE_DIR, 'Extracted_Coordinates')
os.makedirs(OUTPUT_DIR, exist_ok=True)

for filename, df in all_extracted_data.items():
    csv_filename = filename.replace('.shp', '.csv')
    csv_path = os.path.join(OUTPUT_DIR, csv_filename)
    
    df.to_csv(csv_path, index=False)
    print(f"Saved: {csv_path}")


--- Saving extracted data to CSV files ---
Saved: data/electricas\Extracted_Coordinates\Almacenamiento_de_Energía.csv
Saved: data/electricas\Extracted_Coordinates\Bioenergía.csv
Saved: data/electricas\Extracted_Coordinates\Eólicas.csv
Saved: data/electricas\Extracted_Coordinates\Geotérmicas.csv
Saved: data/electricas\Extracted_Coordinates\Hidroeléctricas.csv
Saved: data/electricas\Extracted_Coordinates\Línea_de_Transmisión.csv
Saved: data/electricas\Extracted_Coordinates\Solares.csv
Saved: data/electricas\Extracted_Coordinates\Subestaciones.csv
Saved: data/electricas\Extracted_Coordinates\Termoeléctricas.csv


In [12]:
import pandas as pd

# Define your files manually
files = {
    "Termoelectricas": "data/Electricas/Extracted_Coordinates/Termoeléctricas.csv",
    "Hidroelectricas": "data/Electricas/Extracted_Coordinates/Hidroeléctricas.csv",
    "Solares": "data/Electricas/Extracted_Coordinates/Solares.csv",
    "Eolicas": "data/Electricas/Extracted_Coordinates/Eólicas.csv"
}

cols_to_keep = ["NOMBRE", "PROPIEDAD", "TIPO", "COMBUSTIBL", "POTENCIAMW", "FECH_CREA", "Longitude", "Latitude"]

dfs = []

for name, path in files.items():
    df = pd.read_csv(path)
    
    # Keep only desired columns that exist
    df = df[[c for c in cols_to_keep if c in df.columns]].copy()
    
    # Add a column with the energy source name
    df["FUENTE"] = name
    
    dfs.append(df)

# Combine all into one dataframe
combined_df = pd.concat(dfs, ignore_index=True)

# Save final cleaned CSV
combined_df.to_csv("Bases/centrales_combinadas.csv", index=False)

print("✅ Saved as 'centrales_combinadas.csv' with column 'FUENTE' indicating origin.")


✅ Saved as 'centrales_combinadas.csv' with column 'FUENTE' indicating origin.


In [13]:
import pandas as pd

# Cargar el CSV original
archivo = "data/Electricas/Extracted_Coordinates/subestaciones.csv"  # reemplaza con tu ruta
df = pd.read_csv(archivo)

# Seleccionar solo las columnas que quieres
columnas_seleccionadas = ["ID_SUBEST", "NOMBRE", "PROPIEDAD", "TENSION_KV", "TIPO", "FECH_CREA", "Latitude", "Longitude"]
df_clean = df[columnas_seleccionadas].copy()

# Renombrar columnas si quieres un estándar uniforme
df_clean.rename(columns={
    "TENSION_KV": "TENSION",
    "FECH_CREA": "FECHA_CREA"
}, inplace=True)

# Revisar los primeros registros
print(df_clean.head())

# Guardar a un nuevo CSV limpio
df_clean.to_csv("Bases/subestaciones.csv", index=False)


   ID_SUBEST              NOMBRE                    PROPIEDAD TENSION  \
0       2275   CENTRAL FUTALELFU                    EDELAYSEN      23   
1        380  S/E ARENAS BLANCAS         CGE TRANSMISIÓN S.A.      66   
2        877   S/E CEMENTO MELON                    MELON S.A      66   
3       1908        S/E NORGENER       MINERA ESCONDIDA LTDA.     220   
4        851      S/E PANIMAVIDA  CHILQUINTA TRANSMISIÓN S.A.      66   

        TIPO  FECHA_CREA   Latitude  Longitude  
0  NO APLICA  2014-12-04 -43.183323 -71.865833  
1      ZONAL  2014-04-12 -37.011786 -73.176813  
2      ZONAL  2014-04-12 -32.789706 -71.198551  
3   DEDICADO  2014-04-12 -22.096891 -70.210757  
4      ZONAL  2014-04-12 -35.749418 -71.420258  


In [14]:
import pandas as pd

# Cargar CSV de líneas de transmisión
archivo = "data/Electricas/Extracted_Coordinates/Línea_de_transmisión.csv"  # reemplaza con tu ruta
df = pd.read_csv(archivo)

# Seleccionar solo las columnas relevantes
columnas_seleccionadas = ["NOMBRE", "PROPIEDAD", "TENSION_KV", "TIPO", "FECH_CREA", "Latitude", "Longitude"]
df_clean = df[columnas_seleccionadas].copy()

# Renombrar columnas para estandarizar
df_clean.rename(columns={
    "ID_LIN_TRA": "ID_LINEA",
    "TENSION_KV": "TENSION",
    "FECH_CREA": "FECHA_CREA"
}, inplace=True)

# Revisar primeros registros
print(df_clean.head())

# Guardar CSV limpio
df_clean.to_csv("Bases/lineas_transmision.csv", index=False)


                                   NOMBRE                         PROPIEDAD  \
0  LOS HIERROS II - TAP LOS HIERROS 110KV  EMPRESA ELÉCTRICA PORTEZUELO SPA   
1         PUNTA SIERRA - LAS PALMAS 220KV                    TRANSELEC S.A.   
2                  LAGUNA SECA - 418 69KV            MINERA ESCONDIDA LTDA.   
3                  TRES PINOS - LEBU 66KV              CGE TRANSMISIÓN S.A.   
4           HUASCO - MAITENCILLO 110KV L2                    TRANSELEC S.A.   

   TENSION      TIPO  FECHA_CREA   Latitude  Longitude  
0    110.0  DEDICADO  2019-04-26 -35.839554 -71.081267  
1    220.0  NACIONAL  2023-04-10 -31.212258 -71.603395  
2     66.0  DEDICADO  2019-04-25 -24.356936 -69.102680  
3     66.0     ZONAL  2019-04-25 -37.645865 -73.526505  
4    110.0  DEDICADO  2019-04-26 -28.506009 -71.091596  
