In [71]:
from shapely.validation import make_valid
import rasterio
from rasterio.features import shapes
import geopandas as gpd
import pandas as pd
import numpy as np
import os

In [7]:
root_dir = "GEOCATMIN/commondata"
directories = [os.path.join(root_dir, d) for d in os.listdir(root_dir)
               if os.path.isdir(os.path.join(root_dir, d)) and not any(s in d for s in ["shp", "raster"])]

shp_paths = []
for directory in directories:
    shp_paths.extend([
        os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".shp")
    ])
    
Hydrogeologic_units = pd.concat([gpd.read_file(p) for p in shp_paths], ignore_index=True)

In [12]:
print(shp_paths)
print(Hydrogeologic_units)

['GEOCATMIN/commondata\\afs\\AFS.shp', 'GEOCATMIN/commondata\\afv\\AFV.shp', 'GEOCATMIN/commondata\\agm\\AGM.shp', 'GEOCATMIN/commondata\\apnc\\APNC-al.shp', 'GEOCATMIN/commondata\\apnc\\APNC-b.shp', 'GEOCATMIN/commondata\\as\\AS.shp', 'GEOCATMIN/commondata\\ati\\ATI.shp', 'GEOCATMIN/commondata\\ats\\ATS.shp', 'GEOCATMIN/commondata\\atv\\ATV.shp', 'GEOCATMIN/commondata\\atvs\\ATVS.shp', 'GEOCATMIN/commondata\\avfb\\AVFB.shp', 'GEOCATMIN/commondata\\avs\\AVS.shp']
           AREA  PERIMETER  GEOLUTMM_I  CODI   NAME  \
0      849428.0   6368.210        1624  1624  Ki-mu   
1     1764560.0   7754.260        3047  3047  Kis-a   
2      235090.0   3028.980        1624  1624  Ki-mu   
3       49083.6    932.318        3047  3047  Kis-a   
4      615929.0   3316.420        3047  3047  Kis-a   
...         ...        ...         ...   ...    ...   
1695        0.0      0.000           0  3065   N-co   
1696        0.0      0.000           0  3065   N-co   
1697        0.0      0.000           

In [16]:
print(pd.unique(Hydrogeologic_units["NAME"]))
Hydrogeologic_units["NAME"] = Hydrogeologic_units["NAME"].str.strip()
print(pd.unique(Hydrogeologic_units["NAME"]))

['Ki-mu' 'Kis-a' 'Js-l' 'Ki-hu' 'Jms-p' 'Ks-as' 'P-pu' 'Kis-ayb' 'Qp-col'
 'N-dmri' 'PN-o/a' 'Qp-b/c-anda' 'NQ-b-and' None 'PP-cbc/m-gn,gra'
 'NP-cbc-gnmg' 'PP-cbc-gn' 'MNP-cbc-pz,esq' 'Qp-al' 'Qh-al' 'Q-gl' 'Q-bi'
 'PN-m' 'Np-mi' 'Ks-bc/t-gd' 'O-bac-gr' 'O-bac-di' 'Ks-bc/t-tn'
 'Ks-bc/t-gr' 'Ks-bc/t-di' 'Ki-di' 'Ki-gb' 'Ks-bc/t-gr,tn' 'P-tn'
 'Pe-hu/gr-to' 'Pp-tn,gd' 'N-dq' 'P-p-di' 'PN-ca-di' 'Jm-ca' 'Jm-so'
 'Peo-h/t' 'Nm-pi' 'Nm-cam' 'P-so' 'Js-g' 'D-c' 'Q-qm-tr' 'Q-la' 'Ks-se'
 'Peo-h/qu' 'Peo-h/h' 'Nm-al' 'N-and' 'Np-se' 'Np-ar' 'PN-o/m' 'N-da'
 'Nmp-ch-da' 'P-and' 'Nm-na/da' 'Nm-ta/da' 'Ks-da' 'Nm-si' 'Nm-js-and'
 'Nm-ja-da' 'Nm-tut-ri' 'N-cat-da' 'N-h-da' 'N-pu-prda' 'N-qu-and'
 'Qp-b/am-tcri' 'NQ-b-tb' 'Nm-ich' 'PN-o/sr' 'Nm-p' 'PN-t' 'Ji-cho'
 'Js-gu' 'Qp-pu' 'Qh-an' 'Nm-ma' 'Nm-ma/i' 'N-co' 'Nm-ca']
['Ki-mu' 'Kis-a' 'Js-l' 'Ki-hu' 'Jms-p' 'Ks-as' 'P-pu' 'Kis-ayb' 'Qp-col'
 'N-dmri' 'PN-o/a' 'Qp-b/c-anda' 'NQ-b-and' None 'PP-cbc/m-gn,gra'
 'NP-cbc-gnmg' 'PP-cbc-gn' 'MNP-cbc-p

In [17]:
replacements = {
    "Kis-a": "Kis-ar",
    "Js-l": "Js-la",
    "Jm-p": "Jm-pu",
    "Qp-b/c-anda": "Q-br2",
    "NQ-b-and": "N-br2",
    "NP-cbc-gnmg": "Js-la",
    "Jms-p": "Jms-pu",
    "Peo-h/t": "Peo-hu/t",
    "Js-g": "Js-gr",
    "Q-la": "Qpl-la",
    "Peo-h/qu": "Peo-hu/qu",
    "Peo-h/h": "Peo-hu/h",
    "N-and": "Js-la",
    "Nmp-ch-da": "N-da",
    "\tPeo-hu/h": "Peo-hu/h",
    "Nm-na/da": "N-da",
    "Nm-ta/da": "N-da",
    "Qp-b/am-tcri": "Q-br1",
    "NQ-b-tb": "Q-br1"
}
Hydrogeologic_units["NAME"] = Hydrogeologic_units["NAME"].replace(replacements)
Hydrogeologic_units.loc[Hydrogeologic_units["NAME_2"] == "PN-o", "NAME"] = "PN-o"

In [72]:
litologia = pd.read_excel("GEOCATMIN/resultados.xlsx", sheet_name=5)
nC = litologia.shape[1]

def calc_min(row):
    valores = row.iloc[2:nC]
    return valores.dropna().min() if not valores.dropna().empty else np.nan

def calc_max(row):
    valores = row.iloc[2:nC]
    return valores.dropna().max() if not valores.dropna().empty else np.nan

def calc_prom(row):
    unique_vals = row.iloc[2:4].dropna().tolist()
    paired_vals = row.iloc[4:nC]
    promedios = [np.nanmean(paired_vals[i:i+2]) for i in range(0, len(paired_vals), 2)]
    return np.nanmean(unique_vals + promedios)

litologia["Min"] = litologia.apply(calc_min, axis=1)
litologia["Max"] = litologia.apply(calc_max, axis=1)
litologia["Prom"] = litologia.apply(calc_prom, axis=1)
litologia = litologia[["Formación geológica", "Min", "Max", "Prom"]]
litologia.rename(columns={"Formación geológica": "NAME"}, inplace=True)

Hydrogeologic_units_join = Hydrogeologic_units.merge(litologia, on="NAME", how="left")
Hydrogeologic_units_join = Hydrogeologic_units_join[["NAME", "Min", "Max", "Prom", "geometry"]]
Hydrogeologic_units_join["geometry"] = Hydrogeologic_units_join["geometry"].apply(make_valid)

Hydrogeologic_units_join.to_file("1.PRODUCTOS/Hydrogeologic_units_join.shp")

  promedios = [np.nanmean(paired_vals[i:i+2]) for i in range(0, len(paired_vals), 2)]
  return np.nanmean(unique_vals + promedios)


In [62]:
Soil_raster_MAPSWAT_path = "MAPSWAT/MapSWAT/SWAT_INPUT_MAPS/SOIL/SOIL.b1.tif"
Soil_MAPSWAT_properties = pd.read_csv("MAPSWAT/MapSWAT/SWAT_INPUT_MAPS/SOIL/DSOLMap_usersoil.csv")
#tmp2 = pd.read_csv("MAPSWAT/MapSWAT/SWAT_INPUT_MAPS/SOIL/DSOLMap_taxonomy.csv")
#tmp1.columns.values[0] = tmp2.columns.values[0] = "SOILID"
#tmp1.columns.values[0] = "SOILID"
#Soil_properties = tmp1.merge(tmp2, on = "SOILID", how = "left")
#print(list(Soil_properties.columns))

In [73]:
with rasterio.open(Soil_raster_MAPSWAT_path) as src:
    image = src.read(1)
    mask = image != src.nodata
    transform = src.transform

    results = (
        {"properties": {"OBJECTID": v}, "geometry": s}
        for s, v in shapes(image, mask=mask, transform=transform)
    )

Soil_shp_MAPSWAT = gpd.GeoDataFrame.from_features(results)
Soil_shp_MAPSWAT.crs = src.crs
Soil_shp_MAPSWAT = Soil_shp_MAPSWAT.dissolve(by="OBJECTID")
Soil_shp_MAPSWAT = Soil_shp_MAPSWAT.merge(Soil_MAPSWAT_properties, on = "OBJECTID", how = "left")

Soil_shp_MAPSWAT.to_file("1.PRODUCTOS/SOIL_MAPSWAT.shp")

In [75]:
SOIL_MAPSWAT = gpd.read_file("1.PRODUCTOS/SOIL_MAPSWAT.shp")
Hydrogeologic_units_join = gpd.read_file("1.PRODUCTOS/Hydrogeologic_units_join.shp")
Hydrogeologic_units_join_dis = Hydrogeologic_units_join.dissolve()

In [76]:
MAPSWAT_Hydrogeologic_inte = gpd.overlay(SOIL_MAPSWAT, Hydrogeologic_units_join, how='intersection')

In [77]:
MAPSWAT_Hydrogeologic_diff = gpd.overlay(SOIL_MAPSWAT, Hydrogeologic_units_join_dis, how='difference')

In [78]:
# (3) Unión de shp3 y shp4
SOIL_map = gpd.GeoDataFrame(pd.concat([MAPSWAT_Hydrogeologic_inte, MAPSWAT_Hydrogeologic_diff], ignore_index=True), crs=SOIL_MAPSWAT.crs)

# (Opcional) Guardar resultado
SOIL_map.to_file("1.PRODUCTOS/SOIL_map.shp")