From c3151902f61796449570d6d8feea0cce359b3996 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Mon, 8 Jul 2024 08:29:16 +0200
Subject: [PATCH] Compatibility with geopandas version 1 (#1136)

* address geopandas and pandas deprecations/warnings

* compatibility with geopandas v1

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 envs/environment.yaml                              |  2 +-
 scripts/add_electricity.py                         |  6 +++---
 scripts/base_network.py                            | 13 ++++++++-----
 scripts/build_gas_input_locations.py               | 13 ++++++++-----
 scripts/build_gas_network.py                       |  6 ++++--
 scripts/build_hourly_heat_demand.py                |  6 +++---
 scripts/build_industrial_distribution_key.py       |  2 +-
 ...d_industrial_energy_demand_per_country_today.py |  4 ++--
 scripts/build_industrial_production_per_country.py |  3 ++-
 scripts/build_population_layouts.py                |  4 +++-
 scripts/build_renewable_profiles.py                |  2 +-
 scripts/build_shapes.py                            | 14 ++++++++------
 scripts/build_shipping_demand.py                   |  4 +---
 scripts/cluster_gas_network.py                     |  6 +++---
 scripts/determine_availability_matrix_MD_UA.py     |  2 +-
 scripts/prepare_sector_network.py                  |  2 +-
 scripts/solve_network.py                           |  4 ++--
 17 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/envs/environment.yaml b/envs/environment.yaml
index e57c87612..febd6ea2e 100644
--- a/envs/environment.yaml
+++ b/envs/environment.yaml
@@ -28,7 +28,7 @@ dependencies:
 - powerplantmatching>=0.5.15
 - numpy
 - pandas>=2.1
-- geopandas>=0.11.0, <1
+- geopandas>=1
 - xarray>=2023.11.0
 - rioxarray
 - netcdf4
diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py
index 49d0bdf75..f90d6c851 100755
--- a/scripts/add_electricity.py
+++ b/scripts/add_electricity.py
@@ -287,9 +287,9 @@ def shapes_to_shapes(orig, dest):
     transfer = sparse.lil_matrix((len(dest), len(orig)), dtype=float)
 
     for i, j in product(range(len(dest)), range(len(orig))):
-        if orig_prepped[j].intersects(dest[i]):
-            area = orig[j].intersection(dest[i]).area
-            transfer[i, j] = area / dest[i].area
+        if orig_prepped[j].intersects(dest.iloc[i]):
+            area = orig.iloc[j].intersection(dest.iloc[i]).area
+            transfer[i, j] = area / dest.iloc[i].area
 
     return transfer
 
diff --git a/scripts/base_network.py b/scripts/base_network.py
index df3bc2b2c..f87d666b1 100644
--- a/scripts/base_network.py
+++ b/scripts/base_network.py
@@ -671,7 +671,7 @@ def _set_links_underwater_fraction(n, offshore_shapes):
     if not hasattr(n.links, "geometry"):
         n.links["underwater_fraction"] = 0.0
     else:
-        offshore_shape = gpd.read_file(offshore_shapes).unary_union
+        offshore_shape = gpd.read_file(offshore_shapes).union_all()
         links = gpd.GeoSeries(n.links.geometry.dropna().map(shapely.wkt.loads))
         n.links["underwater_fraction"] = (
             links.intersection(offshore_shape).length / links.length
@@ -874,7 +874,8 @@ def build_bus_shapes(n, country_shapes, offshore_shapes, countries):
                         onshore_locs.values, onshore_shape
                     ),
                     "country": country,
-                }
+                },
+                crs=n.crs,
             )
         )
 
@@ -889,12 +890,14 @@ def build_bus_shapes(n, country_shapes, offshore_shapes, countries):
                 "y": offshore_locs["y"],
                 "geometry": voronoi_partition_pts(offshore_locs.values, offshore_shape),
                 "country": country,
-            }
+            },
+            crs=n.crs,
         )
-        offshore_regions_c = offshore_regions_c.loc[offshore_regions_c.area > 1e-2]
+        sel = offshore_regions_c.to_crs(3035).area > 10  # m2
+        offshore_regions_c = offshore_regions_c.loc[sel]
         offshore_regions.append(offshore_regions_c)
 
-    shapes = pd.concat(onshore_regions, ignore_index=True)
+    shapes = pd.concat(onshore_regions, ignore_index=True).set_crs(n.crs)
 
     return onshore_regions, offshore_regions, shapes, offshore_shapes
 
diff --git a/scripts/build_gas_input_locations.py b/scripts/build_gas_input_locations.py
index 67dbc9860..ca43db3c1 100644
--- a/scripts/build_gas_input_locations.py
+++ b/scripts/build_gas_input_locations.py
@@ -7,6 +7,7 @@
 production sites with data from SciGRID_gas and Global Energy Monitor.
 """
 
+import json
 import logging
 
 import geopandas as gpd
@@ -19,7 +20,8 @@
 
 def read_scigrid_gas(fn):
     df = gpd.read_file(fn)
-    df = pd.concat([df, df.param.apply(pd.Series)], axis=1)
+    expanded_param = df.param.apply(json.loads).apply(pd.Series)
+    df = pd.concat([df, expanded_param], axis=1)
     df.drop(["param", "uncertainty", "method"], axis=1, inplace=True)
     return df
 
@@ -97,11 +99,11 @@ def build_gas_input_locations(gem_fn, entry_fn, sto_fn, countries):
         ~(entry.from_country.isin(countries) & entry.to_country.isin(countries))
         & ~entry.name.str.contains("Tegelen")  # only take non-EU entries
         | (entry.from_country == "NO")  # malformed datapoint  # entries from NO to GB
-    ]
+    ].copy()
 
     sto = read_scigrid_gas(sto_fn)
     remove_country = ["RU", "UA", "TR", "BY"]  # noqa: F841
-    sto = sto.query("country_code not in @remove_country")
+    sto = sto.query("country_code not in @remove_country").copy()
 
     # production sites inside the model scope
     prod = build_gem_prod_data(gem_fn)
@@ -132,7 +134,8 @@ def build_gas_input_locations(gem_fn, entry_fn, sto_fn, countries):
         snakemake = mock_snakemake(
             "build_gas_input_locations",
             simpl="",
-            clusters="128",
+            clusters="5",
+            configfiles="config/test/config.overnight.yaml",
         )
 
     configure_logging(snakemake)
@@ -162,7 +165,7 @@ def build_gas_input_locations(gem_fn, entry_fn, sto_fn, countries):
 
     gas_input_nodes = gpd.sjoin(gas_input_locations, regions, how="left")
 
-    gas_input_nodes.rename(columns={"index_right": "bus"}, inplace=True)
+    gas_input_nodes.rename(columns={"name": "bus"}, inplace=True)
 
     gas_input_nodes.to_file(snakemake.output.gas_input_nodes, driver="GeoJSON")
 
diff --git a/scripts/build_gas_network.py b/scripts/build_gas_network.py
index 5e9a5c9af..e16096d36 100644
--- a/scripts/build_gas_network.py
+++ b/scripts/build_gas_network.py
@@ -7,6 +7,7 @@
 (https://www.gas.scigrid.de/).
 """
 
+import json
 import logging
 
 import geopandas as gpd
@@ -54,8 +55,9 @@ def diameter_to_capacity(pipe_diameter_mm):
 
 def load_dataset(fn):
     df = gpd.read_file(fn)
-    param = df.param.apply(pd.Series)
-    method = df.method.apply(pd.Series)[["diameter_mm", "max_cap_M_m3_per_d"]]
+    param = df.param.apply(json.loads).apply(pd.Series)
+    cols = ["diameter_mm", "max_cap_M_m3_per_d"]
+    method = df.method.apply(json.loads).apply(pd.Series)[cols]
     method.columns = method.columns + "_method"
     df = pd.concat([df, param, method], axis=1)
     to_drop = ["param", "uncertainty", "method", "tags"]
diff --git a/scripts/build_hourly_heat_demand.py b/scripts/build_hourly_heat_demand.py
index 0dcf3524a..c28860f3c 100644
--- a/scripts/build_hourly_heat_demand.py
+++ b/scripts/build_hourly_heat_demand.py
@@ -41,10 +41,10 @@
         from _helpers import mock_snakemake
 
         snakemake = mock_snakemake(
-            "build_hourly_heat_demands",
+            "build_hourly_heat_demand",
             scope="total",
             simpl="",
-            clusters=48,
+            clusters=5,
         )
     set_scenario_config(snakemake)
 
@@ -85,6 +85,6 @@
 
     heat_demand.index.name = "snapshots"
 
-    ds = heat_demand.stack().to_xarray()
+    ds = heat_demand.stack(future_stack=True).to_xarray()
 
     ds.to_netcdf(snakemake.output.heat_demand)
diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py
index bfbba35e1..e66547289 100644
--- a/scripts/build_industrial_distribution_key.py
+++ b/scripts/build_industrial_distribution_key.py
@@ -116,7 +116,7 @@ def prepare_hotmaps_database(regions):
 
     gdf = gpd.sjoin(gdf, regions, how="inner", predicate="within")
 
-    gdf.rename(columns={"index_right": "bus"}, inplace=True)
+    gdf.rename(columns={"name": "bus"}, inplace=True)
     gdf["country"] = gdf.bus.str[:2]
 
     # the .sjoin can lead to duplicates if a geom is in two overlapping regions
diff --git a/scripts/build_industrial_energy_demand_per_country_today.py b/scripts/build_industrial_energy_demand_per_country_today.py
index 9c8f2e986..b77ba8d65 100644
--- a/scripts/build_industrial_energy_demand_per_country_today.py
+++ b/scripts/build_industrial_energy_demand_per_country_today.py
@@ -184,7 +184,7 @@ def separate_basic_chemicals(demand, production):
 
     demand.drop(columns="Basic chemicals", inplace=True)
 
-    demand["HVC"].clip(lower=0, inplace=True)
+    demand["HVC"] = demand["HVC"].clip(lower=0)
 
     return demand
 
@@ -248,7 +248,7 @@ def industrial_energy_demand(countries, year):
     demand = add_non_eu28_industrial_energy_demand(countries, demand, production)
 
     # for format compatibility
-    demand = demand.stack(dropna=False).unstack(level=[0, 2])
+    demand = demand.stack(future_stack=True).unstack(level=[0, 2])
 
     # style and annotation
     demand.index.name = "TWh/a"
diff --git a/scripts/build_industrial_production_per_country.py b/scripts/build_industrial_production_per_country.py
index 458062059..ec86a78d5 100644
--- a/scripts/build_industrial_production_per_country.py
+++ b/scripts/build_industrial_production_per_country.py
@@ -301,7 +301,8 @@ def separate_basic_chemicals(demand, year):
     demand["Basic chemicals"] -= demand["Ammonia"]
 
     # EE, HR and LT got negative demand through subtraction - poor data
-    demand["Basic chemicals"].clip(lower=0.0, inplace=True)
+    col = "Basic chemicals"
+    demand[col] = demand[col].clip(lower=0.0)
 
     # assume HVC, methanol, chlorine production proportional to non-ammonia basic chemicals
     distribution_key = (
diff --git a/scripts/build_population_layouts.py b/scripts/build_population_layouts.py
index dc4cf2f8c..ca664ed0a 100644
--- a/scripts/build_population_layouts.py
+++ b/scripts/build_population_layouts.py
@@ -92,7 +92,9 @@
 
         # The first low density grid cells to reach rural fraction are rural
         asc_density_i = density_cells_ct.sort_values().index
-        asc_density_cumsum = pop_cells_ct[asc_density_i].cumsum() / pop_cells_ct.sum()
+        asc_density_cumsum = (
+            pop_cells_ct.iloc[asc_density_i].cumsum() / pop_cells_ct.sum()
+        )
         rural_fraction_ct = 1 - urban_fraction[ct]
         pop_ct_rural_b = asc_density_cumsum < rural_fraction_ct
         pop_ct_urban_b = ~pop_ct_rural_b
diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py
index 0aef89bc0..57568f246 100644
--- a/scripts/build_renewable_profiles.py
+++ b/scripts/build_renewable_profiles.py
@@ -406,7 +406,7 @@
 
     if snakemake.wildcards.technology.startswith("offwind"):
         logger.info("Calculate underwater fraction of connections.")
-        offshore_shape = gpd.read_file(snakemake.input["offshore_shapes"]).unary_union
+        offshore_shape = gpd.read_file(snakemake.input["offshore_shapes"]).union_all()
         underwater_fraction = []
         for bus in buses:
             p = centre_of_mass.sel(bus=bus).data
diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py
index 85afdaea4..2a3cc297a 100644
--- a/scripts/build_shapes.py
+++ b/scripts/build_shapes.py
@@ -124,7 +124,7 @@ def countries(naturalearth, country_list):
     df = df.loc[
         df.name.isin(country_list) & ((df["scalerank"] == 0) | (df["scalerank"] == 5))
     ]
-    s = df.set_index("name")["geometry"].map(_simplify_polys)
+    s = df.set_index("name")["geometry"].map(_simplify_polys).set_crs(df.crs)
     if "RS" in country_list:
         s["RS"] = s["RS"].union(s.pop("KV"))
         # cleanup shape union
@@ -145,7 +145,8 @@ def eez(country_shapes, eez, country_list):
         lambda s: _simplify_polys(s, filterremote=False)
     )
     s = gpd.GeoSeries(
-        {k: v for k, v in s.items() if v.distance(country_shapes[k]) < 1e-3}
+        {k: v for k, v in s.items() if v.distance(country_shapes[k]) < 1e-3},
+        crs=df.crs,
     )
     s = s.to_frame("geometry")
     s.index.name = "name"
@@ -156,7 +157,7 @@ def country_cover(country_shapes, eez_shapes=None):
     shapes = country_shapes
     if eez_shapes is not None:
         shapes = pd.concat([shapes, eez_shapes])
-    europe_shape = shapes.unary_union
+    europe_shape = shapes.union_all()
     if isinstance(europe_shape, MultiPolygon):
         europe_shape = max(europe_shape.geoms, key=attrgetter("area"))
     return Polygon(shell=europe_shape.exterior)
@@ -235,11 +236,11 @@ def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp):
         [["BA1", "BA", 3871.0], ["RS1", "RS", 7210.0], ["AL1", "AL", 2893.0]],
         columns=["NUTS_ID", "country", "pop"],
         geometry=gpd.GeoSeries(),
+        crs=df.crs,
     )
-    manual["geometry"] = manual["country"].map(country_shapes)
+    manual["geometry"] = manual["country"].map(country_shapes.to_crs(df.crs))
     manual = manual.dropna()
     manual = manual.set_index("NUTS_ID")
-    manual = manual.set_crs("ETRS89")
 
     df = pd.concat([df, manual], sort=False)
 
@@ -265,7 +266,8 @@ def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp):
     offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes)
 
     europe_shape = gpd.GeoDataFrame(
-        geometry=[country_cover(country_shapes, offshore_shapes.geometry)]
+        geometry=[country_cover(country_shapes, offshore_shapes.geometry)],
+        crs=country_shapes.crs,
     )
     europe_shape.reset_index().to_file(snakemake.output.europe_shape)
 
diff --git a/scripts/build_shipping_demand.py b/scripts/build_shipping_demand.py
index b50cd3168..d8c960ae0 100644
--- a/scripts/build_shipping_demand.py
+++ b/scripts/build_shipping_demand.py
@@ -45,9 +45,7 @@
     # assign ports to nearest region
     p = european_ports.to_crs(3857)
     r = regions.to_crs(3857)
-    outflows = (
-        p.sjoin_nearest(r).groupby("index_right").properties_outflows.sum().div(1e3)
-    )
+    outflows = p.sjoin_nearest(r).groupby("name").properties_outflows.sum().div(1e3)
 
     # calculate fraction of each country's port outflows
     countries = outflows.index.str[:2]
diff --git a/scripts/cluster_gas_network.py b/scripts/cluster_gas_network.py
index 19585aa94..6d4e6c48a 100755
--- a/scripts/cluster_gas_network.py
+++ b/scripts/cluster_gas_network.py
@@ -41,9 +41,9 @@ def build_clustered_gas_network(df, bus_regions, length_factor=1.25):
     for i in [0, 1]:
         gdf = gpd.GeoDataFrame(geometry=df[f"point{i}"], crs="EPSG:4326")
 
-        bus_mapping = gpd.sjoin(
-            gdf, bus_regions, how="left", predicate="within"
-        ).index_right
+        bus_mapping = gpd.sjoin(gdf, bus_regions, how="left", predicate="within")[
+            "name"
+        ]
         bus_mapping = bus_mapping.groupby(bus_mapping.index).first()
 
         df[f"bus{i}"] = bus_mapping
diff --git a/scripts/determine_availability_matrix_MD_UA.py b/scripts/determine_availability_matrix_MD_UA.py
index 80c04083c..678ef025d 100644
--- a/scripts/determine_availability_matrix_MD_UA.py
+++ b/scripts/determine_availability_matrix_MD_UA.py
@@ -147,7 +147,7 @@ def get_wdpa_layer_name(wdpa_fn, layer_substring):
     regions_geometry = regions.to_crs(3035).geometry
     band, transform = shape_availability(regions_geometry, excluder)
     fig, ax = plt.subplots(figsize=(4, 8))
-    gpd.GeoSeries(regions_geometry.unary_union).plot(ax=ax, color="none")
+    gpd.GeoSeries(regions_geometry.union_all()).plot(ax=ax, color="none")
     show(band, transform=transform, cmap="Greens", ax=ax)
     plt.axis("off")
     plt.savefig(snakemake.output.availability_map, bbox_inches="tight", dpi=500)
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index dfa06cac5..9292e3488 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -3718,7 +3718,7 @@ def lossy_bidirectional_links(n, carrier, efficiencies={}):
     rev_links.index = rev_links.index.map(lambda x: x + "-reversed")
 
     n.links = pd.concat([n.links, rev_links], sort=False)
-    n.links["reversed"] = n.links["reversed"].fillna(False)
+    n.links["reversed"] = n.links["reversed"].fillna(False).infer_objects(copy=False)
     n.links["length_original"] = n.links["length_original"].fillna(n.links.length)
 
     # do compression losses after concatenation to take electricity consumption at bus0 in either direction
diff --git a/scripts/solve_network.py b/scripts/solve_network.py
index ba2fac7fe..42ffe6be9 100644
--- a/scripts/solve_network.py
+++ b/scripts/solve_network.py
@@ -155,7 +155,7 @@ def _add_land_use_constraint(n):
             existing_large, "p_nom_min"
         ]
 
-    n.generators.p_nom_max.clip(lower=0, inplace=True)
+    n.generators["p_nom_max"] = n.generators["p_nom_max"].clip(lower=0)
 
 
 def _add_land_use_constraint_m(n, planning_horizons, config):
@@ -207,7 +207,7 @@ def _add_land_use_constraint_m(n, planning_horizons, config):
             existing_large, "p_nom_min"
         ]
 
-    n.generators.p_nom_max.clip(lower=0, inplace=True)
+    n.generators["p_nom_max"] = n.generators["p_nom_max"].clip(lower=0)
 
 
 def add_solar_potential_constraints(n, config):