HSLdevcom · johpiip · Jan 13, 2023 · Sep 5, 2022 · Sep 5, 2022 · Sep 20, 2022
diff --git a/Scripts/tests/test_data/Scenario_input_data/2030_test/2016.lnd b/Scripts/tests/test_data/Scenario_input_data/2030_test/2016.lnd
@@ -5,8 +5,9 @@
 #
 	builtar	detach
 102	0.1168	0
-103	0.382	0.005
+103	0.382	0.007
 244	0.1748	0
+300	0	0.003
 1063	0.2268	1
 1531	0.4836	0.124
 2703	0.3516	0.016

diff --git a/Scripts/tests/test_data/Scenario_input_data/2030_test/2030.pop b/Scripts/tests/test_data/Scenario_input_data/2030_test/2030.pop
@@ -8,13 +8,16 @@
 # sh_65-: share of population aged over 65
 #
 	total	sh_7-17	sh_1829	sh_3049	sh_5064	sh_65-
+101	10	0	0	1	0	0
 102	0	0	0	0	0	0
-103	200	0.0743	0.2432	0.2432	0.277	0.0946
+103	100	0.0743	0.2432	0.2432	0.277	0.0946
 244	1300	0.169	0.1489	0.3012	0.1804	0.1016
+300	100	0.0743	0.2432	0.2432	0.277	0.0946
 1063	43	0.1163	0.1628	0.3023	0.3721	0.0233
 1531	732	0.168	0.0861	0.306	0.2404	0.1161
 2703	749	0.0534	0.1669	0.2203	0.2003	0.2911
 2741	272	0.1581	0.0699	0.375	0.1691	0.0846
 6272	620	0.121	0.1	0.2839	0.2161	0.1871
 6291	23	0.0435	0.087	0.3043	0.087	0.4348
 19071	260
+20000	100
diff --git a/Scripts/tests/test_data/Scenario_input_data/2030_test/zone_mapping.txt b/Scripts/tests/test_data/Scenario_input_data/2030_test/zone_mapping.txt
@@ -0,0 +1,14 @@
+	aggregation
+102	102
+103	103
+244	244
+300	103
+1063	1063
+1531	1531
+2703	2703
+2741	2741
+6272	6272
+6291	6291
+19071	19071
+31102	31102
+31500	31500
diff --git a/Scripts/utils/read_csv_file.py b/Scripts/utils/read_csv_file.py
@@ -1,3 +1,5 @@
+from decimal import DivisionByZero
+from itertools import groupby
 import os
 import pandas
 import numpy
@@ -40,10 +42,7 @@ def read_csv_file(data_dir, file_end, zone_numbers=None, dtype=None, squeeze=Fal
         msg = "No {} file found in folder {}".format(file_end, data_dir)
         # This error should not be logged, as it is sometimes excepted
         raise NameError(msg)
-    if squeeze:
-        header = None
-    else:
-        header = "infer"
+    header = None if squeeze else "infer"
     data = pandas.read_csv(
         path, delim_whitespace=True, squeeze=squeeze, keep_default_na=False,
         na_values="", comment='#', header=header)
@@ -64,6 +63,20 @@ def read_csv_file(data_dir, file_end, zone_numbers=None, dtype=None, squeeze=Fal
     if data.index.has_duplicates:
         raise IndexError("Index in file {} has duplicates".format(path))
     if zone_numbers is not None:
+        map_path = os.path.join(data_dir, "zone_mapping.txt")
+        if os.path.exists(map_path):
+            mapping = pandas.read_csv(map_path, delim_whitespace=True).squeeze()
+            if "total" in data.columns:
+                # If file contains total and shares of total,
+                # shares are aggregated as averages with total as weight
+                data = data.groupby(mapping).agg(avg, weights=data["total"])
+            elif "detach" in data.columns:
+                funcs = dict.fromkeys(data.columns, "sum")
+                funcs["detach"] = "mean"
+                data = data.groupby(mapping).agg(funcs)
+            else:
+                data = data.groupby(mapping).sum()
+            data.index = data.index.astype(int)
         if not data.index.is_monotonic:
             data.sort_index(inplace=True)
             log.warn("File {} is not sorted in ascending order".format(path))
@@ -91,3 +104,31 @@ def read_csv_file(data_dir, file_end, zone_numbers=None, dtype=None, squeeze=Fal
             log.error(msg)
             raise ValueError(msg)
     return data
+
+def avg(data, weights):
+    """Aggregate one column of a zone group, using `weights` for shares.
+
+    Used as the aggregation function in `groupby(...).agg(avg, weights=...)`,
+    so it is called once per column and group.
+
+    Parameters
+    ----------
+    data : pandas.Series
+        Values of one column within one group
+    weights : pandas.Series
+        Weight column, indexed like the ungrouped data
+
+    Returns
+    -------
+    number
+        Plain sum if `data` is the weight column itself, otherwise
+        the weighted average (0 if the group weights sum to zero)
+    """
+    if data.name != weights.name:
+        try:
+            group_weights = weights[data.index]
+            return numpy.average(data, weights=group_weights)
+        except ZeroDivisionError:
+            # numpy.average raises this when the weights sum to zero
+            return 0
+    return sum(data)