In [1]:
import os

import geopandas as gpd
import nivapy3 as nivapy
import pandas as pd
import teotil3 as teo

In [2]:
eng = nivapy.da.connect_postgis()

Connection successful.


# Identify WWTPs with direct discharges to Oslofjord

Phil has sent an Excel file listing WWTPs in the Martini setup that discharge directly to the fjord (see e-mail received 08.09.2025). In Phil's file there are 22 sites marked as "internal" to the marine model.

This notebook checks Phil's list against the data in TEOTIL3.

In [3]:
# Period of interest
st_yr, end_yr = 2017, 2019

# Vassdragsområder of interest
vassom_list = range(1, 18)

# Only consider sites bigger than this
capacity_thresh = 10000

teo3_base_dir = r"/home/jovyan/shared/common/teotil3"

In [4]:
df_list = []
for year in range(st_yr, end_yr + 1):
    df = teo.io.get_raw_annual_point_data(
        eng,
        year,
        "large wastewater",
        par_list=[
            "totn_kg",
            "din_kg",
            "ton_kg",
            "totp_kg",
            "tdp_kg",
            "tpp_kg",
            "toc_kg",
            "ss_kg",
        ],
    )

    # Get capacity data
    cap_xls = os.path.join(
        teo3_base_dir, "point_data", str(year), f"large_wastewater_{year}_raw.xlsx"
    )
    cap_df = pd.read_excel(cap_xls)[["anlegg_nr", "year", 'kilderefnr', "current_capacity"]].rename(
        columns={"anlegg_nr": "site_id"}
    )
    df = pd.merge(df, cap_df, how="left", on=["site_id", "year"])
    df_list.append(df)
df = pd.concat(df_list, axis="rows")

# Filter to region of interest
df["vassom"] = df["regine"].str.split(".", n=1, expand=True)[0].astype(int)
df = df.query("vassom in @vassom_list").reset_index(drop=True)
del df["vassom"]

# Filter by capacity
df = df.query("current_capacity >= @capacity_thresh")

cols = [
    "site_id",
    'kilderefnr',
    "site_name",
    "site_x_utm33",
    "site_y_utm33",
    "outlet_x_utm33",
    "outlet_y_utm33",
]
site_df = df.drop_duplicates(subset=cols)[cols]
assert site_df["site_id"].is_unique

# Convert to gdf
site_gdf = gpd.GeoDataFrame(
    site_df,
    geometry=gpd.points_from_xy(
        site_df["outlet_x_utm33"], site_df["outlet_y_utm33"], crs="epsg:25833"
    ),
)

# Spatial join with Norway land area
shp_path = (
    r"/home/jovyan/shared/common/01_datasets/spatial/vector/Fennoscandia_Countries.shp"
)
gdf = gpd.read_file(shp_path).query("NAME == 'Norway'").to_crs("epsg:25833")
site_gdf = gpd.sjoin(site_gdf, gdf, how="left", predicate="intersects")
site_gdf['direct'] = site_gdf['NAME'].fillna(1).replace({'Norway':0})
site_gdf = site_gdf.drop(columns=['index_right', 'NAME'])

# Print summary
direct = site_gdf.query("direct == 1")
land = site_gdf.query("direct == 0")
print('Direct:', len(direct))
print('Land:', len(land))

direct.sort_values('kilderefnr').head(50)

Direct: 24
Land: 39


  site_gdf['direct'] = site_gdf['NAME'].fillna(1).replace({'Norway':0})


Unnamed: 0,site_id,kilderefnr,site_name,site_x_utm33,site_y_utm33,outlet_x_utm33,outlet_y_utm33,geometry,direct
6,3101.0055.01,0101AL07,Remmendalen avløpsanlegg,291726.78537,6559128.0,291532.153379,6558785.0,POINT (291532.153 6558784.914),1
7,3103.0052.03,0104AL01,Kambo avløpsanlegg,255718.040625,6600681.0,255525.946599,6600990.0,POINT (255525.947 6600989.722),1
12,3107.0134.01,0106AL00,Øra avløpsanlegg,269663.26917,6567409.0,268769.453093,6567356.0,POINT (268769.453 6567356.48),1
8,3103.0153.01,0136AL00,Fuglevik avløpsanlegg,253652.727739,6590854.0,252657.971203,6590931.0,POINT (252657.971 6590930.675),1
63,3216.0024.01,0211AL47,Søndre Follo renseanlegg,256107.565063,6613539.0,253853.012315,6612885.0,POINT (253853.012 6612885.381),1
64,3218.0016.01,0214AL23,Nordre Follo renseanlegg,263160.17517,6631487.0,259928.435329,6633273.0,POINT (259928.435 6633272.865),1
34,3203.0059.01,0220AL01,Sentralrenseanlegg Vest (VEAS),247524.527452,6636968.0,248266.010514,6636757.0,POINT (248266.011 6636756.946),1
0,0301.0979.01,0301AL01,Bekkelaget renseanlegg med tilførselstuneller ...,263347.073035,6646048.0,262704.276918,6645807.0,POINT (262704.277 6645806.588),1
84,3301.0187.01,0602AL06,Solumstrand avløpsanlegg,233877.177595,6628837.0,234038.163322,6628972.0,POINT (234038.163 6628972.24),1
85,3301.0201.01,0602AL45,Muusøya avløpsanlegg,227804.509452,6633602.0,227827.364928,6633466.0,POINT (227827.365 6633466.105),1


This list is similar to Phil's, but not the same. However, since we aren't re-running the baseline in the marine model, I think it makes sense to use Phil's dataset. 