In [1]:
import pandas as pd

# TEOTIL3: Tidy annual data

## Part 2: Small wastewater

This notebook estimates inflows and outflows of nutrients from "small" wastewater sites (<50 p.e.) based on population data provided by Gisle Berge at SSB.

## Workflow overview

 1. Gisle provides a dataset with the number of people in each kommune connected to each type of "small" wastewater treatment plant.
    
 2. Inflows are estimated by multiplying the number of people by typical discharges per person in g/pers/day. Default values used by the model are available [here](https://github.com/NIVANorge/teotil3/blob/main/data/nutrient_discharges_per_person.csv).

 3. Outflows are estimated by assuming typical treatment efficiencies for each type of site, based on [this report](https://nibio.brage.unit.no/nibio-xmlui/handle/11250/3183748) from NIBIO.

 4. Annual input files are saved to Excel.

In [2]:
# Delivery year of the raw datasets; selects the input folder
# /home/jovyan/shared/common/teotil3/point_data/raw_data_delivered_{deliv_year}
deliv_year = 2024

# Last year for which emissions are estimated
final_year = 2023

## 1. Default discharges per person

The default values come from a [book published by Norsk Vann](https://va-kompetanse.no/butikk/laerebok-i-vann-og-avlopsteknikk/). See e-mail from Gisle received 27.03.2025 for a screenshot of the relevant page.

These factors are used for both large and small wastewater sites.

In [3]:
# Load the default per-person discharges (g/pers/day) from the TEOTIL3 repository
url = r"https://raw.githubusercontent.com/NIVANorge/teotil3/refs/heads/main/data/nutrient_discharges_per_person.csv"
pers_df = pd.read_csv(url)

# 'KOF' is not used for small wastewater, so drop that row
pers_df = pers_df[pers_df["parameter"] != "kof"]

# Build the lookup: parameter name => discharge per person per day
pers_dict = pers_df.set_index("parameter")["g_per_pers_per_day"].to_dict()
pers_dict

{'totn': 12.0, 'totp': 1.8, 'bof5': 60.0, 'ss': 70.0}

## 2. Treatment efficiencies

A [recent report](https://nibio.brage.unit.no/nibio-xmlui/handle/11250/3183748) by NIBIO provides updated treatment efficiencies for N, P, BOF5 and SS (but not KOF). The report suggests ideal treatment efficiencies for properly maintained facilities, but also provides "reduction factors" by which the ideal efficiencies should be reduced to represent typical conditions (i.e. the real efficiency of normally maintained facilities of average age). Based on the new data, the expected mean efficiency is calculated as:

$$E_{mean} = E_{ideal} (1 - \frac{R}{100})$$

Where $E_{mean}$ is the expected true average efficiency (%); $E_{ideal}$ is the reference or "best case" efficiency for a well maintained facility (%); and $R$ is the efficiency reduction factor (%).

The old and new efficiencies for each "small" treatment type can be found in `treatment_efficiencies_small_wastewater.csv`.

In [4]:
# Load the table of treatment efficiencies and reduction factors (one row per
# SSB site type) from the TEOTIL3 repository, then preview the first rows
url = r"https://raw.githubusercontent.com/NIVANorge/teotil3/refs/heads/main/data/treatment_efficiencies_small_wastewater.csv"
eff_df = pd.read_csv(url)
eff_df.head(5)

Unnamed: 0,ssb_code,ssb_desc,teotil_type,totn_eff_old_pct,totp_eff_old_pct,totn_eff_new_pct,totp_eff_new_pct,bof5_eff_new_pct,ss_eff_new_pct,totn_redfac_new_pct,totp_redfac_new_pct,bof5_redfac_new_pct,ss_redfac_new_pct
0,FASTBOURENS2,Urenset utslipp,Direkte utslipp,0,0,0,0,0,0,0,0,0,0
1,FASTBOSLAM2,Slamavskiller uten etterfiltrering,Slamavskiller,5,5,10,10,25,60,25,25,25,25
2,FASTBOINFILT2,Slamavskiller med infiltrasjon (stedegne masser),Infiltrasjonsanlegg,20,75,40,90,90,100,10,20,10,5
3,FASTBOSAND2,Slamavskiller med sandfilter (tilførte masser),Sandfilteranlegg,15,15,30,75,90,95,10,75,20,10
4,FASTBOBIO2,Minirenseanlegg - biologisk,Biologisk,10,15,30,60,90,80,10,20,10,20


## 3. Estimate discharges

Using population data from SSB.

In [5]:
# Location of the SSB delivery: the folder is keyed by delivery year and the
# file name by the final data year
fpath = (
    f"/home/jovyan/shared/common/teotil3/point_data/raw_data_delivered_{deliv_year}"
    f"/tmp_teotil_smaa_anlegg_2002_{final_year}.sdv"
)

In [6]:
# Read SSB data (semicolon-delimited, Windows-1252 encoded)
df = pd.read_csv(fpath, sep=";", encoding="cp1252")

# Read mapping for SSB codes => TEOTIL3 codes for små anlegg
url = r"https://raw.githubusercontent.com/NIVANorge/teotil3/refs/heads/main/data/ssb_sma_anlegg_type_codes.csv"
ssb_df = pd.read_csv(url)

# For every site type in the efficiency table, estimate the discharge of each
# parameter and store it in a new column named 'TEO3_{PAR}_{ssb_code}'
for _, row in eff_df.iterrows():
    site_type = row["ssb_code"]

    # Missing population counts are treated as zero people connected
    df[site_type] = df[site_type].fillna(0)

    for par, pers_contrib in pers_dict.items():
        # Estimate typical efficiency, allowing for reduction factors:
        # E_mean = E_ideal * (1 - R/100), expressed here as a fraction (0-1)
        red_fac = row[f"{par}_redfac_new_pct"] / 100
        treat_eff = row[f"{par}_eff_new_pct"] * (1 - red_fac) / 100

        # Estimate discharge: people * g/pers/day * days per year * untreated
        # fraction, divided by 1000 to convert grams to kilograms
        df[f"TEO3_{par.upper()}_{site_type}"] = (
            365.25 * df[site_type] * pers_contrib * (1 - treat_eff) / 1000
        )

# Tidy: keep only the identifiers and the estimated discharge columns
cols = ["KOMMUNE_NR", "aargang"] + [
    col for col in df.columns if col.startswith("TEO3_")
]
df = df[cols]
df.columns = [col.upper() for col in cols]
df = df.rename(columns={"KOMMUNE_NR": "KOMMUNENR", "AARGANG": "year"})

# Melt to long format, then split 'TEO3_{PAR}_{ssb_code}' into its parts
df = df.melt(id_vars=["KOMMUNENR", "year"])
df["variable"] = df["variable"].str[5:]  # Strip the 5-character 'TEO3_' prefix
df[["variable", "ssb_code"]] = df["variable"].str.split("_", n=1, expand=True)

# Check SSB codes in new data are valid, reporting any that are not mapped
valid_codes = set(ssb_df["ssb_code"].unique())
new_codes = set(df["ssb_code"].unique())
unknown_codes = new_codes - valid_codes
assert (
    not unknown_codes
), f"SSB codes missing from the mapping table: {sorted(unknown_codes, key=str)}"

# Convert to wide: one column per '{PAR}-{teotil_type}' combination
df = df.dropna(subset="value")
df = pd.merge(df, ssb_df, how="left", on="ssb_code")
df["variable"] = df["variable"] + "-" + df["teotil_type"]
df = df.drop(columns=["ssb_code", "ssb_desc", "teotil_type"])
df = df.set_index(["KOMMUNENR", "year", "variable"]).unstack("variable").fillna(0)

# Unstacking gives two-level column labels; keep only the variable names.
# NOTE(review): assumes 'value' is the only remaining data column, i.e. the
# mapping table contributes no columns beyond those dropped above - confirm
df.columns = df.columns.get_level_values(1)
df = df.reset_index()
df.columns.name = ""

# Kommune numbers are written as zero-padded, 4-character strings
df["KOMMUNENR"] = df["KOMMUNENR"].astype(str).str.zfill(4)

df.head()

Unnamed: 0,KOMMUNENR,year,BOF5-Annen løsning,BOF5-Biologisk,BOF5-Biologisk og kjemisk,BOF5-Biologisk toalett,"BOF5-Biologisk toalett, gråvannsfilter",BOF5-Direkte utslipp,BOF5-Infiltrasjonsanlegg,BOF5-Kjemisk,...,"TOTP-Biologisk toalett, gråvannsfilter",TOTP-Direkte utslipp,TOTP-Infiltrasjonsanlegg,TOTP-Kjemisk,TOTP-Konstruert våtmark,TOTP-Sandfilteranlegg,TOTP-Slamavskiller,TOTP-Tett tank (for alt avløpsvann),TOTP-Tett tank for svartvann,"TOTP-Tett tank for svartvann, gråvannsfilter"
0,101,2002,0.0,0.0,782.8038,0.0,0.0,2805.12,1561.44375,703.9098,...,0.0,84.1536,69.03225,13.438278,0.0,71.045691,982.75626,0.0,131.989662,227.765334
1,101,2003,0.0,0.0,782.8038,0.0,0.0,2805.12,1561.44375,703.9098,...,0.0,84.1536,69.03225,13.438278,0.0,71.045691,982.75626,0.0,131.989662,227.765334
2,101,2004,0.0,0.0,782.8038,0.0,0.0,2805.12,1561.44375,703.9098,...,0.0,84.1536,69.03225,13.438278,0.0,71.045691,982.75626,0.0,131.989662,227.765334
3,101,2005,0.0,0.0,824.4423,0.0,0.0,2717.46,1548.9522,848.5488,...,0.0,81.5238,68.479992,16.199568,0.0,71.045691,977.282989,0.0,139.353102,58.973265
4,101,2006,0.0,0.0,878.57235,0.0,0.0,2454.48,1578.09915,887.1192,...,0.0,73.6344,69.768594,16.935912,0.0,71.045691,976.066706,0.0,139.169016,59.162282


In [7]:
# Write one Excel file per year, limited to years from 2013 up to and
# including final_year
for year, ann_df in df.groupby("year"):
    if not (2013 <= year <= final_year):
        continue
    xl_path = f"/home/jovyan/shared/common/teotil3/point_data/{year}/small_wastewater_{year}_raw.xlsx"
    ann_df.to_excel(xl_path, index=False)