In [11]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import functools as ft
import shapefile
from datetime import datetime, timedelta
import numpy as np
import utm
import sys

sys.path.append("..")
from tools import data_path, output_path, cache_path

### Data origin
The data was downloaded manually from the web service of the LHW Sachsen-Anhalt (Landesbetrieb für Hochwasserschutz und Wasserwirtschaft):
https://gld.lhw-sachsen-anhalt.de/

In [12]:
# Root folder of the manually downloaded Saxony-Anhalt export.
mypath = data_path + "/sachsen_anhalt/"

# Despite its name, `onlyfiles` collects every entry of `mypath` that is NOT
# a regular file (i.e. the sub-folders), each prefixed with `mypath`.
onlyfiles = []
for entry in listdir(mypath):
    if isfile(join(mypath, entry)):
        continue
    onlyfiles.append(mypath + entry)

In [13]:
# Gather one metadata table per station folder.
# A station folder contributes only if it holds exactly one file whose name
# contains "Metadaten"; all other folders are skipped silently.
stack = []
for group_dir in onlyfiles:
    for station_entry in listdir(group_dir):
        if isfile(join(group_dir, station_entry)):
            continue  # only sub-folders are treated as stations
        station_dir = group_dir + "/" + station_entry
        meta_files = [
            station_dir + "/" + fname
            for fname in listdir(station_dir)
            if "Metadaten" in fname
        ]
        if len(meta_files) != 1:
            continue
        # The first two parsed rows are discarded before the table is used.
        station_meta = pd.read_csv(meta_files[0], sep=";")[2:]
        # Key the rows by the "Metadatum" column, then remove that column.
        station_meta.index = station_meta["Metadatum"].values
        station_meta.drop(columns=["Metadatum"], inplace=True)
        stack.append(station_meta)

In [14]:
# Put the per-station tables side by side and transpose, so that every
# original column becomes one row of `meta_data`.
meta_data = pd.concat(stack, axis=1).T
# Use the station code ("Pegelkennziffer") as row label.
meta_data.index = meta_data["Pegelkennziffer"].values
meta_data = meta_data.drop_duplicates()
# The first three columns are not needed further on.
meta_data = meta_data.drop(columns=meta_data.columns[:3])
# Cut the last three characters (presumably a unit suffix - TODO confirm),
# switch the decimal comma to a point and convert to float.
distance_text = meta_data["Entfernung zur Mündung"].str[:-3]
meta_data["Entfernung zur Mündung"] = distance_text.str.replace(",", ".").astype(float)

In [15]:
# Turn the (unnamed) station-code index into a regular column; pandas names
# that column "index". Note: running this cell twice adds a second column.
meta_data = meta_data.reset_index()

In [16]:
# add data from meta export.
# Add data from the meta export (shapefile with one record per gauge).
shapefile_base = (
    data_path
    + "/Export/Pegel Wasserstand-Durchfluss/Pegel Wasserstand-Durchfluss"
)
sf = shapefile.Reader(shapefile_base)
records = sf.records()
# The first entry of `sf.fields` is skipped (in pyshp this is the
# "DeletionFlag" pseudo-field); the remaining entries start with the name.
columns = [field[0] for field in sf.fields[1:]]
full_meta = pd.DataFrame(list(map(list, records)), columns=columns)

In [17]:
# Left join: keep every station from the downloaded metadata and attach the
# shapefile attributes whose "kennziffer" equals the station code.
meta_data = pd.merge(
    meta_data,
    full_meta,
    how="left",
    left_on="index",
    right_on="kennziffer",
)

In [18]:
# Load one discharge series per station folder.
#   stack      - parsed frames with columns ["datetime", <station name>]
#   name_stack - station names already collected
#   double     - number of frames skipped because their name was seen before
stack = []
name_stack = []
double = 0
for group_dir in onlyfiles:
    station_dirs = [
        group_dir + "/" + entry
        for entry in listdir(group_dir)
        if not isfile(join(group_dir, entry))
    ]
    for station_dir in station_dirs:
        data_files = [
            station_dir + "/" + fname
            for fname in listdir(station_dir)
            if "Metadaten" not in fname
        ]
        if len(data_files) != 1:
            # Folders without exactly one data file are only reported.
            print(data_files)
            continue

        data_file = data_files[0]
        # Station name = file name up to the first comma.
        name = data_file.split(",")[0].split("/")[-1]

        series = pd.read_csv(data_file, sep=";")[2:]
        series.columns = ["DT", "Q"]
        # dt fix so we can convert: stamps that are not 19 characters long
        # get a midnight time appended.
        lacks_time = series["DT"].str.len() != 19
        series.loc[lacks_time, "DT"] += " 00:00:00"
        series["DT"] = pd.to_datetime(series["DT"], format="%d.%m.%Y %H:%M:%S")
        # Decimal comma -> decimal point before the float conversion.
        series["Q"] = series["Q"].str.replace(",", ".").astype(float)

        if name in name_stack:
            double += 1
            continue
        series.columns = ["datetime", name]
        name_stack.append(name)
        stack.append(series)

In [19]:
# Discard frames that contain no rows at all.
stack = [frame for frame in stack if len(frame)]

In [20]:
def _outer_join_on_datetime(left, right):
    """Outer-join two frames on their shared 'datetime' column."""
    return pd.merge(left, right, on='datetime', how='outer')


# Fold all station frames into one wide table: one row per timestamp that
# occurs in any station, one column per station.
df_final = ft.reduce(_outer_join_on_datetime, stack)

In [21]:
# Keep only rows whose timestamp lies in the years 2019 through 2023.
in_study_period = df_final["datetime"].dt.year.between(2019, 2023)
df_final = df_final[in_study_period]

In [22]:
# Append "_sa" to every column after the first; the first column is
# relabelled "datetime". Note: re-running this cell appends the suffix again.
station_columns = list(df_final.columns[1:])
df_final.columns = ["datetime", *(station + "_sa" for station in station_columns)]

In [23]:
# Regular 15-minute grid from 2019-01-01 00:00 up to (excluding) the stop
# value, i.e. the last stamp is 2023-12-31 23:45.
# NOTE(review): `t` is not referenced by any other cell visible here.
grid_start = datetime(2019, 1, 1, 0, 0)
grid_stop = datetime(2023, 12, 31, 23, 46)
grid_step = timedelta(minutes=15)
grid_stamps = np.arange(grid_start, grid_stop, grid_step).astype(datetime)
t = pd.to_datetime(grid_stamps)

In [24]:
# Move the "datetime" column into the index (the index keeps that name).
df_final = df_final.set_index("datetime")

In [25]:
# Station codes present in the metadata for which `df_final` has no
# "<code>_sa" column, i.e. no series was loaded. These get dropped below.
series_columns = set(df_final.columns)
[code for code in meta_data["index"] if code + "_sa" not in series_columns]

['597008']

In [26]:
# Row label of the first metadata row belonging to station '597008'.
is_station_597008 = meta_data["index"] == '597008'
meta_data.loc[is_station_597008].index[0]

np.int64(62)

In [27]:
# Remove station '597008' from the metadata and renumber the rows.
# Once the row is gone, a second run raises IndexError, because the lookup
# below then finds no match.
matches_597008 = meta_data["index"] == '597008'
label_to_drop = meta_data[matches_597008].index[0]
meta_data = meta_data.drop(index=label_to_drop)
meta_data = meta_data.reset_index(drop=True)

## Bring the metadata into the output format

In [28]:
# Remember the station codes; they become the row labels of the trimmed
# metadata table.
ids = meta_data.loc[:, "index"].values

In [29]:
# Reduce the metadata to the five columns that are exported and label the
# rows with the station codes under the index name "ID".
kept_columns = ["Gewässer", "Entfernung zur Mündung", "rw", "hw", "pnp"]
meta_data = meta_data.loc[:, kept_columns]
meta_data.index = pd.Index(ids, name="ID")

In [30]:
# Strip the textual labels that follow the number in "pnp" (presumably
# height-reference names - TODO confirm). Order matters: the labels are
# removed one after another exactly as listed.
pnp_text = meta_data["pnp"]
for label in ("m+NN", " m+HN", " DHHN 92", " DHHN 12"):
    pnp_text = pnp_text.str.replace(label, "", regex=False)
meta_data["pnp"] = pnp_text

In [31]:
# Blank out the "pnp" value of station "578370" before the column is
# converted to float (presumably its text is not numeric - TODO confirm).
# NOTE(review): if "578370" is not a row label, `.loc` assignment appends a
# new, otherwise empty row instead of raising.
meta_data.loc["578370", "pnp"] = None

In [32]:
# Convert the cleaned "pnp" strings to floats; all other columns keep
# their dtype.
meta_data = meta_data.astype({"pnp": float})

In [33]:
# Positional rename of the five columns to their short output codes.
short_names = ["R", "D", "X", "Y", "H"]
meta_data = meta_data.set_axis(short_names, axis="columns")

In [34]:
# Coordinate parsing: interpret X / Y as UTM easting / northing in zone 32,
# latitude band "U", and overwrite them with geographic coordinates.
# `utm.to_latlon` returns (latitude, longitude), so after this loop X holds
# the latitude and Y the longitude.
# NOTE(review): confirm that consumers expect this X/Y order.
for ind, line in meta_data.iterrows():
    try:
        meta_data.loc[ind, ["X", "Y"]] = utm.to_latlon(
            int(line["X"]), int(line["Y"]), 32, "U"
        )
    except (TypeError, ValueError, OverflowError):
        # Only conversion problems are tolerated: missing or non-numeric
        # coordinates, and values outside the UTM range (utm's
        # OutOfRangeError derives from ValueError). A bare `except:` would
        # also swallow KeyboardInterrupt and unrelated programming errors.
        # The row keeps its original X / Y values; its ID is reported.
        print(ind)

In [35]:
# Add one quality-flag column per value column, initialised to -1.
for flag_column in ("QD", "QH", "QX", "QY", "QR"):
    meta_data[flag_column] = -1

In [36]:
# Set a quality flag to 0 wherever the corresponding value is present
# (not null); rows with a missing value keep their previous flag.
flag_for_value = (
    ("X", "QX"),
    ("Y", "QY"),
    ("D", "QD"),
    ("R", "QR"),
    ("H", "QH"),
)
for value_column, flag_column in flag_for_value:
    has_value = meta_data[value_column].notna()
    meta_data.loc[has_value, flag_column] = 0

In [37]:
# Write the station metadata table (row labels = station IDs) to CSV.
meta_csv_path = output_path + "/saxony_anhalt_meta_data.csv"
meta_data.to_csv(meta_csv_path)

In [38]:
# Write the merged discharge series to CSV inside `output_path`.
# The file name used to be concatenated without a path separator, so with
# an `output_path` that has no trailing slash the file ended up next to the
# output folder under a fused name. `join` inserts the separator only when
# it is missing.
df_final.to_csv(join(output_path, "saxony_anhalt_processed.csv"))