## Integration der Berechnung der klimatischen Kenngrößen

In [None]:
from glob import glob
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
import os

Festlegung der Gebiets-ID

In [None]:
# Catchment (gauge) ID: selects the input timeseries file and labels the result row
ID = 1

# Empty table that collects the climatic attributes, one column per attribute
df_results = pd.DataFrame()

p_mean

In [None]:
# Load the CAMELS-DE hydromet timeseries of the selected catchment
timeseries_file = f"../output_data/camels_de/timeseries/CAMELS_DE_hydromet_timeseries_{ID}.csv"
df = pd.read_csv(timeseries_file)

# p_mean is the mean of the "precipitation_mean" column over the whole record
precipitation = df["precipitation_mean"]
p_mean = precipitation.mean()

# Store the value, rounded to two decimals, in the row of this catchment
df_results.loc[ID, "p_mean"] = round(p_mean, 2)

# Notebook display of the results collected so far
df_results

p_seasonality

In [None]:
# Seasonal sine model used as the fit function
def sine_curve(day_of_year, mean_value, amplitude, phase_shift):
    """Evaluate a sine-shaped annual cycle around a mean value.

    The cycle has a period of 365.25 days. ``amplitude`` is relative to
    ``mean_value``: the curve swings between ``mean_value * (1 - amplitude)``
    and ``mean_value * (1 + amplitude)``.

    Args:
        day_of_year: Day of the year (scalar, NumPy array or pandas Series).
        mean_value: Value the curve oscillates around.
        amplitude: Relative amplitude of the oscillation.
        phase_shift: Shift of the sine wave, in days.

    Returns:
        The modelled value(s) for ``day_of_year``.
    """
    angle = 2 * np.pi * (day_of_year - phase_shift) / 365.25
    return mean_value * (1 + amplitude * np.sin(angle))

In [None]:
# Make sure the date column holds datetimes so calendar fields can be derived
df["date"] = pd.to_datetime(df["date"])

# Day of the year is the independent variable of both seasonal fits
df["day_of_year"] = df["date"].dt.dayofyear

# Long-term means serve as first guess for the mean value of each sine curve
average_precipitation = df["precipitation_mean"].mean()
average_temperature = df["temperature_mean"].mean()

# First guess for the precipitation phase shift, based on the calendar month
# (1-12) with the highest mean precipitation (~30 days per month), wrapped to
# [0, 360). The row label of the single wettest day, as returned by idxmax()
# on the raw series, is not a month number and would give an arbitrary guess.
monthly_precipitation = df.groupby(df["date"].dt.month)["precipitation_mean"].mean()
wettest_month = monthly_precipitation.idxmax()
initial_phase_shift_guess_prec = (90 - wettest_month * 30) % 360

initial_phase_shift_guess_temp = -90

# curve_fit rejects non-finite input by default, so fit only on rows without
# missing values (the means above skip NaN as well)
prec_samples = df[["day_of_year", "precipitation_mean"]].dropna()
temp_samples = df[["day_of_year", "temperature_mean"]].dropna()

# Fit the sine curve to precipitation and to temperature;
# p0 = [mean value, relative amplitude, phase shift]
# NOTE(review): sine_curve scales the amplitude by the mean value, so the sign
# of the fitted temperature amplitude is only meaningful for a positive mean
# temperature - confirm for cold catchments.
optimized_parameters_prec, parameter_covariances_prec = curve_fit(
    sine_curve,
    prec_samples["day_of_year"],
    prec_samples["precipitation_mean"],
    p0=[average_precipitation, 0.4, initial_phase_shift_guess_prec],
)
optimized_parameters_temp, parameter_covariances_temp = curve_fit(
    sine_curve,
    temp_samples["day_of_year"],
    temp_samples["temperature_mean"],
    p0=[average_temperature, 5, initial_phase_shift_guess_temp],
)

# The phase shifts are optimized_parameters[2]
precipitation_seasonality = optimized_parameters_prec[2]
temperature_seasonality = optimized_parameters_temp[2]

# The amplitudes are optimized_parameters[1]
amplitude_prec = optimized_parameters_prec[1]
amplitude_temp = optimized_parameters_temp[1]

# p_seasonality: precipitation amplitude, signed by the temperature amplitude
# and weighted by the cosine of the phase difference between both cycles
phase_difference = precipitation_seasonality - temperature_seasonality
p_seasonality = amplitude_prec * np.sign(amplitude_temp) * np.cos(2 * np.pi * phase_difference / 365.25)

# Store the value, rounded to two decimals, in the row of this catchment
df_results.loc[ID, "p_seasonality"] = round(p_seasonality, 2)

# Notebook display of the results collected so far
df_results

frac_snow

In [None]:
# frac_snow: share of the precipitation that fell on days with a mean
# temperature below 0 °C. Days without a temperature value match neither
# mask and therefore do not enter the ratio.
is_freezing = df["temperature_mean"] < 0
is_not_freezing = df["temperature_mean"] >= 0

sum_precip_snow = df.loc[is_freezing, "precipitation_mean"].sum()
sum_precip_water = df.loc[is_not_freezing, "precipitation_mean"].sum()

total_precip = sum_precip_snow + sum_precip_water
frac_snow = sum_precip_snow / total_precip

# Store the value, rounded to two decimals, in the row of this catchment
df_results.loc[ID, "frac_snow"] = round(frac_snow, 2)

# Notebook display of the results collected so far
df_results

high_prec_freq

In [None]:
# A high-precipitation day has at least five times the mean precipitation
p_mean = df["precipitation_mean"].mean()
high_precip_threshold = 5 * p_mean

# Share of days at or above the threshold, scaled to days per year (365.25)
is_high_precip_day = df["precipitation_mean"] >= high_precip_threshold
n_days_high_freq = int(is_high_precip_day.sum()) / len(df) * 365.25

# Store the value, rounded to two decimals, in the row of this catchment
df_results.loc[ID, "high_prec_freq"] = round(n_days_high_freq, 2)

df_results.head()

high_prec_dur

In [None]:
# A high-precipitation day has at least five times the mean precipitation
p_mean = df["precipitation_mean"].mean()
is_high_precip_day = (df["precipitation_mean"] >= 5 * p_mean).to_numpy()

# Pad with a non-event day on both sides so that every run of consecutive
# high-precipitation days has a rising (+1) and a falling (-1) edge
padded_flags = np.concatenate(([False], is_high_precip_day, [False])).astype(int)
edges = np.diff(padded_flags)
run_starts = np.flatnonzero(edges == 1)
run_ends = np.flatnonzero(edges == -1)

# Length (in days) of every high-precipitation event, in order of occurrence
high_precip_streaks = (run_ends - run_starts).tolist()

# Mean event duration; 0 when the record contains no such event
if high_precip_streaks:
    average_streak_length = sum(high_precip_streaks) / len(high_precip_streaks)
else:
    average_streak_length = 0

# Store the value, rounded to two decimals, in the row of this catchment
df_results.loc[ID, "high_prec_dur"] = round(average_streak_length, 2)

df_results.head()

low_prec_freq

In [None]:
# Number of days per year with less than 1 mm of precipitation
# Share of dry days in the record, scaled to an average count per year (365.25 days)
is_dry_day = df["precipitation_mean"] < 1
n_days_low_freq = int(is_dry_day.sum()) / len(df) * 365.25

# Store the value, rounded to two decimals, in the row of this catchment
df_results.loc[ID, "low_prec_freq"] = round(n_days_low_freq, 2)

df_results.head()

low_prec_dur

In [None]:
# A dry day has less than 1 unit of precipitation in "precipitation_mean"
is_dry_day = (df["precipitation_mean"] < 1).to_numpy()

# Pad with a non-dry day on both sides so that every run of consecutive
# dry days has a rising (+1) and a falling (-1) edge
padded_flags = np.concatenate(([False], is_dry_day, [False])).astype(int)
edges = np.diff(padded_flags)
run_starts = np.flatnonzero(edges == 1)
run_ends = np.flatnonzero(edges == -1)

# Length (in days) of every dry spell, in order of occurrence
low_precip_streaks = (run_ends - run_starts).tolist()

# Mean dry-spell duration; 0 when the record contains no dry day
if low_precip_streaks:
    average_streak_length = sum(low_precip_streaks) / len(low_precip_streaks)
else:
    average_streak_length = 0

# Store the value, rounded to two decimals, in the row of this catchment
df_results.loc[ID, "low_prec_dur"] = round(average_streak_length, 2)

df_results.head()

df_results

Ergebnisse speichern

In [None]:
# File that collects the climatic attributes of all processed catchments
data_file = "../output_data/camels_de/CAMELS_DE_climatic_attributes.csv"

if os.path.exists(data_file):
    # Append the new row(s) to what has been saved by earlier runs
    df_existing = pd.read_csv(data_file, index_col="gauge_id")
    df_combined = pd.concat([df_existing, df_results])

    # Re-running the notebook for a gauge that is already in the file must
    # replace its row instead of adding a second one with the same gauge_id;
    # keep="last" retains the freshly computed values
    df_combined = df_combined[~df_combined.index.duplicated(keep="last")]
else:
    # First run: the results of this catchment are the whole table
    df_combined = df_results

# Write the table back; the index holds the gauge IDs
df_combined.to_csv(data_file, index_label="gauge_id")