## Script for baseflow separation

Requires streamflow input data. The input xlsx file is first converted to csv (Part 1); the baseflow separation results are then exported as xlsx (Part 2).

Exports a file that includes both total flow and baseflow.

This new file will be used in all the BFMs and TFMs.

Joaquim Altimiras Granel, 2024

In [None]:
# Libraries

import baseflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime

## Part 1: Additional data processing (transform input file to csv)

In [None]:
# Preprocess input data. Use xlsx-file from inputs and place the output file in the models folder. Only needed the first time.
#
# Reads the first two columns of the workbook as (date_time, flow), keeps the
# rows between start_date and end_date (both inclusive), interpolates missing
# flow values, reports how many values were filled, and writes a csv file.

# Input file path
file_path = input("Enter path for file (DAY):")

# Read file (only the first two columns, renamed to date_time / flow)
flow_data = pd.read_excel(file_path, usecols=[0, 1], names=["date_time", "flow"], parse_dates=['date_time'])

# Snippet the data between start and end date
start_date = '2022-12-01'
end_date = '2024-03-19'

# Filter the data between start_date and end_date.
# An explicit copy is taken so that the column assignment below works on an
# independent DataFrame instead of a slice of the frame read from Excel
# (avoids pandas' SettingWithCopyWarning and its ambiguous write semantics).
in_period = (flow_data['date_time'] >= start_date) & (flow_data['date_time'] <= end_date)
flow_data = flow_data.loc[in_period].copy()

# Count missing before interpolation
missing_count_before = flow_data['flow'].isna().sum()

# Interpolate missing values
flow_data['flow'] = flow_data['flow'].interpolate()

# Count missing after interpolation
missing_count_after = flow_data['flow'].isna().sum()

# Calculate the number of interpolated values
interpolated_count = missing_count_before - missing_count_after

# Print the number of interpolated values
print(f"Number of interpolated values: {interpolated_count}")

# Check if there are still any missing values not filled (e.g., at the beginning or end of the series)
if missing_count_after > 0:
    print(f"Warning: There are {missing_count_after} missing values that were not interpolated.")

# Ask for the output directory path
output_dir = input("Enter the directory path to save the file: ")

# Ask for the output file name
output_file_name = input("Enter the output file name (without .csv extension): ")

# Ensure the file name has .csv extension
if not output_file_name.endswith('.csv'):
    output_file_name += '.csv'

# Combine directory and file name to create full path
output_file_path = os.path.join(output_dir, output_file_name)

# Save the data in a csv file at the specified location
# (no index column, comma separated, six decimals for floats)
flow_data.to_csv(output_file_path, index=False, sep=',', float_format='%.6f')

# Confirmation
print(f"Data exported successfully to: {output_file_path}")

## Part 2: Baseflow separation procedure

In [None]:
# Visualize input data
#
# Loads the csv produced in Part 1 (indexed by date_time), restricts it to the
# period between start_date and end_date, and plots the flow column.

input_file = input("Enter path for input file:")

data = pd.read_csv(input_file, parse_dates=['date_time'], index_col='date_time')

# Snippet the data between start and end date
start_date = '2022-12-01'
end_date = '2024-03-19'

# Filter the data between start_date and end_date.
# The index is sorted first so that label-based slicing on the dates is well
# defined even if the rows of the csv are not in chronological order.
data2 = data.sort_index().loc[start_date:end_date]

# Print the first few rows of the DataFrame to verify it's loaded correctly
print(data.head())

# Plotting the data: use the filtered frame, otherwise the date filter above
# has no effect on the figure
plt.figure(figsize=(10, 5))
plt.plot(data2.index, data2['flow'], label='Data', color='blue', linewidth=2)
plt.title('Data Over Time')
plt.xlabel('Date')
plt.ylabel('Data Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# IMPORTANT - Remove yearly filter from a specific function (clean_streamflow) in the package

import baseflow.utils

def custom_clean_streamflow(date, Q):
    """Drop non-finite flow values and return absolute flows with their dates.

    Entries of ``Q`` that are NaN or infinite are removed, together with the
    entries of ``date`` at the same positions. The remaining flows are
    returned as absolute values. No per-year filtering is applied.

    Args:
        date: Array-like that supports boolean-mask indexing, aligned with ``Q``.
        Q: NumPy array of flow values.

    Returns:
        Tuple ``(Q, date)`` holding the cleaned flows and the matching dates.
    """
    finite_mask = np.isfinite(Q)
    return np.abs(Q[finite_mask]), date[finite_mask]

# Override the original clean_streamflow with your custom function.
# This rebinds the attribute on the baseflow.utils module object only.
# NOTE(review): any module that already imported the function by name
# (e.g. `from baseflow.utils import clean_streamflow`) keeps its own reference
# to the original; confirm that the code paths used below actually pick up
# this replacement.
baseflow.utils.clean_streamflow = custom_clean_streamflow

# Re-import of the package; it is already imported at the top of the notebook
import baseflow

In [None]:
# Baseflow separation
#
# Loads the streamflow series, runs the separation, and keeps the series of
# the method with the highest KGE value.

input_file = input("Enter path for input file:")
Q, date = baseflow.load_streamflow(input_file)

# Catchment area in m^2
b, KGEs = baseflow.separation(Q, date, area=9947129)

# Position of the highest KGE; b has one named field per method
best_method_index = KGEs.argmax()
best_method_name = b.dtype.names[best_method_index]
best_baseflow_series = b[best_method_name]

print(f'Best Method: {best_method_name}')

In [None]:
# SPECIAL: Visualize all methods
#
# Runs the baseflow separation and draws one subplot per method showing total
# flow and the separated baseflow, titled with the method's KGE value. The
# method with the highest KGE is reported at the end.

# Baseflow separation

input_file = input("Enter path for input file:")
Q, date = baseflow.load_streamflow(input_file)

# Convert structured date array to pandas datetime series
date = pd.to_datetime({'year': date['Y'], 'month': date['M'], 'day': date['D']})

# Perform baseflow separation
b, KGEs = baseflow.separation(Q, date, area=9947129)  # Catchment area in m^2

# Plotting all baseflow separation methods and their KGE values.
# squeeze=False makes subplots always return a 2-D array of Axes, so indexing
# below also works when only a single method is present (otherwise a lone
# Axes object would be returned and axs[i] would fail).
method_names = b.dtype.names
fig, axs = plt.subplots(len(method_names), 1, figsize=(10, 5*len(method_names)), squeeze=False)  # Adjust size as necessary
axs = axs.ravel()

for i, method in enumerate(method_names):
    axs[i].plot(date, Q, label='Total Flow', color='blue', alpha=0.6)
    axs[i].plot(date, b[method], label='Baseflow - ' + method, color='green')
    axs[i].set_title(f'{method} - KGE: {KGEs[i]:.3f}')
    axs[i].legend()

# Automatically adjust the plot layout
plt.tight_layout()
plt.show()

# Finding and printing the best method based on KGE
best_method_index = KGEs.argmax()
best_method_name = method_names[best_method_index]
best_baseflow_series = b[best_method_name]

print(f'Best Method: {best_method_name}')

In [None]:
# See all methods

import baseflow.methods
import pkgutil

# Collect the name of every module found under the baseflow.methods package
available_methods = [
    module_info.name
    for module_info in pkgutil.iter_modules(baseflow.methods.__path__)
]

print("Available baseflow separation methods:")
# One name per line; nothing extra is printed when no module is found
if available_methods:
    print("\n".join(available_methods))

In [None]:
# Export the results
#
# Writes an Excel file with the dates, the total streamflow and the baseflow
# of one user-selected method. Relies on `Q`, `date` and `b` produced by one
# of the baseflow separation cells above.

import os
import pandas as pd
from datetime import datetime
import baseflow.methods
import pkgutil

# Ensure 'available_methods' is defined. This would typically be a list of methods from baseflow.methods.
available_methods = [method.name for method in pkgutil.iter_modules(baseflow.methods.__path__)]

# User selects the method
selected_method = input("Enter the name of the method you want to export: ")

# Validate user selection
if selected_method not in available_methods:
    raise ValueError(f"Selected method {selected_method} is not available.")

# The series is read from `b` by field name below, so also make sure the
# separation result actually contains the selected method
if selected_method not in (b.dtype.names or ()):
    raise ValueError(f"Selected method {selected_method} is not part of the separation results.")

# Convert the dates to pandas datetimes.
# `date` is either the structured array returned by load_streamflow (fields
# Y, M, D) or an already converted pandas object, depending on which
# separation cell was run last.
date_fields = getattr(getattr(date, 'dtype', None), 'names', None)
if isinstance(date, pd.DatetimeIndex):
    date_datetime = date
elif date_fields and {'Y', 'M', 'D'} <= set(date_fields):
    # Structured date array: assemble datetimes from its year/month/day fields
    date_datetime = pd.to_datetime({'year': date['Y'], 'month': date['M'], 'day': date['D']})
else:
    date_datetime = pd.to_datetime(date)

# Create a DataFrame using the selected method
df = pd.DataFrame({
    'Date': date_datetime,
    'Total Streamflow': Q,
    f'Baseflow ({selected_method})': b[selected_method]
})

# Get the output directory and file name from the user
output_dir = input("Enter the directory path to save the file: ")
file_name = input("Enter the name for the output Excel file (without extension): ")

# Ensure the file name has .xlsx extension
if not file_name.endswith('.xlsx'):
    file_name += '.xlsx'

# Combine directory and file name to create full path
output_file_path = os.path.join(output_dir, file_name)

# Save to Excel (without the DataFrame index column)
df.to_excel(output_file_path, index=False)

print(f'File saved successfully at {output_file_path}')