In [1]:
import numpy as np
import pandas as pd
from typing import Set
import networkx as nx
from typing import List, Tuple
import string

In [2]:
def read_and_extract_multiple(file_path, target_strings, skip_lines=6):
    """Extract the value lines of several named sections from a text file.

    The file is read as a list of stripped lines. A section starts at the
    first line equal to a target string and ends just before the next line
    equal to '####'. The first ``skip_lines`` lines of the section, counting
    the target line itself, are dropped as header.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.
    target_strings : list of str
        Section names to look up; results are returned in the same order.
    skip_lines : int, optional
        Offset from the target line to the first returned line. Defaults to
        6, the offset this function has always used. Other functions in this
        notebook (e.g. ``find_nseg``) use an offset of 5 on the same file;
        for sections with that shorter header the default drops the first
        value, so pass ``skip_lines=5`` to keep it.

    Returns
    -------
    list
        One entry per target string: a NumPy array of strings, or ``None``
        when the target or a following '####' line is missing. If the file
        does not exist, a list of ``None`` of the same length is returned.
    """
    try:
        with open(file_path, 'r') as file:
            # dtype=str keeps the comparisons below valid for an empty file.
            data = np.array([line.strip() for line in file], dtype=str)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return [None] * len(target_strings)

    # The separator positions do not depend on the target, so find them once.
    separator_rows = np.where(data == '####')[0]

    result_2d_array = []
    for target_string in target_strings:
        target_rows = np.where(data == target_string)[0]
        if len(target_rows) == 0:
            print(f"Target string '{target_string}' not found in the file.")
            result_2d_array.append(None)
            continue
        start_index = target_rows[0]

        # Only separators located after the target can close its section.
        closing_rows = separator_rows[separator_rows > start_index]
        if len(closing_rows) == 0:
            print("No '####' found after the target string.")
            result_2d_array.append(None)
            continue

        result_2d_array.append(data[start_index + skip_lines:closing_rows[0]])

    return result_2d_array

In [3]:
def calculate_metrics(data_list, variable_names):
    """Summarise each extracted value array with a few descriptive statistics.

    Parameters
    ----------
    data_list : list
        One entry per variable: an array-like of numbers (or numeric
        strings), or ``None`` when the variable could not be extracted.
    variable_names : list of str
        Row labels of the result, in the same order as ``data_list``.

    Returns
    -------
    pd.DataFrame
        Indexed by ``variable_names`` with the columns 'Count', 'Mean',
        'Std Dev' (population standard deviation, ``np.std`` default),
        'Max-Min' and 'Coef of Var' (absolute value of std / mean).
        ``None`` entries give a row of NaN; empty arrays give Count 0 and
        NaN for the remaining columns instead of raising.
    """
    columns = ['Count', 'Mean', 'Std Dev', 'Max-Min', 'Coef of Var']
    rows = []

    for data_array in data_list:
        if data_array is None:
            rows.append([np.nan] * len(columns))
            continue

        # asarray accepts lists as well as the string arrays read from file.
        values = np.asarray(data_array, dtype=float)

        if values.size == 0:
            # np.max / np.min raise on empty input; report an empty section.
            rows.append([0, np.nan, np.nan, np.nan, np.nan])
            continue

        mean_value = np.mean(values)
        std_dev_value = np.std(values)
        # A zero mean still yields inf/nan as before, just without the
        # RuntimeWarning cluttering the notebook output.
        with np.errstate(divide='ignore', invalid='ignore'):
            coef_of_var = abs(std_dev_value / mean_value)

        rows.append([
            len(values),
            mean_value,
            std_dev_value,
            np.max(values) - np.min(values),
            coef_of_var,
        ])

    return pd.DataFrame(rows, columns=columns, index=variable_names)

In [4]:
def filter_variables_coef_variation(dataframe, min_coef, max_coef):
    """
    Keep the rows whose coefficient of variation lies in a closed interval.

    Parameters:
    - dataframe (pd.DataFrame): Metrics table with a 'Coef of Var' column.
    - min_coef (float): Smallest coefficient of variation to keep (inclusive).
    - max_coef (float): Largest coefficient of variation to keep (inclusive).

    Returns:
    - pd.DataFrame: The rows of `dataframe` with min_coef <= 'Coef of Var' <= max_coef.
      Rows whose coefficient is NaN never satisfy the comparison and are dropped.
    """
    coefficient = dataframe['Coef of Var']
    above_minimum = coefficient >= min_coef
    below_maximum = coefficient <= max_coef
    return dataframe[above_minimum & below_maximum]


In [5]:
def filter_variables_by_use(dataframe: pd.DataFrame, used: pd.DataFrame) -> pd.DataFrame:
    """
    Keep the rows of a metrics table that are flagged as used.

    Parameters:
    - dataframe (pd.DataFrame): Metrics table indexed by variable name.
    - used (pd.DataFrame): Table sharing that index; only its first column is read,
      and a value of 1 marks the variable as used.

    Returns:
    - pd.DataFrame: Rows of `dataframe` whose flag equals 1, with the original
      columns only (the flag column is not included).
    """
    # Attach the flag column by matching the two indexes.
    flag_column = used.iloc[:, [0]]
    with_flag = pd.merge(dataframe, flag_column, left_index=True, right_index=True)

    # The flag is the last column after the merge.
    is_used = with_flag.iloc[:, -1] == 1
    kept_rows = with_flag[is_used]

    # Return everything except the trailing flag column.
    return kept_rows.iloc[:, :-1]

Par_Paired

In [6]:
def find_nseg(file_path: str, target_id: int) -> List[int]:
    '''Find the segments that drain into a target segment.

    The values between the 'tosegment' header and the next '####' line are
    read as a list in which entry i (1-based) names the segment that
    segment i flows into. A directed graph i -> tosegment[i] is built and
    every segment with a path to the target is collected.

    Parameters
    ----------
    file_path: str
        Path to the text file containing river segment information
    target_id: int
        Target ID for which upstream segments are desired

    Returns
    -------
    nodes: list of int
        IDs of the upstream (ancestor) segments in ascending order, followed
        by ``target_id`` itself. An empty list is returned, after printing a
        message, when the 'tosegment' section or the target segment cannot
        be found.
    '''
    with open(file_path, 'r') as file:
        data = np.array([line.strip() for line in file], dtype=str)

    # Locate the 'tosegment' header line.
    start_rows = np.where(data == 'tosegment')[0]
    if len(start_rows) == 0:
        print("Target string 'tosegment' not found in the file.")
        return []
    start_index = start_rows[0]

    # The section ends at the first '####' after the header.
    end_rows = np.where(data == '####')[0]
    end_rows = end_rows[end_rows > start_index]
    if len(end_rows) == 0:
        print("No '####' found after 'tosegment'.")
        return []

    # Values start five lines below the header line.
    downstream_segments = data[start_index + 5:end_rows[0]]

    # Edge direction is segment -> segment it flows into, so the graph
    # ancestors of the target are the segments upstream of it.
    riv_graph = nx.DiGraph()
    riv_graph.add_edges_from(
        (str(segment), str(downstream))
        for segment, downstream in enumerate(downstream_segments, start=1)
    )

    target_node = str(target_id)
    if target_node not in riv_graph:
        # nx.ancestors would raise NetworkXError for an unknown node.
        print(f"Segment '{target_id}' not found in 'tosegment'.")
        return []

    # Sorting makes the result independent of set iteration order.
    upstream = sorted(int(node) for node in nx.ancestors(riv_graph, target_node))
    return upstream + [target_id]

In [7]:
def remove_duplicates(list1, list2):
    '''Return the values that occur in exactly one of the two lists.

    Values present in both lists are dropped; values present in only one of
    them (either one) are kept. Repeated values within a list count once.

    Parameters
    ----------
    list1: list of int
        First list of integers
    list2: list of int
        Second list of integers

    Returns
    -------
    unique_values: list of int
        Symmetric difference of the two lists, in no particular order
    '''
    # `^` on two sets is the symmetric difference.
    only_in_one = set(list1) ^ set(list2)
    return list(only_in_one)

In [8]:
def combine_nseg(list1, list2):
    '''Concatenate two lists and drop repeated values.

    Parameters
    ----------
    list1: list of int
        First list of integers
    list2: list of int
        Second list of integers

    Returns
    -------
    unique_values: list of int
        Every distinct value found in either list, in no particular order
    '''
    # Building a set from the concatenation removes the duplicates.
    return [*set(list1 + list2)]


In [9]:
def find_nhru(file_path: str, nseg: list) -> List[int]:
    '''Find the nhru index positions whose 'hru_segment' entry is one of `nseg`.

    The values between the 'hru_segment' header and the next '####' line are
    read as a list; the 1-based position of every entry equal to one of the
    requested segments is returned.

    Parameters
    ----------
    file_path: str
        Path to the text file containing river segment information

    nseg: list of index positions of upstream river segments

    Returns
    -------
    nhru: list of int
        1-based positions within the 'hru_segment' values, grouped in the
        order of `nseg`. An empty list is returned, after printing a
        message, when the section cannot be located.
    '''
    with open(file_path, 'r') as file:
        data = np.array([line.strip() for line in file], dtype=str)

    # Locate the 'hru_segment' header line.
    start_rows = np.where(data == 'hru_segment')[0]
    if len(start_rows) == 0:
        print("Target string 'hru_segment' not found in the file.")
        return []
    start_index = start_rows[0]

    # The section ends at the first '####' after the header.
    end_rows = np.where(data == '####')[0]
    end_rows = end_rows[end_rows > start_index]
    if len(end_rows) == 0:
        print("No '####' found after 'hru_segment'.")
        return []

    # Values start five lines below the header line.
    hru_segments = data[start_index + 5:end_rows[0]]

    nhru_values = []
    for seg in nseg:
        # The file content is text, so compare against the segment as a string.
        positions = np.flatnonzero(hru_segments == str(seg)) + 1  # 1-based
        # Plain ints keep the result in line with the List[int] annotation.
        nhru_values.extend(int(position) for position in positions)

    return nhru_values


In [10]:
def write_nseg(array: np.ndarray, dataframe: pd.DataFrame, nhru_results: List[int], updated_strings: List[List[str]], counts=(448, 449)) -> np.ndarray:
    '''Replace selected per-segment values with placeholder strings.

    Only the parameters whose 'Count' in `dataframe` is one of `counts` are
    processed. For each of them the section between the parameter name and
    the next '####' line is located in `array`, and for every index in
    `nhru_results` the value at that 1-based position is replaced by
    ``___<parameter><index>_A``.

    Parameters
    ----------
    array: np.ndarray
        Lines of the parameter file (to be updated).
    dataframe: pd.DataFrame
        Metrics table indexed by parameter name with a 'Count' column.
    nhru_results: List[int]
        1-based positions to replace; the notebook passes the segment list.
    updated_strings: List[List[str]]
        Log extended in place with one
        ``[parameter, original_value, placeholder]`` row per replacement.
    counts: sequence of int, optional
        'Count' values identifying the parameters to process.
        Defaults to (448, 449).

    Returns
    -------
    updated_array: np.ndarray
        The updated array. `array` is modified in place unless its
        fixed-width string dtype is too narrow for a placeholder, in which
        case a widened copy is returned, so always use the return value.
    '''
    filtered_dataframe = dataframe[dataframe['Count'].isin(list(counts))]
    char_size = np.dtype('U1').itemsize

    for string_to_find in filtered_dataframe.index:
        # Locate the parameter's header line.
        start_index = np.where(array == string_to_find)[0]
        if len(start_index) == 0:
            print(f"Target string '{string_to_find}' not found in the array.")
            continue
        start_index = start_index[0]

        # The section ends at the first '####' after the header.
        end_index = np.where(array == '####')[0]
        end_index = end_index[end_index > start_index]
        if len(end_index) == 0:
            print("No '####' found after the target string.")
            continue
        end_index = end_index[0]

        # Values start five lines below the header and stop before '####';
        # the separator line itself must never be overwritten.
        first_value = start_index + 5
        value_count = end_index - first_value

        for nhru_value in nhru_results:
            index_to_update = nhru_value - 1
            if index_to_update < 0 or index_to_update >= value_count:
                print(f"Index value '{nhru_value}' is out of range for '{string_to_find}'. Skipping.")
                continue

            position = first_value + index_to_update
            original_value = array[position]
            updated_value = f'___{string_to_find}{nhru_value}_A'

            # Fixed-width NumPy strings truncate silently on assignment;
            # widen the array first when the placeholder would not fit.
            if array.dtype.kind == 'U' and len(updated_value) > array.dtype.itemsize // char_size:
                array = array.astype(f'U{len(updated_value)}')

            array[position] = updated_value
            updated_strings.append([string_to_find, original_value, updated_value])

    return array

In [11]:
def write_nhru(array: np.ndarray, dataframe: pd.DataFrame, nhru_results: List[int], updated_strings: List[List[str]], counts=(908, 909)) -> np.ndarray:
    '''Replace selected per-HRU values with placeholder strings.

    Only the parameters whose 'Count' in `dataframe` is one of `counts` are
    processed. For each of them the section between the parameter name and
    the next '####' line is located in `array`, and for every index in
    `nhru_results` the value at that 1-based position is replaced by
    ``___<parameter><index>_A``.

    Parameters
    ----------
    array: np.ndarray
        Lines of the parameter file (to be updated).
    dataframe: pd.DataFrame
        Metrics table indexed by parameter name with a 'Count' column.
    nhru_results: List[int]
        1-based nhru positions to replace.
    updated_strings: List[List[str]]
        Log extended in place with one
        ``[parameter, original_value, placeholder]`` row per replacement.
    counts: sequence of int, optional
        'Count' values identifying the parameters to process.
        Defaults to (908, 909).

    Returns
    -------
    updated_array: np.ndarray
        The updated array. `array` is modified in place unless its
        fixed-width string dtype is too narrow for a placeholder, in which
        case a widened copy is returned, so always use the return value.
    '''
    filtered_dataframe = dataframe[dataframe['Count'].isin(list(counts))]
    char_size = np.dtype('U1').itemsize

    for string_to_find in filtered_dataframe.index:
        # Locate the parameter's header line.
        start_index = np.where(array == string_to_find)[0]
        if len(start_index) == 0:
            print(f"Target string '{string_to_find}' not found in the array.")
            continue
        start_index = start_index[0]

        # The section ends at the first '####' after the header.
        end_index = np.where(array == '####')[0]
        end_index = end_index[end_index > start_index]
        if len(end_index) == 0:
            print("No '####' found after the target string.")
            continue
        end_index = end_index[0]

        # Values start five lines below the header and stop before '####';
        # the separator line itself must never be overwritten.
        first_value = start_index + 5
        value_count = end_index - first_value

        for nhru_value in nhru_results:
            index_to_update = nhru_value - 1
            if index_to_update < 0 or index_to_update >= value_count:
                print(f"Index value '{nhru_value}' is out of range for '{string_to_find}'. Skipping.")
                continue

            position = first_value + index_to_update
            original_value = array[position]
            updated_value = f'___{string_to_find}{nhru_value}_A'

            # Fixed-width NumPy strings truncate silently on assignment;
            # widen the array first when the placeholder would not fit.
            if array.dtype.kind == 'U' and len(updated_value) > array.dtype.itemsize // char_size:
                array = array.astype(f'U{len(updated_value)}')

            array[position] = updated_value
            updated_strings.append([string_to_find, original_value, updated_value])

    return array

In [12]:
def write_nmonths(array: np.ndarray, dataframe: pd.DataFrame, nhru_results: List[int], updated_strings: List[List[str]], counts=(10908, 10909)) -> np.ndarray:
    '''Replace twelve consecutive values per selected HRU with placeholder strings.

    Only the parameters whose 'Count' in `dataframe` is one of `counts` are
    processed. For each of them the section between the parameter name and
    the next '####' line is located in `array`. For every index in
    `nhru_results` the twelve values starting at offset ``(index - 1) * 12``
    are replaced by ``___<parameter><index>_A`` ... ``_L``.

    NOTE(review): this assumes the twelve values of one HRU are stored
    contiguously in the section - confirm against the file layout.

    Parameters
    ----------
    array: np.ndarray
        Lines of the parameter file (to be updated).
    dataframe: pd.DataFrame
        Metrics table indexed by parameter name with a 'Count' column.
    nhru_results: List[int]
        1-based nhru positions to replace.
    updated_strings: List[List[str]]
        Log extended in place with one
        ``[parameter, original_value, placeholder]`` row per replacement.
    counts: sequence of int, optional
        'Count' values identifying the parameters to process.
        Defaults to (10908, 10909).

    Returns
    -------
    updated_array: np.ndarray
        The updated array. `array` is modified in place unless its
        fixed-width string dtype is too narrow for a placeholder, in which
        case a widened copy is returned, so always use the return value.
    '''
    filtered_dataframe = dataframe[dataframe['Count'].isin(list(counts))]
    char_size = np.dtype('U1').itemsize

    for string_to_find in filtered_dataframe.index:
        # Locate the parameter's header line.
        start_index = np.where(array == string_to_find)[0]
        if len(start_index) == 0:
            print(f"Target string '{string_to_find}' not found in the array.")
            continue
        start_index = start_index[0]

        # The section ends at the first '####' after the header.
        end_index = np.where(array == '####')[0]
        end_index = end_index[end_index > start_index]
        if len(end_index) == 0:
            print("No '####' found after the target string.")
            continue
        end_index = end_index[0]

        # Values start six lines below the header and stop before '####'.
        first_value = start_index + 6
        value_count = end_index - first_value

        for nhru_value in nhru_results:
            index_to_update = (nhru_value - 1) * 12

            # All twelve positions must lie inside this section; checking
            # against the whole array would allow writes past '####'.
            if index_to_update < 0 or index_to_update + 12 > value_count:
                print(f"Index value '{nhru_value}' is out of range for '{string_to_find}'. Skipping.")
                continue

            block_start = first_value + index_to_update
            # Copy: the slice would otherwise change as values are replaced.
            original_values = array[block_start:block_start + 12].copy()

            for i in range(12):
                # Use letters A-L at the end instead of double digits
                updated_value = f'___{string_to_find}{nhru_value}_{string.ascii_uppercase[i]}'

                # Fixed-width NumPy strings truncate silently on assignment;
                # widen the array first when the placeholder would not fit.
                if array.dtype.kind == 'U' and len(updated_value) > array.dtype.itemsize // char_size:
                    array = array.astype(f'U{len(updated_value)}')

                array[block_start + i] = updated_value
                updated_strings.append([string_to_find, original_values[i], updated_value])

    return array

In [13]:
def write_alt_input(array: np.ndarray, dataframe: pd.DataFrame, nhru_results: List[int], updated_strings: List[List[str]]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    '''Build the multiplier table and its placeholder twin for the monthly parameters.

    The parameters whose 'Count' in `dataframe` is 10908 or 10909 become the
    columns of two tables indexed by `nhru_results`: one filled with 1 and
    one filled with ``___adjust_<parameter>_<nhru>_A`` placeholders.

    Parameters
    ----------
    array: np.ndarray
        Not used by this function; kept so the call signature matches the
        other ``write_*`` helpers.
    dataframe: pd.DataFrame
        Metrics table indexed by parameter name with a 'Count' column.
    nhru_results: List[int]
        nhru values used as the row index of both tables.
    updated_strings: List[List[str]]
        Log extended in place with one ``[parameter, 1, placeholder]`` row
        per cell; 1 is the value the placeholder replaces.

    Returns
    -------
    new_param: pd.DataFrame
        Table filled with 1.
    new_dataframe: pd.DataFrame
        Table of the same shape filled with the placeholder strings.
    '''
    # Filter the DataFrame based on 'Count' column values
    filtered_dataframe = dataframe[dataframe['Count'].isin([10908, 10909])]
    column_names = filtered_dataframe.index.astype(str)

    # Table of ones, built directly rather than via fillna on an empty frame.
    new_param = pd.DataFrame(
        np.ones((len(nhru_results), len(column_names)), dtype=int),
        index=nhru_results,
        columns=column_names,
    )

    # Object dtype so the placeholder strings can be stored cell by cell.
    new_dataframe = pd.DataFrame(index=nhru_results, columns=column_names, dtype=object)

    for column_name in new_dataframe.columns:
        for row_index in new_dataframe.index:
            value = f'___adjust_{column_name}_{row_index}_A'
            new_dataframe.at[row_index, column_name] = value
            # Log the value being replaced (1), not the placeholder itself.
            updated_strings.append([column_name, 1, value])

    return new_param, new_dataframe

In [14]:
def obsin_segment(array: np.ndarray, seg: int, obs: int) -> np.ndarray:
    '''Replace the inflow to a segment with observed flow.

    The section between the 'obsin_segment' line and the next '####' line
    is located in `array`, and the value at 1-based position `seg` is set
    to `obs`.

    Parameters
    ----------
    array: np.ndarray
        Lines of the parameter file (to be updated).
    seg: int
        Segment to replace.
    obs: int
        Index of observed flow.

    Returns
    -------
    updated_array: np.ndarray
        The updated array. `array` is modified in place unless its
        fixed-width string dtype is too narrow for `obs`, in which case a
        widened copy is returned. When the section or the position cannot
        be found a message is printed and `array` is returned unchanged.
    '''
    # Find the index of 'obsin_segment'
    start_index = np.where(array == 'obsin_segment')[0]
    if len(start_index) == 0:
        print("Target string 'obsin_segment' not found in the array.")
        return array
    start_index = start_index[0]

    # The section ends at the first '####' after the header.
    end_index = np.where(array == '####')[0]
    end_index = end_index[end_index > start_index]
    if len(end_index) == 0:
        print("No '####' found after 'obsin_segment'.")
        return array
    end_index = end_index[0]

    # Values start five lines below the header and stop before '####';
    # the separator line itself must never be overwritten.
    first_value = start_index + 5
    value_count = end_index - first_value

    index_to_update = seg - 1
    if index_to_update < 0 or index_to_update >= value_count:
        print(f"Index value '{seg}' is out of range for 'obsin_segment'. Skipping.")
        return array

    # Fixed-width NumPy strings truncate silently on assignment; widen the
    # array first when the text form of `obs` would not fit.
    obs_width = len(str(obs))
    if array.dtype.kind == 'U' and obs_width > array.dtype.itemsize // np.dtype('U1').itemsize:
        array = array.astype(f'U{obs_width}')

    array[first_value + index_to_update] = obs

    return array


In [15]:
def write_ostin_table(ostin_table: pd.DataFrame, updated_strings: np.ndarray) -> pd.DataFrame:
    '''
    Build the parameter table (value, bounds, transformations) from the replacement log.

    Parameters
    ----------
    ostin_table : pd.DataFrame
        Bounds per parameter: indexed by parameter name with the columns
        'low_bound' and 'upper_bound'. A bound of -9999 means "derive it
        from the original value" (20 % below / above it). The table is not
        modified.

    updated_strings : np.ndarray
        Rows of ``[parameter, original_value, placeholder]``; a list of
        lists works as well.

    Returns
    -------
    new_dataframe : pd.DataFrame
        Indexed by the placeholder ('Updated_String') with the columns
        'Original_Value', 'Low_Bound', 'Upper_Bound', '1', '2', '3'.
        Original values equal to 0 become 0.01 and non-numeric ones become 1.
        Rows whose parameter is missing from `ostin_table` get NaN bounds.
    '''
    table = pd.DataFrame(updated_strings, columns=['Parameter', 'Original_Value', 'Updated_String'])

    # Non-numeric originals become NaN here and are filled with 1 at the end.
    # Zeros are nudged to 0.01. Assigning the result back (instead of a
    # chained inplace call) keeps this working under pandas Copy-on-Write.
    original = pd.to_numeric(table['Original_Value'], errors='coerce').replace(0, 0.01)
    table['Original_Value'] = original

    # Look up the configured bounds for every row by parameter name.
    low_bound = table['Parameter'].map(ostin_table['low_bound']).astype(float)
    upper_bound = table['Parameter'].map(ostin_table['upper_bound']).astype(float)

    # min/max keep low <= upper when the original value is negative.
    derived_low = np.minimum(0.8 * original, 1.2 * original)
    derived_high = np.maximum(0.8 * original, 1.2 * original)

    # -9999 is the sentinel for "derive the bound from the original value".
    table['Low_Bound'] = low_bound.where(low_bound != -9999, derived_low)
    table['Upper_Bound'] = upper_bound.where(upper_bound != -9999, derived_high)

    # Set the "Updated_String" column as the index
    table = table.drop(columns='Parameter').set_index('Updated_String')

    # Add three new columns filled with 'none'
    table['1'] = 'none'
    table['2'] = 'none'
    table['3'] = 'none'

    # optionally use keyword extract
    # table['Original_Value']= 'extract'

    # Replace NaN values in 'Original_Value' with 1
    table['Original_Value'] = table['Original_Value'].fillna(1)

    return table

Inputs

In [16]:
# Inputs: file locations, gauge segment IDs, candidate parameters and the
# coefficient-of-variation window used by the cells below.
# NOTE(review): file_path is an absolute, user-specific path; the notebook
# only runs unchanged on a machine that has this file.
file_path = '/home/paulc600/scratch/prms_final/par_files/12_myparam.param' # ensure it is the updated par file
output_file_path = './data/paired_myparam.param'  # placeholder ("paired") copy of the parameter file
ostin_filepath= './written_ostIn.txt'  # generated configuration file
target_ID= 24  # segment whose upstream network is selected by find_nseg
upstream_ID_1= 24 # to remove hrus upstream of a previous gauge (only used in commented-out cells)
upstream_ID_2= 429 # to remove hrus upstream of a previous gauge (only used in commented-out cells)
# Section names looked up in the parameter file. The bounds and `used` lists
# defined in the next cell are matched to this list by position.
target_strings = ['adjmix_rain', 'carea_max', 'cecn_coef','emis_noppt','fastcoef_lin','freeh2o_cap',
                  'gwflow_coef','jh_coef','mann_n','potet_sublim','rad_trncf','radmax','rain_cbh_adj',
                  'slowcoef_sq','smidx_coef', 'snarea_thresh','snowinfil_max','snow_cbh_adj',
                  'soil2gw_max','soil_moist_max','soil_rechr_max_frac','ssr2gw_exp','ssr2gw_rate',
                  'tmax_allrain_offset','tmax_allsnow','tmax_cbh_adj','tmin_cbh_adj']
# Inclusive window passed to filter_variables_coef_variation.
min_coef_variation = 0
max_coef_variation = 20
# Replacement log; the write_* functions append to it in place, so re-running
# a write cell without re-running this one duplicates its entries.
updated_strings = []

In [17]:
# Parameter ranges
# The three lists below are positional: entry i belongs to target_strings[i].
# A bound of -9999 is treated by write_ostin_table as "derive from the
# original value" (0.8x / 1.2x).
low_bound= [0,-9999, 0, 0.757,-9999, 0.01,-9999,0, 0.001, 0.06,-9999, 0, 0, 0, 0.0001,-9999, 
            0, 0, -9999,-9999,-9999,0 , 0.01,0,0, 0, 0]
upper_bound= [2, -9999, 5.5, 1,-9999, 0.1,-9999,2, 0.15, 0.4,-9999, 2, 2, 1, 0.8,-9999, 
            20, 2, -9999, -9999,-9999,3,0.8, 2,2, 2, 2]
# 1 = keep the parameter in filter_variables_by_use, 0 = drop it.
# NOTE(review): the original comment here said tmax_allsnow and cecn were
# being subtracted because of their tiny coefficient of variation, but both
# cecn_coef (position 2) and tmax_allsnow (position 24) are flagged 1 below.
# Confirm which is intended.
used= [1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 
                   0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0]
data = {
    'low_bound': low_bound,
    'upper_bound': upper_bound
}
# Create a DataFrame with target_strings as the index
ostin_table = pd.DataFrame(data, index=target_strings)
# Single-column frame (column label 0) holding the use flags.
use_table= pd.DataFrame(used, index=target_strings)

In [18]:
# One array of value strings (or None) per entry of target_strings.
values = read_and_extract_multiple(file_path, target_strings)

In [19]:
# Summary statistics per parameter, indexed by parameter name.
result_df = calculate_metrics(values, target_strings)

In [20]:
# NOTE(review): this rebinds the name of the function defined above to the
# DataFrame it returns. Running this cell a second time in the same kernel
# fails because a DataFrame is not callable, and later cells rely on the name
# referring to the DataFrame. Renaming the result needs a coordinated change
# in every cell that reads it.
filter_variables_coef_variation = filter_variables_coef_variation(result_df, min_coef_variation, max_coef_variation)

In [21]:
# Display the variables whose coefficient of variation lies inside
# [min_coef_variation, max_coef_variation].
print("Selected variables based on coef of variation:")
print(filter_variables_coef_variation)

Selected variables based on coef of variation:
                     Count       Mean   Std Dev    Max-Min  Coef of Var
adjmix_rain          10908   1.016365  0.204827   0.800000     0.201529
carea_max              908   0.462356  0.252819   1.886716     0.546806
cecn_coef            10908   4.999627  0.443873   2.000000     0.088781
emis_noppt             908   0.832001  0.077010   0.242883     0.092560
fastcoef_lin           908   0.191484  0.133472   0.514845     0.697040
freeh2o_cap            908   0.052786  0.027615   0.090000     0.523143
gwflow_coef            908   0.020989  0.018417   0.092292     0.877490
jh_coef              10908   0.006254  0.002329   0.014163     0.372375
mann_n                 448   0.052369  0.040137   0.148844     0.766438
potet_sublim           908   0.469392  0.075677   0.537019     0.161223
rad_trncf              908   0.667957  0.273844   1.000000     0.409972
radmax               10908   0.759153  0.141742   0.492738     0.186711
rain_cbh_adj     

In [22]:
len(filter_variables_coef_variation)

27

In [23]:
# Keep only the parameters flagged 1 in use_table.
# NOTE(review): this selection is only displayed. The write_nseg, write_nhru
# and write_alt_input cells further down are given
# filter_variables_coef_variation instead, so the `used` flags do not affect
# the written files. Confirm that this is intended.
selected_variables_by_use= filter_variables_by_use(result_df, use_table)

In [24]:
len(selected_variables_by_use)

12

In [25]:
# Display the selected variables
print("Selected variables based on use in StreamFlow Cal:")
print(selected_variables_by_use)

Selected variables based on use in StreamFlow Cal:
                     Count       Mean   Std Dev    Max-Min  Coef of Var
adjmix_rain          10908   1.016365  0.204827   0.800000     0.201529
cecn_coef            10908   4.999627  0.443873   2.000000     0.088781
emis_noppt             908   0.832001  0.077010   0.242883     0.092560
freeh2o_cap            908   0.052786  0.027615   0.090000     0.523143
gwflow_coef            908   0.020989  0.018417   0.092292     0.877490
potet_sublim           908   0.469392  0.075677   0.537019     0.161223
smidx_coef             908   0.019470  0.092222   0.797997     4.736545
soil2gw_max            908   0.227774  0.159264   0.583265     0.699220
soil_moist_max         908   3.654633  2.850470   9.997445     0.779961
soil_rechr_max_frac    908   0.692621  0.300670   0.978949     0.434105
tmax_allrain_offset  10908   7.114998  2.874634  10.000000     0.404025
tmax_allsnow         10908  33.753586  3.450426  13.476460     0.102224


Paired Parameter File

In [26]:
# Read the parameter file into a NumPy array of stripped lines; the write_*
# cells below replace individual values in it with placeholder strings.
# NOTE(review): the array gets a fixed-width string dtype sized to the
# longest line, and NumPy truncates longer strings assigned into it.
with open(file_path, 'r') as file:
    lines = file.readlines()
par_paired = np.array([line.strip() for line in lines])

Define river segment list

In [27]:
# Find the upstream nseg index values (target segment included); these are
# the positions used to write the per-segment parameters.
nseg= find_nseg(file_path, target_ID) 

In [28]:
len(nseg)

4

In [29]:
# find previous or upstream gauge
# previous_nseg1= find_nseg(file_path, upstream_ID_1)
# previous_nseg2= find_nseg(file_path, upstream_ID_2)

In [30]:
# remove previous gauge
# nseg= remove_duplicates(nseg, previous_nseg1)
# nseg= remove_duplicates(nseg, previous_nseg2)

In [31]:
# combine gauge nseg lists
#nseg= combine_nseg(nseg,nseg)
# nseg

Find nhru and write paired parameter file

In [32]:
# 1-based nhru positions whose 'hru_segment' entry is one of the segments in nseg.
nhru= find_nhru(file_path, nseg)

In [33]:
len(nhru)

8

In [34]:
# Replace the per-segment values with placeholders. The segment list is passed
# as the function's `nhru_results` argument, and the table used is the
# coefficient-of-variation selection. Appends to updated_strings, so
# re-running this cell adds duplicate log rows.
par_paired= write_nseg(par_paired, filter_variables_coef_variation, nseg, updated_strings)

In [35]:
# Replace the per-HRU values with placeholders. Appends to updated_strings,
# so re-running this cell adds duplicate log rows.
par_paired= write_nhru(par_paired, filter_variables_coef_variation, nhru, updated_strings)

In [36]:
# par_paired= write_nmonths(par_paired, filter_variables_coef_variation, nhru, updated_strings)

In [37]:
# Build the table of ones (`monthly`) and its placeholder twin
# (`monthly_paired`) for the parameters with a Count of 10908/10909.
# write_alt_input does not read its first argument, so par_paired is left
# untouched here. Appends to updated_strings.
monthly, monthly_paired= write_alt_input(par_paired, filter_variables_coef_variation, nhru, updated_strings)

In [38]:
# Write both tables as space-separated text with the nhru index as the first
# column. The file names match the file pair listed in the ostIn template
# defined further down.
monthly.to_csv('./data/monthly.txt', sep=' ', index=True)
monthly_paired.to_csv('./data/paired_monthly.txt', sep=' ', index=True)

In [39]:
# replace inflow to segment with observed flow
# par_paired=obsin_segment(par_paired, 21, 0)
# par_paired=obsin_segment(par_paired, 353, 5)

In [40]:
# Save the paired parameter array to output_file_path, one array element per line.
np.savetxt(output_file_path, par_paired, fmt='%s')

In [41]:
len(updated_strings)

212

Write ostin

In [42]:
# Turn the replacement log into the parameter table: one row per placeholder
# with its initial value, bounds and three 'none' columns.
ostin= write_ostin_table(ostin_table, updated_strings) 

In [43]:
ostin

Unnamed: 0_level_0,Original_Value,Low_Bound,Upper_Bound,1,2,3
Updated_String,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
___mann_n28_A,0.037624,0.001000,0.15,none,none,none
___mann_n29_A,0.140344,0.001000,0.15,none,none,none
___mann_n25_A,0.083048,0.001000,0.15,none,none,none
___mann_n24_A,0.139748,0.001000,0.15,none,none,none
___carea_max5_A,1.699380,1.359504,2.00,none,none,none
...,...,...,...,...,...,...
___adjust_tmin_cbh_adj_4_A,1.000000,0.000000,2.00,none,none,none
___adjust_tmin_cbh_adj_9_A,1.000000,0.000000,2.00,none,none,none
___adjust_tmin_cbh_adj_20_A,1.000000,0.000000,2.00,none,none,none
___adjust_tmin_cbh_adj_7_A,1.000000,0.000000,2.00,none,none,none


In [44]:
# Header of the generated ostIn file: everything up to and including the
# 'BeginParams' line. The parameter table is written directly after it.
# The file pairs listed here use the same names as the files written by the
# cells above (data/paired_myparam.param and data/paired_monthly.txt).
top_half = """#Basic Configuration for Ostrich Program

#Essential variables
ProgramType ParallelDDS
ModelExecutable ./adj_obsin_calibrate_prms.py
ModelSubdir mod_
ObjectiveFunction GCOP
OstrichWarmStart no

#Template File Configuration
BeginFilePairs
data/paired_myparam.param ; data/myparam.param
data/paired_monthly.txt ; data/monthly.txt
EndFilePairs

BeginExtraFiles
EndExtraFiles

BeginExtraDirs
data
EndExtraDirs


BeginParams"""

In [45]:
# Footer of the generated ostIn file: closes the parameter section opened by
# the header and appends the response-variable, GCOP and ParallelDDS sections.
bottom_half= """EndParams

#Output variables to analyze
BeginResponseVars
#name   filename            keyword         line    col     token
KGE ./data/average_kge.txt ; OST_NULL 0 1 ' '
EndResponseVars

BeginGCOP
CostFunction KGE
PenaltyFunction APM
EndGCOP

#DDS algorithm setup
BeginParallelDDSAlg
PerturbationValue 0.2
MaxIterations 50000
UseInitialParamValues
EndParallelDDSAlg
"""


In [46]:
# Assemble the ostIn file: header, then the parameter table without a column
# header row (space-separated, placeholder name first), then the footer.
with open(ostin_filepath, 'w') as file:
    file.write(top_half + '\n')
    ostin.to_csv(file, header=False, sep=' ')  # You can change the separator if needed
    file.write(bottom_half)