In [31]:
import numpy as np
import pandas as pd

# PyRQP proof of concept

## Input parameters

In [32]:
# Flows
# River flow is described by its mean and its "95th low flow" value; a later
# cell converts that pair into a standard deviation. Discharge flow is given
# directly as mean and standard deviation.
# NOTE(review): units are not stated anywhere in this notebook - confirm.
riv_flow_mean = 100
riv_flow_95pc = 20
dis_flow_mean = 20
dis_flow_sd = 8

# Water quality
# Mean and standard deviation of river (riv_) and discharge (dis_) quality.
riv_wq_mean = 2
riv_wq_sd = 1
dis_wq_mean = 15
dis_wq_sd = 7

# Correlations
# Passed to calculate_multivariate_log_normal as, in order: river flow vs
# discharge flow, river flow vs river quality, discharge flow vs discharge
# quality. That function applies them to the log-space standard deviations
# and sets every other covariance to zero.
corr_riv_dis_flow = 0.6
corr_riv_flow_wq = -0.3
corr_dis_flow_wq = -0.2

# Random seed for reproducibility
# Seeds NumPy's global random state, which np.random.multivariate_normal
# (used by the simulation function) draws from.
np.random.seed(42)

## Functions

In [33]:
def transform_log_to_normal(lg_mean, lg_sd):
    """
    Convert the mean and standard deviation of a lognormal variable
    into the mean and standard deviation of its underlying normal
    distribution (the distribution of the logged values), using the
    method of moments.

    Returns a ``(mean, sd)`` tuple expressed in log space.
    """
    # 1 + (sd / mean)^2 appears in both moment equations: compute it once
    spread = 1 + (lg_sd**2) / (lg_mean**2)
    normal_sd = np.log(spread) ** 0.5
    normal_mean = np.log(lg_mean / spread**0.5)
    return normal_mean, normal_sd

In [34]:
def calculate_covariance(corr, std_1, std_2):
    """
    Return the covariance of two variables given their correlation
    coefficient and their individual standard deviations, i.e.
    ``corr * std_1 * std_2``.
    """
    return corr * std_1 * std_2

In [35]:
def calculate_log_mean_sd_from_95pc(lg_mean, lg_95pc):
    """
    Derive the standard deviation of a lognormal variable from its
    mean and its 95th low flow percentile.

    With log-space parameters (mu, sigma) a lognormal variable has
        mean       = exp(mu + sigma**2 / 2)
        percentile = exp(mu - z * sigma),   z = 1.644854
    Eliminating mu gives a quadratic in sigma whose positive root is
        sigma = sqrt(z**2 + 2 * ln(mean / percentile)) - z
    and the real-space standard deviation then follows from
        sd = mean * sqrt(exp(sigma**2) - 1)

    Parameters
    ----------
    lg_mean : mean of the lognormal variable (real space).
    lg_95pc : 95th low flow percentile; must satisfy 0 < lg_95pc < lg_mean.

    Returns
    -------
    (lg_mean, lg_sd) : the mean, passed through unchanged, and the
        standard deviation of the lognormal variable in real space
        (not the log-space sigma).

    Raises
    ------
    ValueError : if lg_95pc is not strictly between 0 and lg_mean. No
        lognormal distribution has a low-flow percentile at or above its
        mean, and the formula would otherwise return a meaningless value.
    """
    if np.any(np.asarray(lg_95pc) <= 0) or np.any(
        np.asarray(lg_95pc) >= np.asarray(lg_mean)
    ):
        raise ValueError(
            "lg_95pc must be strictly between 0 and lg_mean "
            f"(got lg_mean={lg_mean!r}, lg_95pc={lg_95pc!r})"
        )

    # Standard normal deviate for the 95% point and its square; the literals
    # are kept exactly as originally coded so results do not shift.
    z_95 = 1.644854
    z_95_squared = 2.705543

    # Log-space standard deviation (positive root of the quadratic)
    sigma = (z_95_squared + 2 * np.log(lg_mean) - 2 * np.log(lg_95pc)) ** 0.5 - z_95

    # Back to a real-space standard deviation
    lg_sd = lg_mean * (np.exp(sigma**2) - 1) ** 0.5
    return lg_mean, lg_sd

In [36]:
def calculate_multivariate_log_normal(
    mean1, std1, mean2, std2, mean3, std3, mean4, std4, corr1_2, corr1_3, corr2_4,
    size=100000,
):
    """
    The main equation in RQP: generate a correlated lognormal random
    multivariate dataset for four variables.

    Parameters
    ----------
    mean1, std1 ... mean4, std4 :
        Real-space mean and standard deviation of each lognormal variable.
        Variables 1-4 become the output columns ``riv_flow``, ``dis_flow``,
        ``riv_qual`` and ``dis_qual`` respectively.
    corr1_2, corr1_3, corr2_4 :
        Correlation coefficients between variables 1-2, 1-3 and 2-4.
        They are applied to the log-space standard deviations; all other
        pairs are treated as uncorrelated.
    size : int, optional
        Number of random samples (rows) to draw. Defaults to 100000.

    Returns
    -------
    df : pandas.DataFrame
        ``size`` rows of simulated real-space values, one column per variable.
    cov_matrix : numpy.ndarray
        The 4x4 log-space covariance matrix used for the draw.

    Notes
    -----
    Samples come from NumPy's global random state, so call
    ``np.random.seed`` beforehand for reproducible output.
    """
    # Transform parameters to normal (log space)
    mean1, std1 = transform_log_to_normal(mean1, std1)
    mean2, std2 = transform_log_to_normal(mean2, std2)
    mean3, std3 = transform_log_to_normal(mean3, std3)
    mean4, std4 = transform_log_to_normal(mean4, std4)

    # Calculate covariances for the three correlated pairs
    cov1_2 = calculate_covariance(corr1_2, std1, std2)
    cov1_3 = calculate_covariance(corr1_3, std1, std3)
    cov2_4 = calculate_covariance(corr2_4, std2, std4)

    # Build the symmetric covariance matrix (variances on the diagonal)
    cov_matrix = np.array(
        [
            [std1**2, cov1_2, cov1_3, 0],
            [cov1_2, std2**2, 0, cov2_4],
            [cov1_3, 0, std3**2, 0],
            [0, cov2_4, 0, std4**2],
        ]
    )

    # Generate normal random multivariate in log space
    data = np.random.multivariate_normal(
        [mean1, mean2, mean3, mean4], cov_matrix, size=size
    )

    # Transform to lognormal and build dataframe
    df = pd.DataFrame(
        np.exp(data), columns=["riv_flow", "dis_flow", "riv_qual", "dis_qual"]
    )

    return df, cov_matrix

## Calculate downstream quality

In [37]:
# Retrieve river flow standard deviation from 95th low flow.
# The helper returns (mean, sd). Index the sd directly instead of unpacking
# into `_`, because binding `_` in the notebook namespace interferes with
# IPython's last-output variable of the same name.
riv_flow_sd = calculate_log_mean_sd_from_95pc(riv_flow_mean, riv_flow_95pc)[1]

In [38]:
# Generate the random dataset; arguments are passed by name so the mapping
# of each input to its variable slot is explicit
df, cov_matrix = calculate_multivariate_log_normal(
    # Flow: river is variable 1, discharge is variable 2
    mean1=riv_flow_mean,
    std1=riv_flow_sd,
    mean2=dis_flow_mean,
    std2=dis_flow_sd,
    # Quality: river is variable 3, discharge is variable 4
    mean3=riv_wq_mean,
    std3=riv_wq_sd,
    mean4=dis_wq_mean,
    std4=dis_wq_sd,
    # Correlations
    corr1_2=corr_riv_dis_flow,
    corr1_3=corr_riv_flow_wq,
    corr2_4=corr_dis_flow_wq,
)

In [39]:
# Mix river and discharge: downstream flow is the sum of the two flows and
# downstream quality is their flow-weighted mean quality
df = (
    df
    .eval("ds_flow = riv_flow + dis_flow")
    .eval("ds_qual = (riv_flow * riv_qual + dis_flow * dis_qual) / ds_flow")
)

### Descriptive statistics

In [40]:
# Mean, standard deviation and upper percentiles of every simulated series
stats = df.agg(["mean", "std"]).T.assign(
    **{
        "90pc": df.quantile(0.90),
        "95pc": df.quantile(0.95),
        "99pc": df.quantile(0.99),
    }
)
stats

Unnamed: 0,mean,std,90pc,95pc,99pc
riv_flow,99.951257,93.495176,200.593719,267.428607,463.378501
dis_flow,19.955546,7.955544,30.349277,34.917133,45.342075
riv_qual,1.996556,1.000006,3.279178,3.889167,5.345516
dis_qual,15.009708,6.998242,24.068877,28.207937,37.95063
ds_flow,119.906803,98.05645,226.845576,295.633356,498.468296
ds_qual,4.737805,2.268376,7.610431,8.9616,12.380569


In [41]:
# Pairwise correlation of the simulated real-space series. The input
# correlations were applied in log space, so these values are expected to
# differ somewhat from the configured coefficients.
df.corr()

Unnamed: 0,riv_flow,dis_flow,riv_qual,dis_qual,ds_flow,ds_qual
riv_flow,1.0,0.544786,-0.228121,-0.001259,0.997683,-0.45737
dis_flow,0.544786,1.0,-0.001764,-0.181875,0.600576,-0.19199
riv_qual,-0.228121,-0.001764,1.0,0.001222,-0.217653,0.532113
dis_qual,-0.001259,-0.181875,0.001222,1.0,-0.015957,0.576689
ds_flow,0.997683,0.600576,-0.217653,-0.015957,1.0,-0.451671
ds_qual,-0.45737,-0.19199,0.532113,0.576689,-0.451671,1.0


## Backward calculation

In [42]:
# Objective downstream quality
# `target` is the value that the `percentile` quantile of the downstream
# quality distribution is scaled to meet in the backward calculation.
target = 6
percentile = 0.9

### Solution by iteration

In [43]:
# First pass: rescale the simulated downstream quality so that its chosen
# percentile equals the target
scale = target / df["ds_qual"].quantile(percentile)
print(scale)
df = df.assign(ds_qual_target=lambda d: d["ds_qual"] * scale)
# Mass balance solved for the discharge quality that would give that
# downstream quality, sample by sample
df = df.eval("dis_qual_target = (ds_flow * ds_qual_target - riv_flow * riv_qual) / dis_flow")
# Keep the original discharge-quality CoV: rescale the simulated discharge
# quality so that only its mean moves to the back-calculated mean
adj_factor = df["dis_qual_target"].mean() / df["dis_qual"].mean()
df = df.assign(dis_qual_target=lambda d: d["dis_qual"] * adj_factor)
# Forward mass balance with the adjusted discharge quality
df = df.eval("ds_qual_target = (riv_flow * riv_qual + dis_flow * dis_qual_target) / ds_flow")
# Remaining mismatch between the target and the achieved percentile
scale = target / df["ds_qual_target"].quantile(percentile)

0.7883915854705645


In [44]:
# Repeat the adjustment until the chosen percentile of the downstream quality
# matches the target, i.e. until the scale factor rounds to 1 at 4 decimals.
# A NaN/inf scale never satisfies that test, so both a finite check and an
# iteration cap guard against the loop hanging the kernel.
max_iterations = 100
iteration = 0
while round(scale, 4) != 1:
    if not np.isfinite(scale):
        raise RuntimeError(
            f"Backward calculation diverged: scale factor is {scale}"
        )
    if iteration >= max_iterations:
        raise RuntimeError(
            f"Backward calculation did not converge after {max_iterations} "
            f"iterations (last scale factor {scale})"
        )
    iteration += 1
    print(scale)
    # Calculate adjustment factors and scale distribution
    df["ds_qual_target"] = df["ds_qual_target"] * scale
    # Recalculate discharge quality target
    df = df.eval("dis_qual_target = (ds_flow * ds_qual_target - riv_flow * riv_qual) / dis_flow")
    # Rescale the simulated discharge quality to the new mean, keeping its CoV
    adj_factor = df["dis_qual_target"].mean() / df["dis_qual"].mean()
    df["dis_qual_target"] = df["dis_qual"] * adj_factor
    # Re-calculate ds water quality
    df = df.eval("ds_qual_target = (riv_flow * riv_qual + dis_flow * dis_qual_target) / ds_flow")
    # Check the scale
    scale = target / df["ds_qual_target"].quantile(percentile)

1.0455808406236173
0.9909438070207206
1.0016086570370895
0.9996822206108644


### Results

In [45]:
# Summary of every series, including the back-calculated targets.
# "cov" is the coefficient of variation (std / mean), not a covariance.
stats = df.agg(["mean", "std"]).T.assign(
    **{
        "90pc": df.quantile(0.90),
        "95pc": df.quantile(0.95),
        "99pc": df.quantile(0.99),
        "99.5pc": df.quantile(0.995),
        "cov": lambda s: s["std"] / s["mean"],
    }
)
stats

Unnamed: 0,mean,std,90pc,95pc,99pc,99.5pc,cov
riv_flow,99.951257,93.495176,200.593719,267.428607,463.378501,574.784732,0.935408
dis_flow,19.955546,7.955544,30.349277,34.917133,45.342075,50.01226,0.398663
riv_qual,1.996556,1.000006,3.279178,3.889167,5.345516,6.023642,0.500866
dis_qual,15.009708,6.998242,24.068877,28.207937,37.95063,42.489107,0.466248
ds_flow,119.906803,98.05645,226.845576,295.633356,498.468296,612.834816,0.817772
ds_qual,4.737805,2.268376,7.610431,8.9616,12.380569,14.069518,0.478782
ds_qual_target,3.815179,1.704521,5.999879,6.990539,9.400879,10.590069,0.446773
dis_qual_target,10.705074,4.991216,17.166164,20.118182,27.066768,30.303655,0.466248
