In [324]:
import numpy as np
import pandas as pd

# PyRQP proof of concept

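A simple notebook containing the bare bones of the methodology, to show how RQP works: correlated lognormal river and discharge flows and qualities are sampled at random, mixed by mass balance, and the discharge quality is then scaled iteratively to meet a downstream target.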

## Input parameters

In [339]:
# Flows: real-space summary statistics (units are not stated in this notebook)
riv_flow_mean = 100
riv_flow_95pc = 20  # low-flow statistic; converted to a standard deviation further down
dis_flow_mean = 20
dis_flow_sd = 8

# Water quality: real-space mean and standard deviation
riv_wq_mean = 2
riv_wq_sd = 1
dis_wq_mean = 15
dis_wq_sd = 7

# Correlations.
# These are applied to the underlying normal (log-space) variables when the
# covariance matrix is built. The trailing commented values appear to be the
# real-space correlations being aimed for: the df.corr() output further down
# reports roughly those numbers for the generated samples.
corr_riv_dis_flow = 0.66  # 0.6
corr_riv_flow_wq = -0.41  # -0.3
corr_dis_flow_wq = -0.23  # -0.2
corr_riv_flow_dis_wq = 0

# Background on how correlations of lognormal variables relate to those of the
# underlying normals:
# https://stats.stackexchange.com/questions/132855/expectation-variance-and-correlation-of-a-bivariate-lognormal-distribution

# Random seed for reproducibility (seeds NumPy's global random state)
np.random.seed(42)

## Functions

In [326]:
def transform_log_to_normal(lg_mean, lg_sd):
    """
    Convert the mean and standard deviation of a lognormal variable into
    the mean and standard deviation of its underlying normal distribution
    (method of moments).

    Returns the tuple (normal mean, normal sd).
    """
    # 1 + CoV^2 appears in both moment equations, so compute it once
    variance_ratio = 1 + ((lg_sd**2) / (lg_mean**2))

    normal_mean = np.log(lg_mean / (variance_ratio ** 0.5))
    normal_sd = (np.log(variance_ratio)) ** 0.5
    return normal_mean, normal_sd

In [327]:
def calculate_covariance(corr, std_1, std_2):
    """
    Return the covariance of two variables given their correlation
    coefficient and their two standard deviations.
    """
    return corr * std_1 * std_2

In [328]:
def calculate_log_mean_sd_from_95pc(lg_mean, lg_95pc):
    """
    Derive the real-space standard deviation of a lognormal variable from
    its mean and its 95th percentile low flow.

    The low flow is treated as the value lying 1.644854 standard deviations
    below the mean of the underlying normal distribution. Combined with
    ln(lg_mean) = mu + sigma**2 / 2 this gives a quadratic in sigma, whose
    positive root is evaluated below.

    Parameters
    ----------
    lg_mean : mean of the lognormal variable (must be positive).
    lg_95pc : 95th percentile low flow (must be positive). Values at or
        above lg_mean do not correspond to a valid lognormal distribution:
        the log-space sd comes out zero or negative and the result is not
        meaningful.

    Returns
    -------
    (lg_mean, lg_sd) : the mean passed in, unchanged, and the real-space
        standard deviation of the lognormal variable.
    """
    # Log-space standard deviation; 2.705543 is 1.644854 squared (to the
    # precision used here)
    sd = (2.705543 + 2 * np.log(lg_mean) - 2 * np.log(lg_95pc)) ** 0.5 - 1.644854

    # Back to real space: sd = mean * sqrt(exp(sigma^2) - 1)
    lg_sd = lg_mean * (np.exp(sd**2) - 1) ** 0.5
    return lg_mean, lg_sd

In [329]:
def calculate_multivariate_log_normal(
    mean1, std1, mean2, std2, mean3, std3, mean4, std4, corr1_2, corr1_3, corr2_4, corr1_4,
    size=100000,
):
    """
    The main equation in RQP to generate the lognormal
    random multivariate dataset.

    Parameters
    ----------
    mean1, std1 : real-space mean and sd of variable 1 (column "riv_flow").
    mean2, std2 : real-space mean and sd of variable 2 (column "dis_flow").
    mean3, std3 : real-space mean and sd of variable 3 (column "riv_qual").
    mean4, std4 : real-space mean and sd of variable 4 (column "dis_qual").
    corr1_2, corr1_3, corr2_4, corr1_4 : correlation coefficients between
        the numbered pairs of variables. They are combined with the
        log-space standard deviations, so they describe the correlation of
        the underlying normal variables; the correlation of the exponentiated
        samples will generally differ. Pairs (2, 3) and (3, 4) are fixed at
        zero covariance.
    size : number of random samples (rows) to draw. Defaults to 100000.

    Returns
    -------
    (df, cov_matrix) : a DataFrame of lognormal samples with one column per
        variable, and the 4x4 log-space covariance matrix used to draw them.

    Notes
    -----
    Sampling uses NumPy's global random state, so results are reproducible
    only if np.random.seed has been called beforehand.
    """
    # Transform parameters to normal (log space)
    mean1, std1 = transform_log_to_normal(mean1, std1)
    mean2, std2 = transform_log_to_normal(mean2, std2)
    mean3, std3 = transform_log_to_normal(mean3, std3)
    mean4, std4 = transform_log_to_normal(mean4, std4)

    # Calculate log-space covariances
    cov1_2 = calculate_covariance(corr1_2, std1, std2)  # River flow and discharge flow
    cov1_3 = calculate_covariance(corr1_3, std1, std3)  # River flow and river quality
    cov2_4 = calculate_covariance(corr2_4, std2, std4)  # Discharge flow and discharge quality
    cov1_4 = calculate_covariance(corr1_4, std1, std4)  # River flow and discharge quality

    # Build the symmetric covariance matrix; variances sit on the diagonal
    cov_matrix = np.array(
        [
            [std1**2, cov1_2, cov1_3, cov1_4],
            [cov1_2, std2**2, 0, cov2_4],
            [cov1_3, 0, std3**2, 0],
            [cov1_4, cov2_4, 0, std4**2],
        ]
    )

    # Generate normal random multivariate
    data = np.random.multivariate_normal(
        [mean1, mean2, mean3, mean4], cov_matrix, size=size
    )

    # Transform to lognormal and build dataframe
    data = np.exp(data)
    df = pd.DataFrame(data, columns=["riv_flow", "dis_flow", "riv_qual", "dis_qual"])

    return df, cov_matrix

## Calculate downstream quality

In [330]:
# Retrieve river flow standard deviation from 95th low flow.
# The helper also hands back the mean it was given, which is discarded here.
_, riv_flow_sd = calculate_log_mean_sd_from_95pc(riv_flow_mean, riv_flow_95pc)

In [331]:
# Draw the correlated lognormal sample. Arguments are passed by name so the
# mapping of each input onto the function signature is explicit.
df, cov_matrix = calculate_multivariate_log_normal(
    # Flow
    mean1=riv_flow_mean,
    std1=riv_flow_sd,
    mean2=dis_flow_mean,
    std2=dis_flow_sd,
    # Quality
    mean3=riv_wq_mean,
    std3=riv_wq_sd,
    mean4=dis_wq_mean,
    std4=dis_wq_sd,
    # Correlations
    corr1_2=corr_riv_dis_flow,
    corr1_3=corr_riv_flow_wq,
    corr2_4=corr_dis_flow_wq,
    corr1_4=corr_riv_flow_dis_wq,
)

In [332]:
# Pairwise correlations of the generated real-space samples, for comparison
# with the correlation inputs set in the parameters cell
df.corr()

Unnamed: 0,riv_flow,dis_flow,riv_qual,dis_qual
riv_flow,1.0,0.602958,-0.304655,-0.001033
dis_flow,0.602958,1.0,0.000795,-0.20695
riv_qual,-0.304655,0.000795,1.0,0.00049
dis_qual,-0.001033,-0.20695,0.00049,1.0


In [333]:
# Mass balance at the mixing point: flows add, and the downstream quality is
# the flow-weighted average of the river and discharge qualities
df = (
    df
    .eval("ds_flow = riv_flow + dis_flow")
    .eval("ds_qual = (riv_flow * riv_qual + dis_flow * dis_qual) / ds_flow")
)

### Results

In [334]:
# Summary table: one row per variable, with mean, sd and upper percentiles
stats = df.agg(["mean", "std"]).T.assign(
    **{
        "90pc": df.quantile(0.90),
        "95pc": df.quantile(0.95),
        "99pc": df.quantile(0.99),
    }
)
stats

Unnamed: 0,mean,std,90pc,95pc,99pc
riv_flow,99.952619,93.488291,200.493268,267.624302,464.252006
dis_flow,19.95144,7.957089,30.313332,34.941751,45.130895
riv_qual,1.997139,0.999869,3.273713,3.891704,5.332771
dis_qual,15.015398,6.996507,24.067248,28.219294,37.95083
ds_flow,119.904059,98.490864,227.598554,297.043225,500.290676
ds_qual,4.69385,2.240823,7.547841,8.885761,12.135748


## Backward calculation

In [335]:
# Objective downstream quality: the value that the chosen percentile of the
# downstream quality distribution should equal after the backward calculation
target = 6
percentile = 0.9  # expressed as a fraction, as passed to Series.quantile

### Solution by iteration

In [336]:
# Calculate adjustment factors and scale distribution
df["ds_qual_target"] = df["ds_qual"]
scale = target / df["ds_qual"].quantile(percentile)

# Safety cap: if the scale factor never settles (or becomes NaN, which fails
# the range test below forever) stop with an error instead of looping endlessly
max_iterations = 100
n_iterations = 0

# Iterate until the chosen percentile of the downstream quality is within
# 0.01% of the target
while not (0.9999 <= scale <= 1.0001):
    if n_iterations >= max_iterations:
        raise RuntimeError(
            f"Backward calculation did not converge within {max_iterations} "
            f"iterations (last scale = {scale})"
        )
    n_iterations += 1
    print(scale)

    # Scale target distribution
    df["ds_qual_target"] = df["ds_qual_target"] * scale

    # Recalculate discharge quality target (mass balance solved for the discharge)
    df = df.eval("dis_qual_target = (ds_flow * ds_qual_target - riv_flow * riv_qual) / dis_flow")

    # Recalculate discharge quality keeping CoV: only the mean of the
    # back-calculated target is used, applied as a uniform factor to the
    # original discharge quality samples
    adj_factor = df["dis_qual_target"].mean() / df["dis_qual"].mean()
    df["dis_qual_target"] = df["dis_qual"] * adj_factor

    # Recalculate downstream water quality and check scale
    df = df.eval("ds_qual_target = (riv_flow * riv_qual + dis_flow * dis_qual_target) / ds_flow")
    scale = target / df["ds_qual_target"].quantile(percentile)

0.7949293567907915
1.0344580410112005
0.9949804583693662
1.0005144443711507


### Results

In [337]:
# Summary table including the target columns; "cov" here is the coefficient
# of variation (std / mean), not a covariance
stats = df.agg(["mean", "std"]).T.assign(
    **{
        "90pc": df.quantile(0.90),
        "95pc": df.quantile(0.95),
        "99pc": df.quantile(0.99),
        "99.5pc": df.quantile(0.995),
    },
    cov=lambda table: table["std"] / table["mean"],
)
stats

Unnamed: 0,mean,std,90pc,95pc,99pc,99.5pc,cov
riv_flow,99.952619,93.488291,200.493268,267.624302,464.252006,573.776517,0.935326
dis_flow,19.95144,7.957089,30.313332,34.941751,45.130895,50.064685,0.398823
riv_qual,1.997139,0.999869,3.273713,3.891704,5.332771,6.003334,0.500651
dis_qual,15.015398,6.996507,24.067248,28.219294,37.95083,42.514328,0.465956
ds_flow,119.904059,98.490864,227.598554,297.043225,500.290676,613.5798,0.821414
ds_qual,4.69385,2.240823,7.547841,8.885761,12.135748,13.726118,0.477396
ds_qual_target,3.803303,1.711183,6.000382,7.007878,9.360349,10.500239,0.44992
dis_qual_target,10.815308,5.039452,17.335184,20.325825,27.335267,30.622268,0.465956
