In [1147]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns

# PyRQP

## Input parameters

In [1148]:
# Flows
# Mean and standard deviation of the river (riv) and discharge (dis) flows.
# They are given as moments of the values themselves (not of their logarithms);
# calculate_multivariate_log_normal converts them before sampling.
# NOTE(review): units are not stated anywhere in this notebook - confirm.
riv_flow_mean = 100
riv_flow_sd = 93  # New code uses 5th percentile
dis_flow_mean = 20
dis_flow_sd = 8

# Water quality
# Mean and standard deviation of the river and discharge water quality,
# given in the same way as the flows above.
riv_wq_mean = 2
riv_wq_sd = 1
dis_wq_mean = 15
dis_wq_sd = 7

# Correlations
# These are used to build the covariance matrix of the log-transformed
# variables, so they are correlations in log space.
corr_riv_dis_flow = 0.6  # river flow vs discharge flow
corr_riv_flow_wq = -0.3  # river flow vs river quality
corr_dis_flow_wq = -0.2  # discharge flow vs discharge quality

## Prepare all functionality

In [1149]:
def transform_log_to_normal(lg_mean, lg_sd):
    """ """
    mean = np.log(lg_mean / ((1 + ((lg_sd**2) / (lg_mean**2))) ** 0.5))
    sd = (np.log(1 + (lg_sd**2) / (lg_mean**2))) ** 0.5
    return mean, sd

In [1150]:
def calculate_covariance(corr, std_1, std_2):
    """
    Return the covariance of two variables given their correlation
    coefficient and their two standard deviations.

    The result is a single covariance value (one off-diagonal entry of a
    covariance matrix), not the matrix itself.
    """
    return corr * std_1 * std_2

In [1151]:
def calculate_multivariate_log_normal(
    mean1,
    std1,
    mean2,
    std2,
    mean3,
    std3,
    mean4,
    std4,
    corr1_2,
    corr1_3,
    corr2_4,
    size=100000,
    seed=None,
):
    """
    Draw correlated lognormal samples for four variables.

    Each (mean, std) pair is converted to the parameters of the underlying
    normal distribution, a 4x4 covariance matrix is assembled, a multivariate
    normal sample is drawn and the sample is exponentiated.

    Parameters
    ----------
    mean1, std1 : mean / standard deviation of variable 1 (column "riv_flow").
    mean2, std2 : mean / standard deviation of variable 2 (column "dis_flow").
    mean3, std3 : mean / standard deviation of variable 3 (column "riv_qual").
    mean4, std4 : mean / standard deviation of variable 4 (column "dis_qual").
    corr1_2, corr1_3, corr2_4 : correlation between variables 1-2, 1-3 and
        2-4. They are combined with the log-space standard deviations, i.e.
        they are correlations of the logarithms. Every other pair is treated
        as uncorrelated.
    size : number of samples (rows) to draw. Defaults to 100000.
    seed : optional seed for a reproducible draw. When None (default) the
        global ``np.random`` state is used, as before.

    Returns
    -------
    df : DataFrame with columns "riv_flow", "dis_flow", "riv_qual", "dis_qual".
    cov_matrix : the 4x4 log-space covariance matrix used for sampling.
    """
    # Transform to normal
    mean1, std1 = transform_log_to_normal(mean1, std1)
    mean2, std2 = transform_log_to_normal(mean2, std2)
    mean3, std3 = transform_log_to_normal(mean3, std3)
    mean4, std4 = transform_log_to_normal(mean4, std4)

    # Calculate covariances
    cov1_2 = calculate_covariance(corr1_2, std1, std2)
    cov1_3 = calculate_covariance(corr1_3, std1, std3)
    cov2_4 = calculate_covariance(corr2_4, std2, std4)

    # Build covariance matrix (pairs without a given correlation are set to 0)
    cov_matrix = np.array(
        [
            [std1**2, cov1_2, cov1_3, 0],
            [cov1_2, std2**2, 0, cov2_4],
            [cov1_3, 0, std3**2, 0],
            [0, cov2_4, 0, std4**2],
        ]
    )

    # Generate normal random multivariate. Without a seed, keep drawing from
    # the global NumPy state so existing callers behave exactly as before;
    # with a seed, use a dedicated generator so the draw is reproducible.
    rng = np.random if seed is None else np.random.default_rng(seed)
    data = rng.multivariate_normal(
        [mean1, mean2, mean3, mean4], cov_matrix, size=size
    )

    # Transform to lognormal
    data = np.exp(data)

    df = pd.DataFrame(data, columns=["riv_flow", "dis_flow", "riv_qual", "dis_qual"])

    return df, cov_matrix

## Calculate downstream quality

In [1152]:
# Sample the four correlated lognormal inputs from the parameters defined above
df, cov_matrix = calculate_multivariate_log_normal(
    # Flow
    mean1=riv_flow_mean,
    std1=riv_flow_sd,
    mean2=dis_flow_mean,
    std2=dis_flow_sd,
    # Quality
    mean3=riv_wq_mean,
    std3=riv_wq_sd,
    mean4=dis_wq_mean,
    std4=dis_wq_sd,
    # Correlations
    corr1_2=corr_riv_dis_flow,
    corr1_3=corr_riv_flow_wq,
    corr2_4=corr_dis_flow_wq,
)

In [1153]:
# Downstream flow is the sum of both flows; downstream quality is the
# flow-weighted average of the river and discharge quality.
df = df.assign(
    ds_flow=lambda d: d["riv_flow"] + d["dis_flow"],
    ds_qual=lambda d: (d["riv_flow"] * d["riv_qual"] + d["dis_flow"] * d["dis_qual"])
    / d["ds_flow"],
)

## Calculate descriptive statistics

In [1154]:
# Mean, standard deviation and upper percentiles of every column
quantile_columns = {"90pc": 0.90, "95pc": 0.95, "99pc": 0.99}
stats = df.agg(["mean", "std"]).T.assign(
    **{label: df.quantile(q) for label, q in quantile_columns.items()}
)
stats

Unnamed: 0,mean,std,90pc,95pc,99pc
riv_flow,100.113994,92.764728,202.159303,268.181671,460.06491
dis_flow,20.011945,8.007262,30.413576,34.932727,45.532806
riv_qual,1.994862,0.997699,3.264662,3.879874,5.356069
dis_qual,15.017385,7.014069,24.060626,28.222059,38.37605
ds_flow,120.125939,97.288842,228.503622,297.144872,493.783074
ds_qual,4.746556,2.285419,7.637795,9.054534,12.44363


In [1155]:
# Pairwise correlation matrix of every column, including the derived ds_* columns.
# This is important information that should be part of the analysis.
# Note: the input correlations were applied to the log-transformed variables when
# sampling, so the values shown here for the back-transformed data need not match
# the inputs (e.g. 0.6 was requested between river and discharge flow).
df.corr()

Unnamed: 0,riv_flow,dis_flow,riv_qual,dis_qual,ds_flow,ds_qual
riv_flow,1.0,0.53562,-0.228727,-0.003625,0.997582,-0.460627
dis_flow,0.53562,1.0,-0.000601,-0.178881,0.593017,-0.191493
riv_qual,-0.228727,-0.000601,1.0,-0.001099,-0.21814,0.52639
dis_qual,-0.003625,-0.178881,-0.001099,1.0,-0.018179,0.58306
ds_flow,0.997582,0.593017,-0.21814,-0.018179,1.0,-0.454967
ds_qual,-0.460627,-0.191493,0.52639,0.58306,-0.454967,1.0


## Backward calculation

In [1156]:
# Downstream quality value that the chosen percentile of ds_qual should meet
target = 6
# Quantile (as a fraction, 0-1) of downstream quality that is compared to the target
percentile = 0.9
# TODO Look at equation from notebook to transform from percentile to mean

In [1157]:
# First pass of the backward calculation: scale the downstream quality so that
# its chosen percentile meets the target, then derive the discharge quality
# that would be needed to achieve it.

# Calculate scale factor and scale
scale = target / df["ds_qual"].quantile(percentile)
print(scale)
df["ds_qual_target"] = df["ds_qual"] * scale
# Recalculate discharge quality target (mass balance solved for the discharge)
df = df.eval("dis_qual_target = (ds_flow * ds_qual_target - riv_flow * riv_qual) / dis_flow")
# Recalculate dis_qual_target keeping the CoV of dis_qual: only the mean of the
# mass-balance result is used, applied as a uniform factor to dis_qual
adj_factor = df["dis_qual_target"].mean() / df["dis_qual"].mean()
df["dis_qual_target"] = df["dis_qual"] * adj_factor
# Re-calculate ds water quality
df = df.eval("ds_qual_target = (riv_flow * riv_qual + dis_flow * dis_qual_target) / ds_flow")
# Check the scale against the configured percentile (not a hard-coded 0.9)
scale = target / df["ds_qual_target"].quantile(percentile)

0.7855670280420233


In [1158]:
# Repeat the rescaling until the chosen percentile of ds_qual_target matches
# the target to three decimals. The iteration count is capped so that a NaN or
# non-converging scale factor cannot hang the notebook.
max_iterations = 100
n_iterations = 0
while round(scale, 3) != 1:
    if n_iterations >= max_iterations:
        raise RuntimeError(
            f"Backward calculation did not converge after {max_iterations} "
            f"iterations (last scale factor: {scale})"
        )
    n_iterations += 1
    print(scale)
    # Calculate scale factor and scale
    df["ds_qual_target"] = df["ds_qual_target"] * scale
    # Recalculate discharge quality target
    df = df.eval("dis_qual_target = (ds_flow * ds_qual_target - riv_flow * riv_qual) / dis_flow")
    # Recalculate dis_qual_target keeping the CoV of dis_qual
    adj_factor = df["dis_qual_target"].mean() / df["dis_qual"].mean()
    df["dis_qual_target"] = df["dis_qual"] * adj_factor
    # Re-calculate ds water quality
    df = df.eval("ds_qual_target = (riv_flow * riv_qual + dis_flow * dis_qual_target) / ds_flow")
    # Check the scale against the configured percentile (not a hard-coded 0.9)
    scale = target / df["ds_qual_target"].quantile(percentile)

1.0466389772775844
0.9915700226617462
1.0018435387833282


In [1161]:
# Summary of every column after the backward calculation
quantile_columns = {"90pc": 0.90, "95pc": 0.95, "99pc": 0.99, "99.5pc": 0.995}
stats = df.agg(["mean", "std"]).T.assign(
    **{label: df.quantile(q) for label, q in quantile_columns.items()}
)
# "cov" is the coefficient of variation (std / mean), not a covariance
stats["cov"] = stats["std"] / stats["mean"]
stats

Unnamed: 0,mean,std,90pc,95pc,99pc,99.5pc,cov
riv_flow,100.113994,92.764728,202.159303,268.181671,460.06491,559.259464,0.926591
dis_flow,20.011945,8.007262,30.413576,34.932727,45.532806,50.111918,0.400124
riv_qual,1.994862,0.997699,3.264662,3.879874,5.356069,6.052525,0.500134
dis_qual,15.017385,7.014069,24.060626,28.222059,38.37605,42.795005,0.467063
ds_flow,120.125939,97.288842,228.503622,297.144872,493.783074,591.211972,0.80989
ds_qual,4.746556,2.285419,7.637795,9.054534,12.44363,13.903919,0.48149
ds_qual_target,3.815545,1.711832,6.002681,7.030035,9.412873,10.523106,0.448647
dis_qual_target,10.685485,4.990797,17.120122,20.081152,27.306134,30.450401,0.467063


In [1160]:
# https://stats.stackexchange.com/questions/212690/the-product-of-two-lognormal-random-variables
# https://stats.stackexchange.com/questions/627427/given-two-rvs-x-and-y-if-x-y-z-is-it-possible-to-change-the-mean-and-s?noredirect=1#comment1170511_627427
# https://stats.stackexchange.com/questions/381988/scaling-percentiles-of-log-normal-distribution
# https://stats.stackexchange.com/questions/344825/how-do-i-find-new-standard-deviation-from-two-means-and-their-sd

# TODO Try to do the same but converting to normal distribution first
# TODO Edit the question on Cross Validated to say that the distribution needs to be scaled
# https://stats.libretexts.org/Bookshelves/Probability_Theory/Probability_Mathematical_Statistics_and_Stochastic_Processes_(Siegrist)/05%253A_Special_Distributions/5.12%253A_The_Lognormal_Distribution

## Backward calculation (not sure about this)

https://stats.stackexchange.com/questions/393410/how-to-modify-the-mean-and-variance-dispersion-of-a-given-distribution

https://stats.stackexchange.com/questions/381988/scaling-percentiles-of-log-normal-distribution