In [675]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns

# PyRQP

## Input parameters

In [676]:
# Reproducibility: the Monte Carlo sampling further down draws from NumPy's
# global random state, so seed it once here to make a fresh
# "Restart Kernel -> Run All" give the same numbers every time.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Flows: mean and standard deviation of the log-normally distributed flows
riv_flow_mean = 100
riv_flow_sd = 93  # New code uses 5th percentile
dis_flow_mean = 20
dis_flow_sd = 8

# Water quality: mean and standard deviation of the log-normally distributed
# concentrations
riv_wq_mean = 2
riv_wq_sd = 1
dis_wq_mean = 15
dis_wq_sd = 7

# Correlations
corr_riv_dis_flow = 0.6  # river flow vs. discharge flow
corr_riv_flow_wq = -0.3  # river flow vs. river quality
corr_dis_flow_wq = -0.2  # discharge flow vs. discharge quality

## Prepare all functionality

In [677]:
def transform_log_to_normal(lg_mean, lg_sd):
    """
    Convert the mean and standard deviation of a log-normal variable into the
    mean and standard deviation of the underlying normal distribution, i.e.
    of the natural logarithm of the variable.

    With ``ratio = lg_sd**2 / lg_mean**2``::

        mean = ln(lg_mean / sqrt(1 + ratio))
        sd   = sqrt(ln(1 + ratio))

    Parameters
    ----------
    lg_mean : float or array-like
        Mean of the log-normal variable. Must be strictly positive.
    lg_sd : float or array-like
        Standard deviation of the log-normal variable.

    Returns
    -------
    tuple
        ``(mean, sd)`` of the underlying normal distribution.

    Raises
    ------
    ValueError
        If ``lg_mean`` is zero or negative. Without this check a negative
        mean yields NaN and a zero mean a division by zero, which would only
        surface later as a confusing failure in the sampling step.
    """
    if np.any(np.asarray(lg_mean) <= 0):
        raise ValueError(
            f"lg_mean must be strictly positive for a log-normal variable, got {lg_mean!r}"
        )

    # Squared coefficient of variation; shared by both formulas.
    ratio = (lg_sd**2) / (lg_mean**2)
    mean = np.log(lg_mean / ((1 + ratio) ** 0.5))
    sd = (np.log(1 + ratio)) ** 0.5
    return mean, sd

In [678]:
def calculate_covariance(corr, std_1, std_2):
    """
    Return the covariance of two variables from their correlation
    coefficient and their standard deviations (``corr * std_1 * std_2``).

    The result is a single covariance term, i.e. one off-diagonal entry of a
    covariance matrix, not the matrix itself.
    """
    return corr * std_1 * std_2

In [679]:
def calculate_multivariate_log_normal(
    mean1,
    std1,
    mean2,
    std2,
    mean3,
    std3,
    mean4,
    std4,
    corr1_2,
    corr1_3,
    corr2_4,
    size=100000,
    rng=None,
):
    """
    Draw correlated samples of four log-normal variables.

    Each (mean, std) pair is converted to the parameters of the underlying
    normal distribution, a 4x4 covariance matrix is assembled in that normal
    (log) space, multivariate normal samples are drawn and finally
    exponentiated.

    Variables 1..4 are returned as the columns ``riv_flow``, ``dis_flow``,
    ``riv_qual`` and ``dis_qual`` respectively.

    Note that the correlations are applied to the log-transformed variables
    (the covariances are built from the normal-space standard deviations), so
    the correlation measured between the returned log-normal columns will in
    general differ somewhat from the values passed in.

    Parameters
    ----------
    mean1, std1, ..., mean4, std4 : float
        Mean and standard deviation of each log-normal variable.
    corr1_2, corr1_3, corr2_4 : float
        Correlation between variables 1 and 2, 1 and 3, and 2 and 4. All
        other pairs (1-4, 2-3, 3-4) are treated as uncorrelated.
    size : int, optional
        Number of samples (rows) to draw. Defaults to 100000.
    rng : numpy.random.Generator, optional
        Random generator used for sampling, e.g.
        ``np.random.default_rng(seed)``. When omitted, NumPy's global random
        state is used (controlled by ``np.random.seed``).

    Returns
    -------
    df : pandas.DataFrame
        ``size`` rows with columns ``riv_flow``, ``dis_flow``, ``riv_qual``,
        ``dis_qual``.
    cov_matrix : numpy.ndarray
        The 4x4 covariance matrix of the underlying normal distribution.
    """
    # Transform to normal
    mean1, std1 = transform_log_to_normal(mean1, std1)
    mean2, std2 = transform_log_to_normal(mean2, std2)
    mean3, std3 = transform_log_to_normal(mean3, std3)
    mean4, std4 = transform_log_to_normal(mean4, std4)

    # Calculate covariances (normal space)
    cov1_2 = calculate_covariance(corr1_2, std1, std2)
    cov1_3 = calculate_covariance(corr1_3, std1, std3)
    cov2_4 = calculate_covariance(corr2_4, std2, std4)

    # Build covariance matrix; zeros mark the pairs assumed uncorrelated
    cov_matrix = np.array(
        [
            [std1**2, cov1_2, cov1_3, 0],
            [cov1_2, std2**2, 0, cov2_4],
            [cov1_3, 0, std3**2, 0],
            [0, cov2_4, 0, std4**2],
        ]
    )

    # Generate normal random multivariate. Fall back to the module-level
    # (global-state) sampler when no generator is supplied so that existing
    # callers behave exactly as before.
    sampler = np.random if rng is None else rng
    data = sampler.multivariate_normal(
        [mean1, mean2, mean3, mean4], cov_matrix, size=size
    )

    # Transform to lognormal
    data = np.exp(data)

    df = pd.DataFrame(data, columns=["riv_flow", "dis_flow", "riv_qual", "dis_qual"])

    return df, cov_matrix

## Calculate downstream quality

In [680]:
# Simulate correlated river/discharge flows and qualities from the input
# parameters. Keyword arguments make the variable numbering explicit:
# 1 = river flow, 2 = discharge flow, 3 = river quality, 4 = discharge quality.
df, cov_matrix = calculate_multivariate_log_normal(
    mean1=riv_flow_mean,
    std1=riv_flow_sd,
    mean2=dis_flow_mean,
    std2=dis_flow_sd,
    mean3=riv_wq_mean,
    std3=riv_wq_sd,
    mean4=dis_wq_mean,
    std4=dis_wq_sd,
    corr1_2=corr_riv_dis_flow,
    corr1_3=corr_riv_flow_wq,
    corr2_4=corr_dis_flow_wq,
)

In [681]:
# Mass balance at the mixing point: the flows add up, and the downstream
# quality is the flow-weighted average of the river and discharge qualities.
df = df.eval(
    """
    ds_flow = riv_flow + dis_flow
    ds_qual = (riv_flow * riv_qual + dis_flow * dis_qual) / ds_flow
    """
)

## Calculate descriptive statistics

In [682]:
# One row per column of df: the describe() statistics plus the 90th, 95th
# and 99th percentiles.
stats = df.describe().T.assign(
    **{
        label: df.quantile(q)
        for label, q in (("90pc", 0.90), ("95pc", 0.95), ("99pc", 0.99))
    }
)

In [683]:
# Display the summary statistics of the forward calculation.
stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max,90pc,95pc,99pc
riv_flow,100000.0,99.956187,92.374639,2.521841,43.016509,73.041981,124.45962,2064.332547,201.546061,268.247714,458.933344
dis_flow,100000.0,19.997746,8.015485,3.246961,14.281015,18.548984,24.090281,93.796116,30.457077,34.997604,45.598149
riv_qual,100000.0,1.996452,0.998429,0.214244,1.299685,1.784159,2.453765,12.321954,3.270071,3.872803,5.377014
dis_qual,100000.0,10.524189,4.924756,1.056923,7.073859,9.532929,12.874399,70.55131,16.878054,19.868973,26.88672
ds_flow,100000.0,119.953933,96.954023,8.13122,59.673003,92.506676,147.428722,2112.349863,227.942061,297.688363,493.253653
ds_qual,100000.0,3.776737,1.683795,0.473305,2.599897,3.451796,4.589575,24.945739,5.934064,6.921409,9.338501


In [692]:
# Pairwise correlations between all columns of the simulated data.
# NOTE(review): the saved output of this cell also lists dis_qual_target and
# ds_qual_target, which are only created in the backward-calculation cells
# further down, so the cell was last executed out of order. Re-run the
# notebook top to bottom to refresh the table.
df.corr()  # This is important information that should be part of the analysis

Unnamed: 0,riv_flow,dis_flow,riv_qual,dis_qual,ds_flow,ds_qual,dis_qual_target,ds_qual_target
riv_flow,1.0,0.542093,-0.229982,0.00254,0.997584,-0.443201,0.006972,-0.443201
dis_flow,0.542093,1.0,-0.001097,-0.179132,0.599161,-0.175155,-0.178062,-0.175155
riv_qual,-0.229982,-0.001097,1.0,-0.00324,-0.21921,0.620811,-0.000744,0.620811
dis_qual,0.00254,-0.179132,-0.00324,1.0,-0.012389,0.550539,0.999972,0.550539
ds_flow,0.997584,0.599161,-0.21921,-0.012389,1.0,-0.436748,-0.008078,-0.436748
ds_qual,-0.443201,-0.175155,0.620811,0.550539,-0.436748,1.0,0.549397,1.0
dis_qual_target,0.006972,-0.178062,-0.000744,0.999972,-0.008078,0.549397,1.0,0.549397
ds_qual_target,-0.443201,-0.175155,0.620811,0.550539,-0.436748,1.0,0.549397,1.0


## Backward calculation

In [685]:
# Target mean for the downstream quality (a mean of the log-normally
# distributed values, not of their logarithms)
target_mean = 3.8

# Multiply every simulated downstream value by one common factor so that the
# sample mean equals the target. This overwrites the forward-calculated
# ds_qual column.
scale = target_mean / df["ds_qual"].mean()
df["ds_qual"] = df["ds_qual"].mul(scale)

In [686]:
# Rearranged mass balance: the discharge quality that would produce the
# (rescaled) downstream quality for each simulated row.
df = df.assign(
    dis_qual_target=lambda d: (
        d["ds_flow"] * d["ds_qual"] - d["riv_flow"] * d["riv_qual"]
    )
    / d["dis_flow"]
)

In [687]:
# Forward mass balance using the back-calculated discharge quality. Because
# dis_qual_target was derived by inverting this same equation, the result
# reproduces ds_qual (up to floating-point rounding).
df = df.assign(
    ds_qual_target=lambda d: (
        d["riv_flow"] * d["riv_qual"] + d["dis_flow"] * d["dis_qual_target"]
    )
    / d["ds_flow"]
)

In [688]:
# Display the simulated rows together with the derived columns.
df

Unnamed: 0,riv_flow,dis_flow,riv_qual,dis_qual,ds_flow,ds_qual,dis_qual_target,ds_qual_target
0,64.785037,21.303749,1.079106,12.462789,86.088786,3.920139,12.559766,3.920139
1,57.782862,18.234880,0.997808,10.717216,76.017741,3.349773,10.802703,3.349773
2,108.034704,24.527751,1.262377,9.093755,132.562455,2.728101,9.184015,2.728101
3,103.611654,17.073312,1.470083,15.211507,120.684967,3.435112,15.360153,3.435112
4,30.373477,6.947968,1.625232,3.996276,37.321444,2.079368,4.064652,2.079368
...,...,...,...,...,...,...,...,...
99995,70.656387,12.783299,2.413165,13.006787,83.439686,4.061011,13.169057,4.061011
99996,46.526807,20.921144,1.439277,6.154326,67.447951,2.919674,6.211949,2.919674
99997,297.859097,28.565692,0.873885,11.615046,326.424789,1.825024,11.742714,1.825024
99998,41.425062,17.464655,2.477896,11.556428,58.889718,5.202121,11.663810,5.202121


In [689]:
# One row per column of df: describe() statistics, the 90th/95th/99th
# percentiles and "cov", which here is the coefficient of variation
# (std / mean), not a covariance.
stats = (
    df.describe()
    .T.assign(
        **{
            label: df.quantile(q)
            for label, q in (("90pc", 0.90), ("95pc", 0.95), ("99pc", 0.99))
        }
    )
    .assign(cov=lambda table: table["std"] / table["mean"])
)

In [690]:
# TODO: the backward calculation needs to maintain the coefficient of variation (CoV = std / mean) of the discharge
# https://stats.stackexchange.com/questions/212690/the-product-of-two-lognormal-random-variables

In [691]:
# Display the summary statistics, now including the back-calculated columns.
stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max,90pc,95pc,99pc,cov
riv_flow,100000.0,99.956187,92.374639,2.521841,43.016509,73.041981,124.45962,2064.332547,201.546061,268.247714,458.933344,0.924151
dis_flow,100000.0,19.997746,8.015485,3.246961,14.281015,18.548984,24.090281,93.796116,30.457077,34.997604,45.598149,0.400819
riv_qual,100000.0,1.996452,0.998429,0.214244,1.299685,1.784159,2.453765,12.321954,3.270071,3.872803,5.377014,0.500102
dis_qual,100000.0,10.524189,4.924756,1.056923,7.073859,9.532929,12.874399,70.55131,16.878054,19.868973,26.88672,0.467946
ds_flow,100000.0,119.953933,96.954023,8.13122,59.673003,92.506676,147.428722,2112.349863,227.942061,297.688363,493.253653,0.80826
ds_qual,100000.0,3.8,1.694166,0.47622,2.615911,3.473058,4.617845,25.099391,5.970614,6.964041,9.396021,0.445833
dis_qual_target,100000.0,10.642112,4.959186,1.078347,7.168519,9.644241,13.006878,71.145382,17.036824,20.044952,27.103392,0.465996
ds_qual_target,100000.0,3.8,1.694166,0.47622,2.615911,3.473058,4.617845,25.099391,5.970614,6.964041,9.396021,0.445833


## Backward calculation (not sure about this)

https://stats.stackexchange.com/questions/393410/how-to-modify-the-mean-and-variance-dispersion-of-a-given-distribution

https://stats.stackexchange.com/questions/381988/scaling-percentiles-of-log-normal-distribution