In [525]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns

# PyRQP (forward calculation)

## Input parameters

In [526]:
# River
riv_flow_mean = 20
riv_flow_sd = 5
riv_wq_mean = 0.1
riv_wq_sd = 0.05

# Discharge
dis_flow_mean = 5
dis_flow_sd = 1
dis_wq_mean = 1
dis_wq_sd = 0.5

# Correlations
corr_riv_flow_wq = 0.6
corr_dis_flow_wq = 0.6
corr_riv_dis_flow = 0.6

## Prepare all functionality

In [527]:
def calculate_covariance(corr, std_1, std_2):
    """
    This formula takes a correlation and two std
    and calculates the covariance matrix
    """
    x = corr * np.sqrt(std_1**2 * std_2**2)
    cov = [[std_1**2, x], [x, std_2**2]]
    return cov

In [528]:
def calculate_multivariate_normal(mean_1, mean_2, cov):
    """
    This formula takes a covariation matrix and two
    mean values and calculates a two random series of
    multivariate variables of the specified size
    """
    data = np.random.multivariate_normal([mean_1, mean_2], cov, size=10000)
    return data

In [529]:
def transform_log_to_normal(lg_mean, lg_sd):
    """ """
    mean = np.log(lg_mean / ((1 + ((lg_sd**2) / (lg_mean**2))) ** 0.5))
    sd = (np.log(1 + (lg_sd**2) / (lg_mean**2))) ** 0.5
    return mean, sd

In [530]:
def calculate_multivariate_log_normal(corr, mean_1, mean_2, std_1, std_2):
    """ """
    # Transform to 'normal' statistical moments
    mean_1, std_1 = transform_log_to_normal(mean_1, std_1)
    mean_2, std_2 = transform_log_to_normal(mean_2, std_2)
    # Calculate covariance
    cov = calculate_covariance(corr, std_1, std_2)
    # Calcualate random multivariate data
    pts = calculate_multivariate_normal(mean_1, mean_2, cov)
    # Transform to lognormal
    pts = np.exp(pts)
    return pts

In [531]:
# Example
# pts = calculate_multivariate_log_normal(
#     corr_riv_flow_wq, riv_flow_mean, riv_wq_mean, riv_flow_sd, riv_wq_sd
# )
# pts = pd.DataFrame(pts, columns=["Flow", "Quality"])
# pts.mean()
# pts.std()
# pts.corr()

## Calculate correlated lognormal random variable for each correlated series of data

In [532]:
# Correlation between river flow and quality
pts_1 = calculate_multivariate_log_normal(
    corr_riv_flow_wq, riv_flow_mean, riv_wq_mean, riv_flow_sd, riv_wq_sd
)
pts_1 = pd.DataFrame(pts_1, columns=["River flow", "River quality"]).sort_values(
    "River flow"
)

# Correlation between discharge flow and quality
pts_2 = calculate_multivariate_log_normal(
    corr_dis_flow_wq, dis_flow_mean, dis_wq_mean, dis_flow_sd, dis_wq_sd
)
pts_2 = pd.DataFrame(
    pts_2, columns=["Discharge flow", "Discharge quality"]
).sort_values("Discharge flow")

# Correlation between river flow and discharge flow
pts_3 = calculate_multivariate_log_normal(
    corr_riv_dis_flow, riv_flow_mean, dis_flow_mean, riv_flow_sd, dis_flow_sd
)
pts_3 = pd.DataFrame(pts_3, columns=["River flow", "Discharge flow"]).sort_values(
    "River flow"
)

## Build master dataframe with all data

In [533]:
df = pd.concat(
    [pts_1, pts_2["Discharge quality"], pts_3["Discharge flow"]],
    axis=1,
    ignore_index=True,
)
df.columns = ["River_flow", "River_quality", "Discharge_quality", "Discharge_flow"]

## Calculate downstream quality column

In [534]:
df = df.eval("Downstream_flow = River_flow + Discharge_flow")

In [535]:
df = df.eval(
    "Downstream_wq = (River_flow * River_quality + Discharge_flow * Discharge_quality) / Downstream_flow"
)

## Calculate descriptive statistics

In [539]:
df[["Downstream_flow", "Downstream_wq"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Downstream_flow,10000.0,24.90471,5.050273,11.733161,21.313506,24.319397,27.85709,57.578773
Downstream_wq,10000.0,0.284778,0.115803,0.057715,0.205901,0.261068,0.337676,1.529244


In [537]:
df[["Downstream_flow", "Downstream_wq"]].quantile(0.05)

Downstream_flow    17.774755
Downstream_wq       0.147690
Name: 0.05, dtype: float64

In [538]:
df[["Downstream_flow", "Downstream_wq"]].quantile(0.95)

Downstream_flow    33.932806
Downstream_wq       0.500293
Name: 0.95, dtype: float64

# TODO

- Transform into a proper GitHub library
- Add extra functionality
    - Backward calculations
    - Decay (with optional decay rate and time or distance plus alpha/beta values)
    - Visualisation (e.g., histograms)