# Figures for Illustration of concepts

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats

In [None]:
import config as cfg

In [None]:
# Render tick labels, axes titles and axes labels in the 'xx-large' font size
# for every figure created after this cell.
plt.rcParams.update(
    dict.fromkeys(
        (
            'xtick.labelsize',
            'ytick.labelsize',
            'axes.titlesize',
            'axes.labelsize',
        ),
        'xx-large',
    )
)
# To inspect the tick size settings:
# {k:v for k,v in plt.rcParams.items() if 'tick' in k and 'size' in k}

## Imputation by random draw from normal distribution

- currently a commonly used approach at NNF CPR for downstream experimentation tasks
- see also illustrations in [Lazar 2016, Figure 1](https://pubs.acs.org/doi/10.1021/acs.jproteome.5b00981#fig1)

In [None]:
# Parameters of the "observed" intensity distribution (log2 scale)
mu = 25.0
stddev = 1.0

# Evaluate both densities on the same grid of +/- 5 around the mean
x = np.linspace(mu - 5, mu + 5, num=101)

y_normal = scipy.stats.norm.pdf(x, loc=mu, scale=stddev)

# Distribution to draw imputed values from: mean shifted down by
# 1.8 standard deviations, width shrunk to 0.3 standard deviations
mu_shifted = mu - (1.8*stddev)
stddev_shifted = 0.3*stddev
print(f"Downshifted: {mu_shifted = }, {stddev_shifted = }")
# Reuse the parameters reported above so that print and plot cannot diverge
y_impute = scipy.stats.norm.pdf(x, loc=mu_shifted, scale=stddev_shifted)

colors = plt.cm.viridis([0.25, 0.75])
labels = ["original", "down shifted"]

fig, ax = plt.subplots(1, 1, figsize=(30, 15))

for y, c, label in zip([y_normal, y_impute], colors, labels):
    # Attach the label to the line itself: a positional label list passed to
    # ax.legend() is matched to artists by order only, which is easy to get
    # wrong when lines and filled areas are added alternately.
    ax.plot(x, y, color=c, label=label)
    ax.fill_between(x, y, color=c)

# Axes-level settings are needed only once, not per curve
ax.set_xlabel('log2 intensity')
ax.set_ylabel('density')
ax.legend()

In [None]:
# Save the figure twice into cfg.FIGUREFOLDER: once with the default
# savefig settings and once at 600 dpi.
# NOTE(review): the file names carry no extension, so the output format is
# presumably matplotlib's default savefig format -- confirm.
for stem, save_kwargs in (
    ('illustration_normal_imputation', {}),
    ('illustration_normal_imputation_highres', {'dpi': 600}),
):
    fig.savefig(cfg.FIGUREFOLDER / stem, **save_kwargs)

## Log transformations and errors

- what does log2 transformation mean for the error

If the error is calculated in log2 space, the larger values have to be predicted with higher precision (in comparison to the original space)

In [None]:
def get_original_error_log2(x:float, error_log2:float):
    """Translate an error given in log2 space into the original space.

    ``log2(x)`` is shifted by ``error_log2``, transformed back with
    ``2 ** ...`` and the original value ``x`` is subtracted.

    Parameters
    ----------
    x
        Value in the original (non-log) space.
    error_log2
        Error added to ``log2(x)``.

    Returns
    -------
    Difference between the back-transformed, shifted value and ``x``.
    """
    shifted_log2 = np.log2(x) + error_log2
    shifted_original = 2 ** shifted_log2
    return shifted_original - x

# Same log2 error (0.5) applied to two values one order of magnitude apart
report_lines = [
    f"{get_original_error_log2(1e9, 0.5) = :,.1f}",
    f"{get_original_error_log2(1e8, 0.5) = :,.1f}",
]
print("\n".join(report_lines))

To find the log error $e^*$ that, applied to a second measurement $b$, gives the same error
in the original space as the log error $e$ applied to a measurement $a$, we equate:

$$ \exp(\ln(a)+e) - a = \exp(\ln(b)+e^*) - b $$

Setting $a$, $e$ and $b$ we want to solve for $e^*$, which gives

$$ e^* = \ln \left(\frac{\exp\big(\ln(a)+e\big) - a + b}{b}  \right)$$

In [None]:
def get_rel_error_log2(measurement, log_error, other_measurement):
    """Log2 error on ``other_measurement`` matching a given error in original space.

    The error that ``log_error`` (in log2 space) causes for ``measurement`` in
    the original space is computed first. The return value is the log2 error
    which produces the same original-space error for ``other_measurement``.

    Parameters
    ----------
    measurement
        Reference value in the original (non-log) space.
    log_error
        Error added to ``log2(measurement)``.
    other_measurement
        Second value in the original space for which the equivalent
        log2 error is wanted.

    Returns
    -------
    The log2 error for ``other_measurement``.
    """
    error_original = 2 ** (np.log2(measurement) + log_error) - measurement
    return np.log2((error_original + other_measurement) / other_measurement)


# The helper has its own name so that storing the result in `rel_error` does
# not overwrite the function; the cell can therefore be re-run safely.
rel_error = get_rel_error_log2(1.e9, 0.5, 1e8)
print(f"{rel_error = :.3f}")

In [None]:
# Both log2 errors translate into the same error in the original space
error_at_1e9 = get_original_error_log2(1e9, 0.5)
error_at_1e8 = get_original_error_log2(1e8, rel_error)
comparison_lines = [
    f"0.500 rel to 1e9: {error_at_1e9:,.1f}",
    f"{rel_error:.3f} rel to 1e8: {error_at_1e8:,.1f}",
]
print("\n".join(comparison_lines))

So a log2 error of 0.5 at $10^9$ corresponds to a log2 error almost five times larger (about 2.36) at $10^8$,
whereas the error in the original space is the same