In [None]:
# default_exp quantification

# Quantification

> Functions related to quantification

This notebook contains everything to perform quantification

Current ToDo here:

- Most of the functions are not very well described yet
- Introductory text to give an overview / relevant papers would be nice.

In [None]:
#hide
from nbdev.showdoc import *

## LFQ

Algorithms related to label-free quantifications are motivated by the [MaxLFQ paper](https://doi.org/10.1074/mcp.m113.031591).


TODO: Include weighting?
TODO: Check boundaries

## In Silico Test data

To test the performance of different approaches we resort to simulating peptide intensities and adjusting them with a simulated normalization. We can then use different approaches to recover the normalization.

In [None]:
#export
import random
import numpy as np

def gaussian(mu, sigma, grid):
    """
    Evaluate a normalized Gaussian density.

    Args:
        mu: Center (mean) of the Gaussian.
        sigma: Standard deviation of the Gaussian.
        grid: Value or NumPy array of positions at which to evaluate.

    Returns:
        Density values at the positions in `grid`.
    """
    inv_sqrt_two_pi = 0.3989422804014327  # 1 / sqrt(2 * pi)
    prefactor = inv_sqrt_two_pi / sigma
    z_score = (grid - mu) / sigma
    return prefactor * np.exp(-0.5 * z_score ** 2)


def return_elution_profile(timepoint, sigma, n_runs):
    """
    Simulate a Gaussian elution profile.

    The profile is a Gaussian centered at `timepoint` with standard deviation
    `sigma`, evaluated at the integer positions 0, 1, ..., n_runs - 1.
    """
    run_positions = np.arange(n_runs)
    return gaussian(timepoint, sigma, run_positions)


def simulate_sample_profiles(n_peptides, n_runs, n_samples, threshold=0.2, use_noise=True):
    """
    Generate random peptide intensity profiles to serve as test data.

    Every peptide gets a random abundance and a Gaussian elution profile
    (sigma = 1) centered on a randomly chosen run. The profile is copied to
    all samples, optionally perturbed by multiplicative Gaussian noise, and
    multiplied by a random normalization factor per (run, sample).

    Args:
        n_peptides (int): Number of peptides to simulate.
        n_runs (int): Number of runs (first axis of the result).
        n_samples (int): Number of samples (second axis of the result).
        threshold (float): Intensities below this value are treated as missing.
        use_noise (bool): If True, multiply each profile by noise drawn
            from a normal distribution with mean 1 and standard deviation 0.2.

    Returns:
        tuple: `(profiles, true_normalization)`. `profiles` has shape
        (n_runs, n_samples, n_peptides) with `np.nan` marking missing values;
        `true_normalization` has shape (n_runs, n_samples) and is scaled so
        that its largest entry is 1.
    """
    abundances = np.random.rand(n_peptides) * 10e7  # uniform in [0, 1e8)

    true_normalization = np.random.normal(loc=1, scale=0.1, size=(n_runs, n_samples))
    # Normalization factors cannot be negative
    true_normalization[true_normalization < 0] = 0
    true_normalization = true_normalization / np.max(true_normalization)

    elution_timepoints = random.choices(range(n_runs), k=n_peptides)

    profiles = np.full((n_runs, n_samples, n_peptides), np.nan)

    for i in range(n_peptides):
        profile = return_elution_profile(elution_timepoints[i], 1, n_runs)
        # Scale the profile so that its apex equals the peptide abundance
        profile = profile / np.max(profile)
        profile = profile * abundances[i]
        elution_profiles = np.tile(profile, (n_samples, 1)).T

        # Some gaussian noise
        if use_noise:
            noise = np.random.normal(1, 0.2, elution_profiles.shape)
            noisy_profile = noise * elution_profiles
        else:
            noisy_profile = elution_profiles

        normalized_profile = noisy_profile * true_normalization

        # Values below the threshold and exact zeros count as not measured
        missing = (normalized_profile < threshold) | (normalized_profile == 0)
        normalized_profile[missing] = np.nan

        profiles[:, :, i] = normalized_profile

    return profiles, true_normalization

## Delayed Normalization

In [None]:
#export
from numba import njit, prange

@njit
def get_peptide_error(profile, normalization):
    """
    Sum of squared pairwise log-ratios of one peptide's normalized intensities.

    `profile` is multiplied elementwise by `normalization` and every column is
    collapsed to a single intensity with a NaN-ignoring sum. Columns whose sum
    is not positive are dropped; the remaining intensities are compared
    pairwise.

    Args:
        profile: 2D array of intensities for one peptide (np.nan = missing).
        normalization: Array of factors multiplied elementwise with `profile`.

    Returns:
        Sum over all column pairs (i < j) of log(I_i / I_j) ** 2. Zero when
        fewer than two columns have a positive summed intensity.
    """
    scaled = profile * normalization
    n_columns = profile.shape[1]

    column_sums = np.zeros(n_columns)
    for col in range(n_columns):
        column_sums[col] = np.nansum(scaled[:, col])

    # Columns without signal cannot contribute to a ratio
    observed = column_sums[column_sums > 0]
    n_observed = len(observed)

    error = 0
    for first in range(n_observed):
        for second in range(first + 1, n_observed):
            log_ratio = np.log(observed[first] / observed[second])
            error += np.abs(log_ratio) ** 2

    return error


@njit(parallel=True)
def get_total_error_parallel(normalization, profiles):
    """
    Total error over all peptides, accumulated in a `prange` loop.

    Args:
        normalization: Array of factors; reshaped to the first two dimensions
            of `profiles` before use.
        profiles: Array whose third axis indexes the peptides.

    Returns:
        Sum of `get_peptide_error` over every slice `profiles[:, :, k]`.
    """
    factors = normalization.reshape((profiles.shape[0], profiles.shape[1]))
    n_peptides = profiles.shape[2]

    total_error = 0

    for peptide in prange(n_peptides):
        total_error += get_peptide_error(profiles[:, :, peptide], factors)

    return total_error


def get_total_error(normalization, profiles):
    """
    Total error over all peptides for a given normalization.

    Args:
        normalization: Array of factors; reshaped to the first two dimensions
            of `profiles` before use, so a flat parameter vector is accepted.
        profiles: Array whose third axis indexes the peptides.

    Returns:
        Sum of `get_peptide_error` over every slice `profiles[:, :, k]`.
    """
    factors = normalization.reshape((profiles.shape[0], profiles.shape[1]))
    n_peptides = profiles.shape[2]

    total_error = 0

    for peptide in range(n_peptides):
        peptide_profile = profiles[:, :, peptide]
        total_error += get_peptide_error(peptide_profile, factors)

    return total_error

## Benchmarking different optimizers

In [None]:
from scipy.optimize import minimize
from time import time
from scipy.optimize import least_squares
import pandas as pd


# Fixed seeds so the simulated data, and therefore the benchmark, is reproducible.
# simulate_sample_profiles draws from both numpy's and Python's global RNG.
np.random.seed(42)
random.seed(42)

n_peptides = 100
n_runs = 10
n_samples = 3

profiles, true_normalization = simulate_sample_profiles(n_peptides, n_runs, n_samples)

methods = ['L-BFGS-B', 'TNC', 'SLSQP','trf']

n_params = profiles.shape[0] * profiles.shape[1]

# Reference errors are identical for every method, so compute them once.
# The first call also triggers the numba compilation of get_peptide_error,
# which would otherwise be attributed to the first method that is timed.
baseline_error = get_total_error(np.ones(n_params), profiles)
ground_truth_error = get_total_error(true_normalization, profiles)

results = []

for method in methods:

    # Every optimizer starts from the unnormalized state (all factors equal to 1)
    x0 = np.ones(n_params)

    start = time()

    if method == 'trf':
        # least_squares expects bounds as a (lower, upper) pair of arrays
        bounds = (x0*0.1, x0)
        res = least_squares(get_total_error, args = [profiles], bounds = bounds, x0 = x0, verbose=0, method = method)

    else:
        # minimize expects one (min, max) pair per parameter
        bounds = [(0.1, 1) for _ in x0]
        res = minimize(get_total_error, args = profiles , x0 = x0, bounds=bounds, method=method)

    # Stop the clock right after the solver so only optimization time is measured
    end = time()

    solution = res.x/np.max(res.x)
    solution = solution.reshape(profiles.shape[:2])

    time_elapsed_min = (end-start)/60

    solution_error = get_total_error(solution, profiles)
    optimality = solution_error / baseline_error
    optimality_ = solution_error / ground_truth_error

    results.append((method, time_elapsed_min, optimality, optimality_))

pd.DataFrame(results, columns=['Method', 'Time Elapsed (min)','Error / Baseline Error','Error / Ground Truth'])

Unnamed: 0,Method,Time Elapsed (min),Error / Baseline Error,Error / Ground Truth
0,L-BFGS-B,0.055321,0.723333,0.505968
1,TNC,0.029185,0.813514,0.56905
2,SLSQP,0.004755,0.723332,0.505968
3,trf,0.258366,0.726323,0.50806


## Implementation

In [None]:
#export
from scipy.optimize import minimize
import pandas as pd
import numpy as np

# Module-level setting for the minimum number of occurrences of a precursor.
# NOTE(review): not read by any function visible in this notebook;
# `delayed_normalization` has its own `minimum_occurence` argument
# (default None) that shadows this name. Confirm whether other modules use it.
minimum_occurence = 10

def normalize_experiment_SLSQP(profiles):
    """
    Estimate normalization factors by minimizing the total error with SLSQP.

    One factor per entry of the first two dimensions of `profiles` is
    optimized, starting from 1 and bounded to [0.1, 1]. The optimizer result
    is divided by its maximum so that the largest factor equals 1.

    Args:
        profiles: Array of intensities whose third axis indexes the peptides
            (passed unchanged to `get_total_error`).

    Returns:
        Array of shape `profiles.shape[:2]` with the normalization factors.
    """
    n_factors = profiles.shape[0] * profiles.shape[1]
    initial_guess = np.ones(n_factors)
    factor_bounds = [(0.1, 1)] * n_factors

    result = minimize(
        get_total_error,
        x0=initial_guess,
        args=profiles,
        method='SLSQP',
        bounds=factor_bounds,
        options={'disp': False},
    )

    rescaled = result.x / np.max(result.x)
    return rescaled.reshape(profiles.shape[:2])

def delayed_normalization(df, field='int_sum', minimum_occurence=None):
    """
    Returns normalization for given peptide intensities

    For every precursor the maximum of `field` per (fraction, experiment) is
    collected into a (fraction, experiment, precursor) array. Normalization
    factors are fitted on the precursors that occur often enough and are then
    applied to every row of `df`.

    Args:
        df (pd.DataFrame): Table with the columns 'precursor', 'fraction',
            'experiment' and `field`. It is modified in place: the column
            `field + '_dn'` with the normalized intensities is added.
        field (str): Name of the intensity column to normalize.
        minimum_occurence (int, optional): Minimum number of
            (fraction, experiment) combinations a precursor must be observed
            in to be used for fitting. If not set (None or 0), the 75th
            percentile of the counts of all precursors observed more than
            once is used.

    Returns:
        tuple: `(df, normalization)` where `normalization` has shape
        (n_fractions, n_experiments), ordered by the sorted unique values of
        the 'fraction' and 'experiment' columns.
    """
    experiments = np.sort(df['experiment'].unique()).tolist()
    fractions = np.sort(df['fraction'].unique()).tolist()

    n_fractions = len(fractions)
    n_experiments = len(experiments)

    df_max = df.groupby(['precursor','fraction','experiment'])[field].max() #Maximum per fraction

    # Number of (fraction, experiment) combinations each precursor was seen in
    prec_count = df_max.index.get_level_values('precursor').value_counts()

    if not minimum_occurence:
        minimum_occurence = np.percentile(prec_count[prec_count>1], 75) #Take the 25% best datapoints

    # The threshold is a minimum and therefore inclusive. A strict comparison
    # would select nothing whenever the threshold equals the highest count.
    precs = prec_count[prec_count >= minimum_occurence].index.tolist()

    n_profiles = len(precs)

    selected_precs = df_max.loc[precs]
    selected_precs = selected_precs.reset_index()

    # Missing measurements stay NaN
    profiles = np.empty((n_fractions, n_experiments, n_profiles))
    profiles[:] = np.nan

    #get dictionaries
    fraction_dict = {_:i for i,_ in enumerate(fractions)}
    experiment_dict = {_:i for i,_ in enumerate(experiments)}
    precursor_dict = {_:i for i,_ in enumerate(precs)}

    prec_id = [precursor_dict[_] for _ in selected_precs['precursor']]
    frac_id = [fraction_dict[_] for _ in selected_precs['fraction']]
    ex_id = [experiment_dict[_] for _ in selected_precs['experiment']]

    profiles[frac_id,ex_id, prec_id] = selected_precs[field]

    normalization = normalize_experiment_SLSQP(profiles)

    # Look up the factor of each row's (fraction, experiment) and apply it
    df[field+'_dn'] = df[field]*normalization[[fraction_dict[_] for _ in df['fraction']], [experiment_dict[_] for _ in df['experiment']]]

    return df, normalization

In [None]:
#hide
# Run nbdev's notebook export; the cell output lists every notebook that was converted.
# NOTE(review): presumably this writes the `#export` cells to the module named by
# `default_exp` at the top of this notebook - confirm against the nbdev documentation.
from nbdev.export import *
notebook2script()

Converted 01_chem.ipynb.
Converted 02_io.ipynb.
Converted 03_fasta.ipynb.
Converted 04_feature_finding.ipynb.
Converted 05_search.ipynb.
Converted 06_score.ipynb.
Converted 07_recalibration.ipynb.
Converted 08_quantification.ipynb.
Converted 09_matching.ipynb.
Converted 10_constants.ipynb.
Converted 11_settings.ipynb.
Converted 12_runner.ipynb.
Converted FF_parallel.ipynb.
Converted index.ipynb.
