In [1]:
#default_exp rescore.fdr

In [2]:
#export
import numba
import numpy as np
import pandas as pd

@numba.njit
def fdr_to_q_values(
    fdr_values:np.array
)->np.array:
    """Convert a sequence of FDR values into q-values.

    The q-value at position ``i`` is the smallest FDR found at position
    ``i`` or at any later position, so the result never decreases when
    read from the first element to the last.

    Args:
        fdr_values: 1-D array of FDR values, in ranking order.

    Returns:
        Array with the same shape and dtype as ``fdr_values`` holding the
        q-values. An empty input yields an empty output.
    """
    q_values = np.zeros_like(fdr_values)
    # np.max raises on an empty array, so hand back the empty result early.
    if len(fdr_values) == 0:
        return q_values
    min_q_value = np.max(fdr_values)
    # Walk from the last element to the first, tracking the running minimum.
    for i in range(len(fdr_values) - 1, -1, -1):
        fdr = fdr_values[i]
        if fdr < min_q_value:
            min_q_value = fdr
        q_values[i] = min_q_value
    return q_values

def calc_fdr(
    df:pd.DataFrame, 
    score_column:str, 
    decoy_column:str='decoy'
)->pd.DataFrame:
    """Rank rows by score and attach q-values in a new ``fdr`` column.

    Rows are sorted by ``score_column`` in descending order; on equal
    scores, rows with the larger ``decoy_column`` value come first. The FDR
    at each rank is the cumulative decoy count divided by the cumulative
    target count, and is then converted to q-values with
    ``fdr_to_q_values``.

    Args:
        df: Input table. It is not modified; a sorted copy is returned.
        score_column: Name of the column to rank by (higher ranks first).
        decoy_column: Name of the decoy flag column. Targets are counted as
            ``1 - decoy``, so the column is expected to hold 1 for decoys
            and 0 for targets.

    Returns:
        The sorted copy of ``df`` with an added ``fdr`` column. The index is
        reset before sorting, so index labels are the original row
        positions.
    """
    df = df.reset_index(drop=True).sort_values(
        [score_column,decoy_column], ascending=False
    )
    decoy_values = df[decoy_column].values
    target_values = 1-decoy_values
    decoy_cumsum = np.cumsum(decoy_values)
    target_cumsum = np.cumsum(target_values)
    # While no target has been seen yet the denominator is 0. The resulting
    # inf/nan values are kept as-is; only the RuntimeWarning is silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        fdr_values = decoy_cumsum/target_cumsum
    df['fdr'] = fdr_to_q_values(fdr_values)
    return df

# Second name bound to the same function object as `calc_fdr`.
calc_fdr_for_df = calc_fdr

@numba.njit
def fdr_from_ref(
    scores:np.array, 
    ref_scores:np.array, 
    ref_fdr_values:np.array
)->np.array:
    """Assign each score the FDR of the first reference score it reaches.

    Both ``scores`` and ``ref_scores`` are walked with forward-only
    cursors, so the mapping is only meaningful when both arrays are in
    descending order. Each score receives the FDR value of the first
    reference entry whose score is less than or equal to it. Scores below
    every remaining reference score receive the last reference FDR value,
    which requires ``ref_fdr_values`` to be non-empty in that case.

    Args:
        scores: Scores to annotate, in descending order.
        ref_scores: Reference scores, in descending order.
        ref_fdr_values: FDR values aligned element-wise with ``ref_scores``.

    Returns:
        Array shaped like ``scores`` holding the looked-up FDR values, with
        the dtype of ``ref_fdr_values``.
    """
    # Use the dtype of the reference FDR values rather than that of the
    # scores: integer scores would otherwise truncate the FDR values.
    q_values = np.zeros(scores.shape, dtype=ref_fdr_values.dtype)
    i,j = 0,0
    while i < len(scores) and j < len(ref_scores):
        if scores[i] >= ref_scores[j]:
            q_values[i] = ref_fdr_values[j]
            i += 1
        else:
            j += 1
    # Reference exhausted: remaining scores get the last reference FDR value.
    while i < len(scores):
        q_values[i] = ref_fdr_values[-1]
        i += 1
    return q_values

def calc_fdr_from_ref(
    df: pd.DataFrame,
    ref_scores:np.array, 
    ref_fdr_values:np.array,
    score_column:str, 
    decoy_column:str='decoy'
)->pd.DataFrame:
    """Attach FDR values to ``df`` by looking them up in a reference set.

    ``df`` is sorted by ``score_column`` in descending order (larger
    ``decoy_column`` values first on ties). The reference arrays are
    reordered by ascending reference FDR value, and each row then receives
    an FDR value through ``fdr_from_ref``.

    Args:
        df: Input table. It is not modified; a sorted copy is returned.
        ref_scores: Array of reference scores.
        ref_fdr_values: Array of FDR values aligned with ``ref_scores``.
        score_column: Name of the score column in ``df``.
        decoy_column: Name of the decoy flag column, used only as a sort
            tie-breaker.

    Returns:
        The sorted copy of ``df`` with an added ``fdr`` column.
    """
    df = df.reset_index(drop=True).sort_values(
        [score_column,decoy_column], ascending=False
    )
    sorted_idxes = np.argsort(ref_fdr_values)
    ref_scores = ref_scores[sorted_idxes]
    ref_q_values = ref_fdr_values[sorted_idxes]
    # Read the scores through `score_column` instead of a hard-coded
    # `score` attribute, so other column names work.
    df['fdr'] = fdr_from_ref(
        df[score_column].values, ref_scores, ref_q_values
    )
    return df

# Second name bound to the same function object as `calc_fdr_from_ref`.
calc_fdr_from_ref_for_df = calc_fdr_from_ref

In [3]:
#hide
# Synthetic data: 500 targets scoring in [11, 21).
df = pd.DataFrame(
    {
        'score': np.random.random(500)*10+11,
        'decoy': 0,
        'kind': True,
    }
)
f_score = np.random.random(500)*9.9
# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same
# row order and index labels.
df = pd.concat(
    [
        df,
        # 500 decoys, each 0.01 above its paired low-scoring target.
        pd.DataFrame(
            {
                'score': f_score+0.01,
                'decoy': 1,
                'kind': False
            }
        ),
        # 500 low-scoring targets in [0, 9.9).
        pd.DataFrame(
            {
                'score': f_score,
                'decoy': 0,
                'kind': False
            }
        ),
        # 5 decoys scoring in [10, 11), just below the high-scoring targets.
        pd.DataFrame(
            {
                'score': np.random.random(5)+10,
                'decoy': 1,
                'kind': False
            }
        ),
    ]
)

df = calc_fdr(df, 'score', 'decoy')
df

Unnamed: 0,score,decoy,kind,fdr
408,20.989234,0,True,0.000000
108,20.984767,0,True,0.000000
226,20.984159,0,True,0.000000
381,20.984008,0,True,0.000000
332,20.978874,0,True,0.000000
...,...,...,...,...
539,0.028029,1,False,0.504505
1301,0.019223,0,False,0.504505
1039,0.018029,0,False,0.504505
670,0.011694,1,False,0.505000


In [4]:
# Target rows whose FDR value is below 0.01.
df.loc[(df['fdr'] < 0.01) & (df['decoy'] == 0)]

Unnamed: 0,score,decoy,kind,fdr
408,20.989234,0,True,0.0
108,20.984767,0,True,0.0
226,20.984159,0,True,0.0
381,20.984008,0,True,0.0
332,20.978874,0,True,0.0
...,...,...,...,...
195,11.092966,0,True,0.0
96,11.078561,0,True,0.0
319,11.076630,0,True,0.0
106,11.062471,0,True,0.0


In [5]:
#hide
# Exactly 500 target rows are expected to have an FDR value below 0.01.
assert ((df['fdr'] < 0.01) & (df['decoy'] == 0)).sum() == 500

In [6]:
#hide
# Second synthetic data set with the same layout, annotated from `df`.
dff = pd.DataFrame(
    {
        'score': np.random.random(500)*10+11,
        'decoy': 0
    }
)
f_score = np.random.random(500)*9.9
# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same
# row order and index labels.
dff = pd.concat(
    [
        dff,
        pd.DataFrame(
            {
                'score': f_score+0.01,
                'decoy': 1
            }
        ),
        pd.DataFrame(
            {
                'score': f_score,
                'decoy': 0
            }
        ),
        pd.DataFrame(
            {
                'score': np.random.random(5)+10,
                'decoy': 1
            }
        ),
    ]
)

# fdr_from_ref walks both score arrays with forward-only cursors, so the
# scores must be in descending order before the lookup.
dff = dff.sort_values('score', ascending=False)
dff['fdr'] = fdr_from_ref(dff.score.values, df.score.values, df.fdr.values)

assert len(dff[(dff.fdr < 0.01)&(dff.decoy==0)]) == 500

In [7]:
# Repeat the check through the DataFrame-level wrapper.
dff = calc_fdr_from_ref(
    dff,
    ref_scores=df['score'].values,
    ref_fdr_values=df['fdr'].values,
    score_column='score',
)
assert ((dff['fdr'] < 0.01) & (dff['decoy'] == 0)).sum() == 500