In [1]:
import sys
import refquant.multi_run_table_creation as multi_run_table_creation
import refquant.table_import as table_import
import refquant.refquant_classes as refquant_classes
import refquant.refquant_utils as refquant_utils

# Relative path of the DIA-NN test input file; it is read as a tab-separated table and handed to the
# table reformatter in the next cell.
test_input = "../test_data/diann_test_input.tsv"


In [2]:
import pandas as pd
# Raw input table as a DataFrame (df_diann is not used again in the cells visible here).
df_diann = pd.read_csv(test_input, sep="\t")
# outfile_name of the reformatter is used as the path of the reference table in everything that follows.
# NOTE(review): presumably constructing TableReformatterDIANN writes that file as a side effect — confirm in table_import.
reference_table = table_import.TableReformatterDIANN(test_input).outfile_name

# Runs reported by refquant for the reformatted table.
runs = refquant_utils.get_runs(reference_table)

# Only the first run is checked in this notebook.
run = runs[0]

input file
/Users/constantin/workspace/refquant/test_data
../test_data/diann_test_input.tsv
using input type diann_fragion_isotopes_mDIA_raw
using input type diann_precursors_mDIA


In [3]:

class TargetPrecursorAnnotatorFCAnnot(refquant_classes.TargetPrecursorAnnotator):
    """Annotator subclass that also stores per-fragment-ion data on the target precursor.

    On top of the annotations inherited from ``TargetPrecursorAnnotator`` it sets two
    attributes on ``target_precursor``:

    * ``fragion2ratio`` -- intersection ion -> ratio to reference
    * ``fragion2intensity_intersect`` -- the entries of ``fragion2quantity`` whose key is an
      intersection ion
    """

    def __init__(self, reference_precursor, target_precursor):
        """Initialise the base annotator, then run the extended annotation sequence."""
        super().__init__(reference_precursor, target_precursor)
        # NOTE(review): if the base __init__ already calls _annotate_precursor, the sequence
        # runs twice for every precursor -- confirm against refquant_classes.
        self._annotate_precursor()

    def _annotate_precursor(self):
        """Run all annotation steps; ratio-dependent steps are skipped when no ratios were used."""
        self._annotate_number_of_ratios_used_to_precursor()
        self._annotate_intensity_based_reference_ratio()

        # Guard: everything below is only executed when at least one ratio was used.
        if not (self.target_precursor.number_of_ratios_used > 0):
            return

        self._annotate_derived_ratio()
        self._annotate_comparison_derived_quantity_to_precursor()
        self._annotate_ratio_of_most_abundant_fragion_to_reference()
        self._annotate_cosine_similarity()
        self._annotate_number_of_fragment_ions_available()
        self._annotate_fragment_ion_ratios()
        self._annotate_intersect_ion_intensities()

    def _annotate_fragment_ion_ratios(self):
        """Store the pairing of intersection ions with their ratios to the reference."""
        paired = zip(self._list_of_intersection_ions, self._ratios_to_reference)
        self.target_precursor.fragion2ratio = {fragion: ratio for fragion, ratio in paired}

    def _annotate_intersect_ion_intensities(self):
        """Store the subset of ``fragion2quantity`` restricted to the intersection ions."""
        intersection_ions = self._list_of_intersection_ions
        intensities_of_intersection = {}
        for fragion, intensity in self.target_precursor.fragion2quantity.items():
            if fragion in intersection_ions:
                intensities_of_intersection[fragion] = intensity
        self.target_precursor.fragion2intensity_intersect = intensities_of_intersection


In [4]:
# Monkey-patch: rebind the module attribute so that code which looks up
# refquant_classes.TargetPrecursorAnnotator at call time gets the subclass defined in the previous cell.
# NOTE(review): the class statement evaluates this same attribute as its base class, so re-running the
# class-definition cell after this one would derive from the already patched class — restart the kernel
# before re-running.
refquant_classes.TargetPrecursorAnnotator = TargetPrecursorAnnotatorFCAnnot

In [5]:
# Single-labelled precursor objects for the selected run. The consistency checks further down read
# .name, .channel_name, .fragion2ratio and .fragion2intensity_intersect from these objects.
# NOTE(review): presumably the last two attributes are set by the patched annotator — confirm.
single_labelled_precursors_for_run = refquant_classes.get_single_labelled_precursors(run, reference_table)

run:  20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_01_S5-A1_1_3970
0/958


  self.target_precursor.summed_quantity_reference = np.log2(np.sum(2**sorted_intensities_descending[:5]))


In [6]:
# Sanity check: how many single-labelled precursor objects were returned for this run.
print(len(single_labelled_precursors_for_run))

1916


In [7]:
import numpy as np
def check_precursor_consistencies(reference_table : str, run : str, single_labelled_precursors : list):
    """Cross-check annotated precursors against the reference table for one run.

    For each target channel and each precursor with a positive (log2) value in that channel,
    the per-ion intensities and the per-ion ratios to the reference computed from the table
    are compared with the values stored on the matching single-labelled precursor object.

    Args:
        reference_table: path of the tab-separated reference table.
        run: value of the ``run`` column to restrict the check to.
        single_labelled_precursors: objects exposing ``name`` and ``channel_name`` plus the
            attributes read by the two assertion helpers.

    Raises:
        AssertionError: raised by the assertion helpers on the first mismatch.
        KeyError: if a precursor/channel present in the table has no matching object.
    """
    precursor2channel2singlelabelledprecursors = get_precursor2channel2singlelabelledprecursor(single_labelled_precursors)
    # Hand the run over explicitly so the check uses the caller's run, not a notebook global.
    reference_df = get_reference_df(reference_table, run)
    channels = ["target4", "target8"]

    for channel in channels:
        # Values are log2-transformed at this point, so "> 0" keeps raw intensities above 1 and
        # drops -inf (raw intensity 0) as well as NaN. A plain boolean array is used as mask
        # because the precursor index contains duplicates.
        is_positive = (reference_df[channel] > 0).to_numpy()
        reference_df_channel = reference_df[is_positive]
        for precursor in reference_df_channel.index.unique():
            # Double brackets keep a DataFrame even when the precursor has a single row.
            precursor_df = reference_df_channel.loc[[precursor]]
            assert_that_intensities_in_target_channels_are_the_same(precursor_df, precursor, precursor2channel2singlelabelledprecursors, channel)
            assert_that_ratios_to_reference_are_the_same(precursor_df, precursor, precursor2channel2singlelabelledprecursors, channel)

    print("done checking precursor consistencies")

def get_reference_df(reference_table, run_name=None):
    """Load the reference table for one run, log2-transformed and indexed by precursor.

    Args:
        reference_table: path of the tab-separated table; it must contain the columns
            ``run``, ``precursor``, ``reference``, ``target4`` and ``target8``.
        run_name: value of the ``run`` column to keep. When omitted, the notebook-level
            variable ``run`` is used, which is what one-argument callers relied on before.

    Returns:
        DataFrame restricted to ``run_name`` and to rows with ``reference > 0``, with the three
        intensity columns replaced by their log2 values and ``precursor`` as index.
    """
    if run_name is None:
        # Legacy fallback for one-argument callers: read the notebook global.
        run_name = run
    reference_df = pd.read_csv(reference_table, sep="\t")
    reference_df = reference_df[reference_df["run"] == run_name]
    # Copy the filtered slice so the column assignment below never writes into a view.
    reference_df = reference_df[reference_df["reference"] > 0].copy()
    intensity_columns = ["reference", "target4", "target8"]
    # Zero intensities in the target channels become -inf on purpose (they are filtered out
    # per channel by the caller); silence only the divide-by-zero warning they trigger.
    with np.errstate(divide="ignore"):
        reference_df[intensity_columns] = np.log2(reference_df[intensity_columns])
    reference_df = reference_df.set_index("precursor")
    return reference_df

def get_precursor2channel2singlelabelledprecursor(single_labelled_precursors):
    """Index precursor objects first by ``name`` and then by ``channel_name``.

    When several objects share the same name and channel, the first one encountered is kept.

    Returns:
        dict of the form ``{name: {channel_name: object}}``.
    """
    by_precursor = {}
    for candidate in single_labelled_precursors:
        by_channel = by_precursor.setdefault(candidate.name, {})
        # setdefault leaves an existing entry untouched, so later duplicates are ignored.
        by_channel.setdefault(candidate.channel_name, candidate)
    return by_precursor

def assert_that_intensities_in_target_channels_are_the_same(precursor_df, precursor, precursor2channel2singlelabelledprecursors, channel):
    """Assert that table intensities and annotated intensities agree ion by ion.

    The expected values come from ``precursor_df`` (via ``get_ion2intensity_from_precursor_df``),
    the observed ones from ``fragion2intensity_intersect`` of the object stored under
    ``precursor`` / ``channel``. Both must have the same ions and values that are close
    according to ``np.isclose``.

    Raises:
        AssertionError: if the ion sets differ or any value pair is not close.
    """
    expected = get_ion2intensity_from_precursor_df(precursor_df, channel)
    observed = precursor2channel2singlelabelledprecursors[precursor][channel].fragion2intensity_intersect

    # Both mappings must cover exactly the same ions.
    same_ions = expected.keys() == observed.keys()
    if not same_ions:
        print("keys are not the same")
    assert same_ions

    # Compare the values ion by ion and make sure every ion was visited.
    num_compared = 0
    for ion, expected_intensity in expected.items():
        assert np.isclose(expected_intensity, observed[ion])
        num_compared += 1

    assert num_compared == len(expected)


def assert_that_ratios_to_reference_are_the_same(precursor_df, precursor, precursor2channel2singlelabelledprecursors, channel):
    """Assert that table-derived ratios and annotated ratios agree ion by ion.

    The expected ratios come from ``precursor_df`` (via ``get_ion2ratio_from_precursor_df``),
    the annotated ones from ``fragion2ratio`` of the object stored under
    ``precursor`` / ``channel``.

    Args:
        precursor_df: rows of the reference table belonging to ``precursor``.
        precursor: key of the precursor in ``precursor2channel2singlelabelledprecursors``.
        precursor2channel2singlelabelledprecursors: nested mapping
            ``{precursor: {channel: object with a fragion2ratio dict}}``.
        channel: name of the target channel column.

    Raises:
        AssertionError: if the two mappings cover different ions or a ratio pair is not close
            according to ``np.isclose``. The message names the precursor, channel and ion.
        KeyError: if ``precursor`` / ``channel`` is missing from the nested mapping.
    """
    ion2ratio_precursor_df = get_ion2ratio_from_precursor_df(precursor_df, channel)
    ion2ratio_single_labelled_precursor = precursor2channel2singlelabelledprecursors[precursor][channel].fragion2ratio

    # Both mappings must cover exactly the same ions; report the differences on failure.
    table_ions = set(ion2ratio_precursor_df)
    annotated_ions = set(ion2ratio_single_labelled_precursor)
    assert table_ions == annotated_ions, (
        f"fragment ions differ for precursor {precursor!r} in channel {channel!r}: "
        f"only in reference table {sorted(table_ions - annotated_ions, key=str)}, "
        f"only in annotated precursor {sorted(annotated_ions - table_ions, key=str)}"
    )

    # Iterating the table-side mapping visits every ion exactly once, so no counter is needed.
    for ion, table_ratio in ion2ratio_precursor_df.items():
        annotated_ratio = ion2ratio_single_labelled_precursor[ion]
        assert np.isclose(table_ratio, annotated_ratio), (
            f"ratio to reference differs for ion {ion!r} of precursor {precursor!r} "
            f"in channel {channel!r}: table {table_ratio}, annotated {annotated_ratio}"
        )



def get_ion2intensity_from_precursor_df(precursor_df : pd.DataFrame, channel : str):
    """Map each value of the ``ion`` column to the value of ``channel`` in the same row.

    If an ion occurs in several rows, the last row wins.
    """
    ions = precursor_df["ion"]
    intensities = precursor_df[channel]
    return {ion: intensity for ion, intensity in zip(ions, intensities)}

def get_ion2ratio_from_precursor_df(precursor_df : pd.DataFrame, channel : str):
    """Map each value of the ``ion`` column to ``channel`` minus ``reference`` of the same row.

    The result is a difference of the two columns as stored in ``precursor_df``; for
    log-transformed columns this difference is the log ratio to the reference.
    """
    difference_to_reference = precursor_df[channel].sub(precursor_df["reference"])
    return {ion: difference for ion, difference in zip(precursor_df["ion"], difference_to_reference)}


# Run the consistency check for the selected run; the assertion helpers raise AssertionError on the
# first mismatch, otherwise the function prints a completion message.
check_precursor_consistencies(reference_table, run, single_labelled_precursors_for_run)


  result = func(self.values, **kwargs)


done checking precursor consistencies
