In [None]:
# default_exp classify_ions

In [None]:
from anytree import Node, findall
import anytree
import alphaquant.classify.classify_ions_stacked as aq_classify_ions_stacked

# Mock protein ("gene") nodes. The selector further down is run with prot_fc_cutoff=0.75.
protein1, protein2, protein3 = (
    Node(protein_name, level="gene", fc=protein_fc)
    for protein_name, protein_fc in (
        ("Protein1", 0.5),  # fold change below the cutoff
        ("Protein2", 0.8),  # fold change above the cutoff
        ("Protein3", 0.8),  # fold change above the cutoff
    )
)

# Mock precursor ("mod_seq_charge") nodes: two under protein1, three under protein2,
# two under protein3. Creation order matches the numbering, so child order is stable.
(
    precursor1,
    precursor2,
    precursor3,
    precursor4,
    precursor5,
    precursor6,
    precursor7,
) = (
    Node(precursor_name, parent=parent_protein, level="mod_seq_charge")
    for precursor_name, parent_protein in (
        ("Precursor1", protein1),
        ("Precursor2", protein1),
        ("Precursor3", protein2),
        ("Precursor4", protein2),
        ("Precursor5", protein2),
        ("Precursor6", protein3),
        ("Precursor7", protein3),
    )
)

# Run the selector on the first two proteins with explicit cutoffs.
# NOTE(review): protein3 (and precursor6/precursor7) is built above but never handed to the
# selector, so nothing below checks how it would be classified.
selector = aq_classify_ions_stacked.PrecursorForTrainingSelector(
    protein_nodes=[protein1, protein2],
    min_num_precursors=3,
    prot_fc_cutoff=0.75,
)

# Expectation: every precursor of protein2 is usable for training ...
assert all(
    precursor in selector.precursors_suitable_for_training
    for precursor in (precursor3, precursor4, precursor5)
)
# ... while every precursor of protein1 is rejected.
assert all(
    precursor in selector.precursors_not_suitable_for_training
    for precursor in (precursor1, precursor2)
)

print("Test passed: Precursors correctly classified for training and not for training.")


In [None]:
import pandas as pd
import numpy as np
import alphaquant.classify.classify_ions_stacked as aq_classify_ions_stacked
from anytree import AnyNode

class MLInputTableCreatorTest(aq_classify_ions_stacked.MLInputTableCreator):
    """Test variant of ``MLInputTableCreator`` that runs on a hard-coded feature table.

    The parent constructor is intentionally not called: ``__init__`` sets up the
    attributes itself and then runs the table-building steps, with
    ``_define_merged_df`` overridden to supply mock data instead of a real merge.
    """

    def __init__(self, precursors, acquisition_info_df, replace_nans=False, numeric_threshold=0.2):
        """Store the inputs, reset the result attributes and run the build steps in order.

        Args:
            precursors: Precursor nodes stored on ``self._precursors``.
            acquisition_info_df: Stored on ``self._acquisition_info_df``; the overridden
                ``_define_merged_df`` in this class does not read it.
            replace_nans: Stored on ``self._replace_nans``.
            numeric_threshold: Stored on ``self._numeric_threshold``.
        """
        # Result attributes start out empty.
        self.X = None  # the input for the ML model which has corresponding y values, so it is possible to train with this table
        self.y = None
        self.featurenames = None
        self.ionnames = None
        self._merged_df = None

        # Inputs and settings.
        self._numeric_threshold = numeric_threshold
        self._replace_nans = replace_nans
        self._acquisition_info_df = acquisition_info_df
        self._precursors = precursors

        # The steps are order-dependent; apart from _define_merged_df they are inherited.
        build_steps = (
            self._define_merged_df,
            self._define_ionnames,
            self._remove_non_numeric_columns_from_merged_df,
            self._define_featurenames,
            self._define_X,
            self._define_y,
        )
        for run_step in build_steps:
            run_step()

    def _define_merged_df(self):
        """Set ``self._merged_df`` to a fixed six-row table (Q1..Q6) with five feature columns.

        Row Q6 is NaN in every feature column and Q1 is NaN in ``feature4``.
        """
        nan = np.nan
        self._merged_df = pd.DataFrame(
            {
                "quant_id": ["Q1", "Q2", "Q3", "Q4", "Q5", "Q6"],
                "feature1": [1.0, 2.0, 2.0, 3.0, 3.0, nan],
                "feature2": [4.0, 5.0, 6.0, 2.0, 7.0, nan],
                "feature3": [7.0, 8.0, 9.0, 10.0, 11.0, nan],
                "feature4": [nan, 13.0, 14.0, 15.0, 16.0, nan],
                "feature5": [17.0, 2.0, 18.0, 19.0, 20.0, nan],
            }
        )


def create_precursors():
    """Build six mock precursor nodes (Q1..Q6) attached to one shared gene node.

    Returns:
        list: ``AnyNode`` objects at level "mod_seq_charge" with fold changes
        1.5, 2.5, ..., 6.5, all children of a "gene" node whose ``fc`` is 0.5.
    """
    shared_gene = AnyNode(name="GeneParent", level="gene", fc=0.5)

    name_and_fc = (
        ("Q1", 1.5),
        ("Q2", 2.5),
        ("Q3", 3.5),
        ("Q4", 4.5),
        ("Q5", 5.5),
        ("Q6", 6.5),
    )

    nodes = []
    for quant_id, fold_change in name_and_fc:
        nodes.append(
            AnyNode(name=quant_id, fc=fold_change, parent=shared_gene, level="mod_seq_charge")
        )
    return nodes

def test_ml_input_table_creator():
    """Build ``MLInputTableCreatorTest`` from mock inputs and check the tables it exposes.

    Verifies that the merged table exists and contains the mock features, that
    ``X``, ``y``, ``ionnames`` and ``featurenames`` have consistent sizes, that ``X``
    holds no NaNs when ``replace_nans=True``, and that ``y`` matches the precursor fold
    changes relative to the gene fold change.
    """
    # Mock inputs: precursor nodes plus an acquisition-info table keyed by quant_id.
    mock_precursors = create_precursors()
    mock_acquisition_info = pd.DataFrame(
        {
            "quant_id": ["Q1", "Q2", "Q3", "Q4", "Q5", "Q6"],
            "extra_info": [100, 200, 100, 200, 100, 200],
        }
    )

    creator = MLInputTableCreatorTest(mock_precursors, mock_acquisition_info, replace_nans=True)

    # Merged table
    assert creator._merged_df is not None, "Merged DataFrame should be correctly defined"
    assert "feature1" in creator._merged_df.columns, "Merged DataFrame should contain mock features"

    # Shapes and contents of the derived outputs
    num_targets = len(creator.y)
    num_ions = len(creator.ionnames)
    assert num_targets == 6, "y should be correctly defined based on precursors"
    assert not np.isnan(creator.X).any(), "NaNs should be replaced in X if replace_nans=True"
    assert num_ions == 6, "Ion names should be correctly extracted from quant_id"
    assert num_ions == len(creator.y), "Ion names and y should have the same length"
    assert len(creator.featurenames) == creator.X.shape[1], "Feature names should be correctly extracted"

    # Target values must equal precursor fc minus the gene fc (0.5 in create_precursors).
    test_fcs_are_as_expected(
        mock_precursors,
        gene_fc=0.5,
        y=creator.y,
        ionnames=creator.ionnames,
    )

    print("All tests passed.")


def test_fcs_are_as_expected(precursors, gene_fc, y, ionnames):
    name2fc = {node.name: node.fc - gene_fc for node in precursors}
    assert np.allclose(y, [name2fc[ionname] for ionname in ionnames]), "Fold changes should be as expected"

# Execute the test right away so that running this cell exercises MLInputTableCreatorTest.
test_ml_input_table_creator()



In [None]:
import alphaquant.classify.classify_ions as aqclassify
import numpy.random
import copy
import numpy as np
def test_iterative_cross_predict(seed=42):
    """Check that cross-prediction keeps each returned target paired with its ion name.

    Random features, targets and (float-valued) ion names are generated and a random
    subset of rows is picked as controls. After running
    ``aqclassify.random_forest_iterative_cross_predict``, every returned target that
    belongs to a control ion must be one of the targets originally drawn for the
    control ions.

    Args:
        seed: Seed for the local random generator that produces the mock inputs, so
            the generated data is the same on every run.

    Raises:
        AssertionError: If a returned target of a control ion is not among the
            original control targets.
    """
    # A local, seeded generator keeps the inputs reproducible without touching
    # NumPy's global random state.
    rng = numpy.random.RandomState(seed)
    num_samples = 200

    X = rng.rand(num_samples, 3)
    y = rng.rand(num_samples)
    ionnames = rng.rand(num_samples)
    # randint's upper bound is exclusive; using num_samples makes every row selectable.
    select_idxs = rng.randint(low=0, high=num_samples, size=15)
    control_ionnames = copy.copy(ionnames[select_idxs])

    # NOTE(review): the positional 5 is presumably the number of splits/iterations -
    # confirm against aqclassify.random_forest_iterative_cross_predict.
    y_test_all, _, ionnames_all, _ = aqclassify.random_forest_iterative_cross_predict(X, y, ionnames, 5, aqclassify.RandomForestRegressor())

    # Positions in the returned arrays that belong to control ions.
    idxs_ionnames = [x for x in range(len(ionnames_all)) if ionnames_all[x] in control_ionnames]
    # Original rows of those control ions that actually appear in the returned ion names.
    control_idxs_y = [x for x in select_idxs if ionnames[x] in ionnames_all]
    control_ys = y[control_idxs_y]

    y_test_all_control = np.array(y_test_all)[idxs_ionnames]

    # Every returned control target must be one of the original control targets.
    assert set(y_test_all_control).issubset(set(control_ys)), "Returned targets of control ions should match their original targets"


# Execute the test right away so that running this cell exercises the cross-prediction check.
test_iterative_cross_predict()
