### Test ProteoformConditionAligner

In [None]:
import alphaquant.multicond.median_condition_analysis as aqmca
import anytree



def test_ProteoformConditionAligner(cluster_matrix, expected_number_of_proteoforms, expected_peptide_groups):
    """Run ProteoformConditionAligner on simulated nodes and check its proteoform table.

    Args:
        cluster_matrix: nested list with one row per peptide and one column
            per condition; entries are cluster indices.
        expected_number_of_proteoforms: number of distinct ``proteoform_id``
            values the aligner is expected to produce.
        expected_peptide_groups: expected values of the ``peptides`` column.
    """
    nodes_per_condition = simulate_list_of_protein_nodes_from_cluster_matrix(cluster_matrix)
    condition_aligner = aqmca.ProteoformConditionAligner(nodes_per_condition)

    # First the proteoform count, then the peptide composition of the groups.
    test_that_num_proteoforms_is_as_expected(
        condition_aligner.proteoform_df, expected_number_of_proteoforms
    )
    test_that_expected_peptides_are_in_groups(
        condition_aligner.proteoform_df, expected_peptide_groups
    )


def simulate_list_of_protein_nodes_from_cluster_matrix(cluster_matrix):
    """Create one simulated protein node per condition (column) of ``cluster_matrix``.

    The number of conditions is taken from the first row. For condition ``i``
    the i-th entry of every row is collected and handed to
    ``simulate_protein_node_w_clustermatrix_subset`` under the name ``cond{i}``.

    Returns:
        list of protein nodes, in column order.
    """
    column_count = len(cluster_matrix[0])
    return [
        simulate_protein_node_w_clustermatrix_subset(
            [peptide_row[column] for peptide_row in cluster_matrix],
            f"cond{column}",
        )
        for column in range(column_count)
    ]
        

def simulate_protein_node_w_clustermatrix_subset(list_of_cluster_idxs_of_every_peptide, condition):
    """Build a condition -> protein -> peptides tree for a single condition.

    Args:
        list_of_cluster_idxs_of_every_peptide: cluster index of each peptide;
            the position in the list becomes the peptide number (``pep_<n>``).
        condition: condition label; the root node is named
            ``[condition, "median_reference"]``.

    Returns:
        The protein node (child of the condition root) with one peptide child
        per list entry.
    """
    root_node = anytree.Node([condition, "median_reference"])
    protein_node = anytree.Node("protein", parent=root_node)

    for peptide_number, cluster_idx in enumerate(list_of_cluster_idxs_of_every_peptide):
        peptide_node = anytree.Node(f"pep_{peptide_number}", parent=protein_node)
        # The cluster index is reused as the simulated fold change.
        peptide_node.cluster = cluster_idx
        peptide_node.fc = cluster_idx

    return protein_node


def test_that_num_proteoforms_is_as_expected(peptide_cluster_df, num_proteoforms_exptected):
    assert len(set(peptide_cluster_df["proteoform_id"])) == num_proteoforms_exptected
    print("Test passed: Number of proteoforms is as expected.")


def test_that_expected_peptides_are_in_groups(peptide_cluster_df, list_of_expected_peptide_groups):
    expected_peptides_derived_from_peptide_groups = set(peptide_cluster_df["peptides"])
    assert expected_peptides_derived_from_peptide_groups == set(list_of_expected_peptide_groups)
    print("Test passed: Expected peptides are in groups.")





# Case 1: five peptides (rows) across four conditions (columns). Each entry is
# the cluster index of that peptide in that condition; the simulation helper
# also uses it as the peptide's fold change.
cluster_matrix = [[0, 0, 1, 0],
                  [1, 1, 0, 1],
                  [0, 0, 1, 1],
                  [1, 1, 0, 1],
                  [1, 2, 3, 4]
                  ]
# Expected aligner output: three proteoforms; peptide groups are ';'-joined names.
expected_number_of_proteoforms = 3
expected_peptide_groups = ["pep_0;pep_1;pep_3", "pep_2", "pep_4"]

test_ProteoformConditionAligner(cluster_matrix, expected_number_of_proteoforms, expected_peptide_groups)


# Case 2: four peptides (rows) across four conditions (columns); rebinds the
# same notebook-level names used by the previous case.
cluster_matrix = [[0, 1, 1, 0], 
                  [1, 1, 0, 1], 
                  [0, 0, 1, 0], 
                  [1, 1, 0, 1]]
# Expected aligner output: two proteoforms, one of them a single peptide.
expected_number_of_proteoforms = 2
expected_peptide_groups = ["pep_0", "pep_1;pep_2;pep_3"]

test_ProteoformConditionAligner(cluster_matrix, expected_number_of_proteoforms, expected_peptide_groups)






### Test ProteoformPeptideDfCreator

In [None]:
import alphaquant.multicond.median_condition_analysis as aqmca

def test_ProtoformPeptideDfCreator(list_of_cluster_and_fc, list_of_numpep_per_cluster, number_conditions ):
    """Build a ProteoformPeptideDfCreator on simulated input and run all three checks.

    Args:
        list_of_cluster_and_fc: fold change assigned to each cluster.
        list_of_numpep_per_cluster: number of peptides simulated per cluster.
        number_conditions: number of simulated conditions.
    """
    df_creator = get_ProtoformPeptideDfCreator_on_specified_simulated_input(
        list_of_cluster_and_fc=list_of_cluster_and_fc,
        list_of_numpep_per_cluster=list_of_numpep_per_cluster,
        number_conditions=number_conditions,
    )

    # Shape first, then the cluster table, then the fold-change table.
    test_dataframe_dimensions(df_creator, list_of_numpep_per_cluster, number_conditions)
    test_cluster_values(df_creator, list_of_numpep_per_cluster)
    test_fc_values(df_creator, list_of_numpep_per_cluster, list_of_cluster_and_fc)

def get_ProtoformPeptideDfCreator_on_specified_simulated_input(list_of_cluster_and_fc = [1.1, -1.1], list_of_numpep_per_cluster = [2, 4], number_conditions = 5):
    """Return a ProteoformPeptideDfCreator fed with simulated protein nodes.

    The same simulated protein (same clusters, peptide counts and fold
    changes) is generated once per condition and the resulting nodes are
    passed to ``aqmca.ProteoformPeptideDfCreator``.
    """
    # NOTE: the list defaults are only read here, never mutated.
    return aqmca.ProteoformPeptideDfCreator(
        simulate_nodes_same_protein_different_conditions(
            list_of_cluster_and_fc,
            list_of_numpep_per_cluster,
            number_conditions,
        )
    )


def simulate_nodes_same_protein_different_conditions(list_of_cluster_and_fc, list_of_numpep_per_cluster, number_conditions):
    """Simulate the same protein once for each of ``number_conditions`` conditions.

    Conditions are named ``cond_0`` ... ``cond_<number_conditions - 1>``.

    Returns:
        list with one protein node per condition, in condition order.
    """
    condition_names = [f"cond_{number}" for number in range(number_conditions)]

    protein_nodes = []
    for condition_name in condition_names:
        protein_nodes.append(
            simulate_protein_node(list_of_cluster_and_fc, list_of_numpep_per_cluster, condition_name)
        )
    return protein_nodes


def simulate_protein_node(list_of_cluster_and_fc = [1.1, -1.1], list_of_numpep_per_cluster = [2, 2], condition = "cond1"):
    """Build a condition -> protein -> peptides tree with clustered peptides.

    Args:
        list_of_cluster_and_fc: fold change per cluster, indexed by cluster.
        list_of_numpep_per_cluster: number of peptides to create per cluster.
        condition: condition label; the root node is named
            ``[condition, "median_reference"]``.

    Returns:
        The protein node. Its peptide children are named
        ``pep_<cluster>_<n>`` and carry ``cluster`` and ``fc`` attributes.
    """
    root_node = anytree.Node([condition, "median_reference"])
    protein_node = anytree.Node("protein", parent=root_node)

    for cluster_number, peptide_count in enumerate(list_of_numpep_per_cluster):
        for peptide_number in range(peptide_count):
            peptide_node = anytree.Node(f"pep_{cluster_number}_{peptide_number}", parent=protein_node)
            peptide_node.cluster = cluster_number
            # Looked up per peptide, so an empty cluster never touches the fc list.
            peptide_node.fc = list_of_cluster_and_fc[cluster_number]

    return protein_node
        

def test_dataframe_dimensions(creator, list_of_numpep_per_cluster, num_conditions):
    num_peptides = sum(list_of_numpep_per_cluster)
    assert creator.peptide_cluster_df.shape == (num_peptides, num_conditions), "Incorrect dimensions for peptide_cluster_df"
    assert creator.peptide_fc_df.shape == (num_peptides, num_conditions), "Incorrect dimensions for peptide_fc_df"
    print("dimensions test passed")

def test_cluster_values(creator, list_of_numpep_per_cluster):
    """Assert that every column of ``peptide_cluster_df`` holds the expected cluster indices."""
    cluster_df = creator.peptide_cluster_df
    expected = get_expected_cluster_values(list_of_numpep_per_cluster)

    for col in cluster_df.columns:
        column_matches = cluster_df[col] == expected
        assert all(column_matches), f"Mismatch in clusters for {col}"

    print("clusters test passed")

def get_expected_cluster_values(list_of_numpep_per_cluster):
    """Return the cluster index of every peptide, repeated per peptide in cluster order.

    Example: ``[2, 1]`` -> ``[0, 0, 1]``.
    """
    return [
        cluster_number
        for cluster_number, peptide_count in enumerate(list_of_numpep_per_cluster)
        for _ in range(peptide_count)
    ]

def test_fc_values(creator, list_of_numpeps_per_cluster, list_of_cluster_and_fc):
    """Assert that every column of ``peptide_fc_df`` holds the expected fold changes."""
    fc_df = creator.peptide_fc_df
    expected = get_expected_fc_values(list_of_numpeps_per_cluster, list_of_cluster_and_fc)

    for col in fc_df.columns:
        column_matches = fc_df[col] == expected
        assert all(column_matches), f"Mismatch in fc values for {col}"

    print("fcs test passed")

def get_expected_fc_values(list_of_numpeps_per_cluster, list_of_cluster_and_fc):
    """Return the fold change of every peptide, repeated per peptide in cluster order.

    Example: counts ``[2, 1]`` with fold changes ``[1.1, -1.1]`` ->
    ``[1.1, 1.1, -1.1]``.
    """
    fold_changes = []
    for cluster_number, peptide_count in enumerate(list_of_numpeps_per_cluster):
        cluster_fc = list_of_cluster_and_fc[cluster_number]
        fold_changes += [cluster_fc] * peptide_count
    return fold_changes




# Three scenarios: two small clusters over 5 conditions, four clusters of very
# different sizes over 60 conditions, and a single one-peptide cluster over 2 conditions.
test_ProtoformPeptideDfCreator(list_of_cluster_and_fc = [1.1, -1.1], list_of_numpep_per_cluster = [2, 4], number_conditions = 5)
test_ProtoformPeptideDfCreator(list_of_cluster_and_fc = [1.1, -1.1, 10, 3], list_of_numpep_per_cluster = [2, 4, 12, 200], number_conditions = 60)
test_ProtoformPeptideDfCreator(list_of_cluster_and_fc = [1.1], list_of_numpep_per_cluster = [1], number_conditions = 2)


### Test ProteoformDfCreator

In [None]:
import alphaquant.multicond.median_condition_analysis as aqmca
import numpy as np
import numpy.random

def test_ProteoformDfCreator(groups_of_peptide_clusters, peptide_fc_df, protein_name):
    creator = aqmca.ProteoformDfCreator(groups_of_peptide_clusters, peptide_fc_df, protein_name)

    test_initialization(creator)
    test_proteoform_grouping(creator, groups_of_peptide_clusters, protein_name)
    test_fold_change_data(creator, peptide_fc_df)
    test_dataframe_structure(creator, peptide_fc_df)

def test_initialization(creator):
    display(creator.proteoform_df)
    assert creator.proteoform_df is not None, "proteoform_df should not be None"
    print("Initialization test passed")

def test_proteoform_grouping(creator, groups_of_peptide_clusters, protein_name):
    for idx, group in enumerate(groups_of_peptide_clusters):
        proteoform_id = f"{protein_name}_{idx}"
        group_df = creator.proteoform_df[creator.proteoform_df['proteoform_id'] == proteoform_id]
        assert all(group_df['peptides'] == ';'.join(group)), "Incorrect peptides in proteoform"
        assert all(group_df['protein'] == protein_name), "Incorrect protein name"
    print("Proteoform grouping test passed")

def test_fold_change_data(creator, peptide_fc_df):
    for peptide in peptide_fc_df.index:
        for cond in peptide_fc_df.columns:
            assert np.isclose(creator.proteoform_df.loc[peptide, cond], peptide_fc_df.loc[peptide, cond]), f"Mismatch in fold change data for {peptide}, {cond}"
    print("Fold change data test passed")

def test_dataframe_structure(creator, peptide_fc_df):
    expected_columns = ['peptides', 'protein', 'proteoform_id'] + list(peptide_fc_df.columns)
    assert set(creator.proteoform_df.columns) == set(expected_columns), "Incorrect dataframe columns"
    expected_rows = sum(len(group) for group in groups_of_peptide_clusters)
    assert len(creator.proteoform_df) == expected_rows, "Incorrect number of rows in dataframe"
    print("Dataframe structure test passed")

def generate_peptide_fc_df(groups_of_peptide_clusters, number_conditions):
    """Create a random fold-change table with one row per peptide.

    Args:
        groups_of_peptide_clusters: iterable of peptide-name groups. A peptide
            that occurs more than once gets a single row, at the position of
            its first occurrence.
        number_conditions: number of condition columns to generate.

    Returns:
        pandas.DataFrame indexed by peptide name with columns ``cond_0`` ...
        ``cond_<number_conditions - 1>`` holding uniform random values drawn
        from ``numpy.random.random`` (half-open interval [0, 1)). With no
        peptides an empty frame with the condition columns is returned.
    """
    # Imported locally because this cell never imports pandas itself.
    import pandas as pd

    # dict.fromkeys keeps first-seen order while dropping repeated names.
    peptides = list(
        dict.fromkeys(
            peptide for group in groups_of_peptide_clusters for peptide in group
        )
    )
    condition_names = [f"cond_{idx}" for idx in range(number_conditions)]
    fold_changes = numpy.random.random((len(peptides), number_conditions))
    return pd.DataFrame(fold_changes, index=peptides, columns=condition_names)


# Example usage: two peptide groups (three and two peptides) with random fold
# changes over three conditions. These are notebook-level names;
# test_dataframe_structure reads groups_of_peptide_clusters from this scope.
groups_of_peptide_clusters = [["A", "B", "C"], ["D", "E"]]
peptide_fc_df = generate_peptide_fc_df(groups_of_peptide_clusters, 3)
protein_name = "protein"

test_ProteoformDfCreator(groups_of_peptide_clusters, peptide_fc_df, protein_name)

# Second scenario: four groups of different sizes, including a single-peptide
# group; rebinds the same notebook-level names as the previous case.
groups_of_peptide_clusters = [["A", "B"], ["D", "E"], ["C"], ["F", "G", "H"]]
peptide_fc_df = generate_peptide_fc_df(groups_of_peptide_clusters, 3)
protein_name = "protein"

test_ProteoformDfCreator(groups_of_peptide_clusters, peptide_fc_df, protein_name)
