In [18]:
# default_exp multi_condition_analysis

In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


We build up multi-condition analyses with AlphaQuant from pairwise comparisons. We want to create the following types of analyses or overviews:

Representations where we see all samples:
- Overview of protein pseudointensities
- Z-score-type analysis, meaning that you see the regulation between individual samples for every protein

We estimate both of these maps with the directLFQ algorithm.

Representations where we see the individual conditions:
- protein pseudointensities per condition
- step-wise indication of up- and downregulation
- We want to show some equivalent of one-way ANOVA, which tests the null hypothesis that two or more groups have the same population mean

Representations somewhat in-between:
- Pseudointensities chosen such that t-test p-values have the same significance as the AlphaQuant p-values, while the intensities retain the fold changes


In [33]:
import alphaquant.runner.multi_condition_analysis as aqmca
import pandas as pd
import numpy as np

#test aqmca.MedianConditionCreator


def run_tests():
    """Run the MedianConditionCreator checks on a synthetic data set.

    Builds a sample map with 3, 2 and 3 replicates for three conditions and a
    matching random intensity table (100 quant ids, 30% missing values per
    sample), constructs ``aqmca.MedianConditionCreator`` from both, and then
    runs the replicate-count and shape checks. The extended input table is
    displayed between the checks for visual inspection.

    Raises:
        AssertionError: if any of the individual checks fails.
    """
    replicates_per_condition = [3, 2, 3]
    samplemap_df = generate_samplemap_df(replicates_per_condition)
    input_df_aqformat = generate_normalized_input_df(samplemap_df, num_quant_ids=100, nan_fraction=0.3)
    mediancreator = aqmca.MedianConditionCreator(samplemap_df, input_df_aqformat)

    # The checks below expect as many median replicates as the smallest
    # condition has replicates, and one row per row of the input table.
    num_median_replicates = min(replicates_per_condition)
    num_ions = input_df_aqformat.shape[0]

    test_determine_number_replicates(mediancreator, num_median_replicates)

    display(mediancreator.extended_input_df)

    test_median_dataframe_shape(mediancreator, input_df_aqformat, num_median_replicates, num_ions)
    test_extended_samplemap_shape(mediancreator, samplemap_df, num_median_replicates)

    print("All tests passed!")



def generate_samplemap_df(replicate_numbers):
    """Create a synthetic sample map with one row per sample.

    Args:
        replicate_numbers: sequence of ints; entry ``i`` is the number of
            replicates to generate for condition ``i + 1``.

    Returns:
        pd.DataFrame with the columns ``'sample'`` (``sample<c>_<r>``) and
        ``'condition'`` (``condition<c>``), where ``c`` and ``r`` are the
        1-based condition and replicate numbers. Rows are ordered by
        condition, then by replicate.
    """
    # (sample name, condition name) pairs in the final row order.
    sample_condition_pairs = [
        (f'sample{condition_number}_{replicate_number}', f'condition{condition_number}')
        for condition_number, num_replicates in enumerate(replicate_numbers, start=1)
        for replicate_number in range(1, num_replicates + 1)
    ]

    return pd.DataFrame({
        'sample': [sample for sample, _ in sample_condition_pairs],
        'condition': [condition for _, condition in sample_condition_pairs],
    })

def generate_normalized_input_df(samplemap_df, num_quant_ids=10, nan_fraction=0.1, seed=0):
    """Create a synthetic intensity table with random values and missing entries.

    The random numbers come from a private ``np.random.RandomState(seed)``
    instead of reseeding NumPy's global generator, so calling this function
    no longer changes the random state seen by other code. With the default
    ``seed=0`` the generated values are the same as those produced by the
    previous ``np.random.seed(0)`` implementation.

    Args:
        samplemap_df: DataFrame with a ``'sample'`` column; one intensity
            column is generated per entry, in the given order.
        num_quant_ids: number of rows (``quant_<i>`` / ``protein_<i>``).
        nan_fraction: if greater than 0, ``floor(nan_fraction * num_quant_ids)``
            randomly chosen entries of every sample column are set to NaN.
        seed: seed for the private random generator.

    Returns:
        pd.DataFrame with the columns ``'quant_id'``, ``'protein'`` and one
        float column per sample holding uniform values in [0, 1) or NaN.
    """
    # Legacy RandomState keeps the exact value stream of np.random.seed/rand/choice.
    rng = np.random.RandomState(seed)

    table = {
        'quant_id': [f"quant_{i}" for i in range(num_quant_ids)],
        'protein': [f"protein_{i}" for i in range(num_quant_ids)],
    }

    # Loop-invariant: number of missing values per sample column.
    num_missing = int(np.floor(nan_fraction * num_quant_ids))

    for sample in samplemap_df['sample'].to_list():
        intensities = rng.rand(num_quant_ids)
        if nan_fraction > 0:
            # Sampling without replacement gives exactly num_missing distinct rows.
            missing_rows = rng.choice(num_quant_ids, num_missing, replace=False)
            intensities[missing_rows] = np.nan
        table[sample] = intensities

    return pd.DataFrame(table)


def test_determine_number_replicates(mediancreator, expected_number):
    print(mediancreator._determine_number_replicates())
    assert mediancreator._determine_number_replicates() == expected_number, "Number of replicates is incorrect"

def test_median_dataframe_shape(mediancreator, input_df_aqformat,expected_number_replicates, expected_number_ions):
    print(input_df_aqformat.columns)
    expected_shape = (expected_number_ions, len(input_df_aqformat.columns)-2 +expected_number_replicates)
    assert mediancreator.extended_input_df.shape == expected_shape, "Median DataFrame shape is incorrect"

def test_extended_samplemap_shape(mediancreator, samplemap_df, expected_number_replicates):
    expected_shape = (len(samplemap_df.index) + expected_number_replicates, 2)
    assert mediancreator.extended_samplemap_df.shape == expected_shape, "Extended samplemap shape is incorrect"


# Execute the MedianConditionCreator checks; an AssertionError is raised by the first check that fails.
run_tests()


2


Unnamed: 0_level_0,Unnamed: 1_level_0,sample1_1,sample1_2,sample1_3,sample2_1,sample2_2,sample3_1,sample3_2,sample3_3,median_rep0,median_rep1
protein,quant_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
protein_0,quant_0,,0.179490,,0.944707,0.561577,,,0.342921,0.944707,0.370534
protein_1,quant_1,0.715189,0.170987,0.044612,0.892862,,,0.471457,0.603649,0.804026,0.321222
protein_2,quant_2,0.602763,,0.799796,0.677114,,0.127689,,,0.602763,
protein_3,quant_3,0.544883,0.874573,0.076956,0.639027,0.085233,,0.943851,0.298614,0.591955,0.874573
protein_4,quant_4,0.423655,0.944120,0.518835,0.548361,0.665678,0.239337,0.964925,,0.423655,0.944120
...,...,...,...,...,...,...,...,...,...,...,...
protein_95,quant_95,0.183191,0.181631,0.287052,0.928219,,,0.630832,0.958146,0.555705,0.406232
protein_96,quant_96,0.586513,,0.706575,0.592081,,,0.997994,,0.589297,0.997994
protein_97,quant_97,0.020108,,0.414857,0.431785,0.437814,0.700928,0.987889,0.921060,0.431785,0.712852
protein_98,quant_98,0.828940,,,0.592780,0.974990,,,,0.710860,0.974990


Index(['quant_id', 'protein', 'sample1_1', 'sample1_2', 'sample1_3',
       'sample2_1', 'sample2_2', 'sample3_1', 'sample3_2', 'sample3_3'],
      dtype='object')
All tests passed!


In [42]:
import os
import tempfile

def test_add_and_save_median_condition():
    """File round-trip check for ``aqmca.add_and_save_median_condition``.

    Writes a synthetic input table and sample map as tab-separated files into
    a temporary directory, calls ``aqmca.add_and_save_median_condition`` with
    both paths, reads the two files back and compares them with the extended
    tables of an ``aqmca.MedianConditionCreator`` built from the same
    in-memory data. The read-back tables are displayed for inspection.

    Raises:
        AssertionError: if a file is missing afterwards or a read-back table
            differs from the expected extended table.
    """
    with tempfile.TemporaryDirectory() as workdir:
        input_path = os.path.join(workdir, 'input.tsv')
        samplemap_path = os.path.join(workdir, 'samplemap.tsv')

        # Synthetic fixtures, written to disk in the format the function reads.
        samplemap_df = generate_samplemap_df([3, 2, 3])
        samplemap_df.to_csv(samplemap_path, sep="\t", index=False)

        input_df = generate_normalized_input_df(samplemap_df, num_quant_ids=100, nan_fraction=0.3)
        input_df.to_csv(input_path, sep="\t", index=False)

        # Function under test; the read-back below expects the extended
        # tables at the same two paths.
        aqmca.add_and_save_median_condition(input_path, samplemap_path)

        existence_checks = (
            (input_path, "Input file was not created."),
            (samplemap_path, "Samplemap file was not created."),
        )
        for path, failure_message in existence_checks:
            assert os.path.exists(path), failure_message

        saved_input_df = pd.read_csv(input_path, sep="\t")
        saved_samplemap_df = pd.read_csv(samplemap_path, sep="\t")

        # Reference result computed in memory from the same fixtures.
        reference_creator = aqmca.MedianConditionCreator(samplemap_df, input_df)
        expected_input_df = reference_creator.extended_input_df.reset_index()
        pd.testing.assert_frame_equal(saved_input_df, expected_input_df)

        expected_samplemap_df = reference_creator.extended_samplemap_df.reset_index(drop=True)
        pd.testing.assert_frame_equal(saved_samplemap_df.reset_index(drop=True), expected_samplemap_df)

        for saved_table in (saved_input_df, saved_samplemap_df):
            display(saved_table)

        print("Test for add_and_save_median_condition passed.")

# Run the file round-trip test; an AssertionError is raised if any of its checks fails.
test_add_and_save_median_condition()


Unnamed: 0,protein,quant_id,sample1_1,sample1_2,sample1_3,sample2_1,sample2_2,sample3_1,sample3_2,sample3_3,median_rep0,median_rep1
0,protein_0,quant_0,,0.179490,,0.944707,0.561577,,,0.342921,0.944707,0.370534
1,protein_1,quant_1,0.715189,0.170987,0.044612,0.892862,,,0.471457,0.603649,0.804026,0.321222
2,protein_2,quant_2,0.602763,,0.799796,0.677114,,0.127689,,,0.602763,
3,protein_3,quant_3,0.544883,0.874573,0.076956,0.639027,0.085233,,0.943851,0.298614,0.591955,0.874573
4,protein_4,quant_4,0.423655,0.944120,0.518835,0.548361,0.665678,0.239337,0.964925,,0.423655,0.944120
...,...,...,...,...,...,...,...,...,...,...,...,...
95,protein_95,quant_95,0.183191,0.181631,0.287052,0.928219,,,0.630832,0.958146,0.555705,0.406232
96,protein_96,quant_96,0.586513,,0.706575,0.592081,,,0.997994,,0.589297,0.997994
97,protein_97,quant_97,0.020108,,0.414857,0.431785,0.437814,0.700928,0.987889,0.921060,0.431785,0.712852
98,protein_98,quant_98,0.828940,,,0.592780,0.974990,,,,0.710860,0.974990


Unnamed: 0,sample,condition
0,sample1_1,condition1
1,sample1_2,condition1
2,sample1_3,condition1
3,sample2_1,condition2
4,sample2_2,condition2
5,sample3_1,condition3
6,sample3_2,condition3
7,sample3_3,condition3
8,median_rep0,median_reference
9,median_rep1,median_reference


Test for add_and_save_median_condition passed.
