In [None]:
#| default_exp benchmarking

### Test input df creation

In [None]:
#| include: false
import numpy as np
import pandas as pd
import directlfq.benchmarking as lfq_benchmarking

# Small hand-written inputs for the scaling checks in this cell:
# seven quant rows tagged with sample ids 1, 2 and 3, and a three-entry sample list.
quant_df = pd.DataFrame(
    {
        'sample_list': [1, 1, 1, 2, 2, 2, 3],
        'asd': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
        'cfs': [11, 23, 4, 5, 7, 4, 9],
    }
)
samplelist_df = pd.DataFrame(
    {'sample_list': ['s1', 's2', 's3']}
)

def test_that_scaled_numbers_of_samples_are_as_expected(quant_df, samplelist_df, desired_num_samples):
    """Check that ScaledDFCreatorIQFormat yields ``desired_num_samples`` samples.

    Args:
        quant_df: quant table handed to the creator; it is forwarded unchanged.
        samplelist_df: sample list handed to the creator; forwarded unchanged.
        desired_num_samples: the sample count requested from the creator.

    Raises:
        AssertionError: if the number of distinct ``sample_list`` values in the
            scaled quant table, or the number of rows in the scaled sample
            list, differs from ``desired_num_samples``.
    """
    creator = lfq_benchmarking.ScaledDFCreatorIQFormat(quant_df, samplelist_df, desired_num_samples)

    # Distinct sample labels present in the scaled quant table.
    distinct_quant_samples = set(creator.scaled_quant_df["sample_list"])
    assert len(distinct_quant_samples) == desired_num_samples

    # One row per sample is expected in the scaled sample list.
    sample_list_index = creator.scaled_sample_list_df.index
    assert len(sample_list_index) == desired_num_samples


# Run the sample-count check for 1, 7 and 13 requested samples.
for desired_num_samples in (1, 7, 13):
    test_that_scaled_numbers_of_samples_are_as_expected(quant_df, samplelist_df, desired_num_samples)


def test_that_repetition_worked_out(quant_df, samplelist_df):
    """Check the ``sample_list`` entry at label 6 after scaling to 7 samples.

    Builds a ScaledDFCreatorIQFormat for seven samples and asserts that the
    ``sample_list`` value looked up with key ``6`` in the scaled quant table
    compares equal to ``[3]``.

    Args:
        quant_df: quant table forwarded to the creator.
        samplelist_df: sample list forwarded to the creator.

    Raises:
        AssertionError: if the looked-up entry does not match ``[3]``.
    """
    creator = lfq_benchmarking.ScaledDFCreatorIQFormat(quant_df, samplelist_df, 7)
    sample_column = creator.scaled_quant_df["sample_list"]
    # np.all collapses the element-wise comparison into a single truth value.
    assert np.all(sample_column[6] == [3])

# Execute the repetition check against the example frames defined in this cell.
test_that_repetition_worked_out(quant_df, samplelist_df)

In [None]:
#| include: false
import pandas as pd
import numpy as np
import directlfq.benchmarking as lfq_benchmarking

class TemplateDFCreator():
    """Load a tab-separated protein/ion table for use as a template frame.

    The table is read once during construction and exposed as ``template_df``,
    indexed by the ``protein`` and ``ion`` columns.
    """

    def __init__(self, template_df_location="../test_data/unit_tests/protein_normalization/example_proteins.tsv"):
        """Read the template table.

        Args:
            template_df_location: path of the tab-separated file to load. It
                defaults to the example protein table, so existing
                ``TemplateDFCreator()`` calls behave as before. The file must
                contain ``protein`` and ``ion`` columns.
        """
        # Filled by _create_template_df(); None only until the file is read.
        self.template_df = None
        self._template_df_location = template_df_location
        self._create_template_df()

    def _create_template_df(self):
        """Read the TSV at ``self._template_df_location`` into ``self.template_df``."""
        self.template_df = pd.read_csv(self._template_df_location, index_col=["protein", "ion"], sep="\t")


def test_that_shape_is_as_expected(num_samples):
    """Check the dimensions of the frame produced by ScaledDFCreatorDirectLFQFormat.

    Args:
        num_samples: number of samples requested from the creator.

    Raises:
        AssertionError: if the scaled frame does not have ``num_samples``
            columns, or its row count differs from the template's.
    """
    template_df = TemplateDFCreator().template_df
    scaler = lfq_benchmarking.ScaledDFCreatorDirectLFQFormat(
        template_df=template_df,
        desired_number_of_samples=num_samples,
    )
    size_adjusted_df = scaler.scaled_df

    # One column per requested sample ...
    column_count = len(size_adjusted_df.columns)
    assert column_count == num_samples

    # ... while the rows have to match the template one-to-one.
    row_count = len(size_adjusted_df.index)
    assert row_count == len(template_df.index)

    print("performed_checks")


# Run the shape check for 1, 100 and 10000 requested samples.
for requested_samples in (1, 100, 10000):
    test_that_shape_is_as_expected(requested_samples)

def test_that_values_are_as_expected(num_samples):
    """Check that one template column reappears unchanged in the scaled frame.

    Compares the template column ``BoxCar_02-01_2`` against the scaled column
    ``BoxCar_02-01_2_AND_remainder`` using ``np.allclose``.

    Args:
        num_samples: number of samples requested from the creator.

    Raises:
        AssertionError: if the two columns are not close element-wise.
    """
    template_df = TemplateDFCreator().template_df
    scaler = lfq_benchmarking.ScaledDFCreatorDirectLFQFormat(
        template_df=template_df,
        desired_number_of_samples=num_samples,
    )
    size_adjusted_df = scaler.scaled_df

    reference_values = template_df.loc[:, "BoxCar_02-01_2"]
    scaled_values = size_adjusted_df.loc[:, "BoxCar_02-01_2_AND_remainder"]
    assert np.allclose(reference_values, scaled_values)


# Run the value check for 5 and 100 requested samples.
for requested_samples in (5, 100):
    test_that_values_are_as_expected(requested_samples)


### Learning Tests

In [None]:
import seaborn as sns
import numpy as np


def check_taking_mean_of_pandas_columns():
    """Learning test: a row-wise mean with ``skipna=True`` ignores NaN cells.

    Loads the seaborn ``iris`` dataset, blanks out the column at position 1
    (``sepal_width``) for rows 3 to 5, and takes the mean of ``sepal_width``
    and ``petal_length`` per row. With one of the two values missing, the mean
    must equal the remaining ``petal_length`` value.

    Raises:
        AssertionError: if any of the blanked rows has a mean that differs
            from its ``petal_length``.
    """
    df = sns.load_dataset('iris')
    display(df)
    df.iloc[3:6, 1] = np.nan
    display(df)
    # Use .mean so the operation matches the function and column names.
    df['mean_sepal_width_petal_length'] = df[["sepal_width", "petal_length"]].mean(axis = 1, skipna = True)
    # Check every blanked row, not only the first one.
    blanked_rows = df.iloc[3:6]
    assert (blanked_rows['mean_sepal_width_petal_length'] == blanked_rows['petal_length']).all()


# Run the learning test as soon as this cell executes.
check_taking_mean_of_pandas_columns()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
def check_cv_calculation_along_a_dataframe_axis():
    """Learning test: apply a std/mean ratio to every row of a DataFrame.

    Loads the seaborn ``iris`` dataset without its ``species`` column,
    computes for each row the standard deviation (``ddof=1``) divided by the
    mean, and displays both the input frame and the per-row results.
    """
    def cv_function(row):
        # Standard deviation with ddof=1, relative to the row mean.
        return np.std(row, ddof=1) / np.mean(row)

    numeric_iris = sns.load_dataset("iris").drop(labels="species", axis=1)
    row_wise_cv = numeric_iris.apply(cv_function, axis=1)
    display(numeric_iris)
    display(row_wise_cv)

In [None]:
import pandas as pd
import seaborn as sns

def check_denisty_plot(kind):
    """Learning test: draw the iris ``petal_width`` distribution with ``sns.displot``.

    The values are wrapped by ``create_df_for_list`` into a frame with a
    ``lisast`` value column and a ``list_name`` label column (label ``"C"``),
    which is then plotted with ``hue='list_name'``.

    Args:
        kind: forwarded to ``sns.displot`` as its ``kind`` argument; this
            notebook calls it with ``'kde'`` and ``'hist'``.
    """
    df_data = sns.load_dataset("iris")
    petal_widths = list(df_data["petal_width"])
    # reset_index() keeps the extra 'index' column the previous
    # pd.concat([...]).reset_index() call produced.
    df_plot = create_df_for_list(petal_widths, "C").reset_index()
    sns.displot(data=df_plot, x='lisast', hue='list_name', kind=kind)


def create_df_for_list(list, list_name):
    """Wrap a sequence of values into a labelled two-column DataFrame.

    Args:
        list: the values; they become the ``lisast`` column.
        list_name: label written to every row of the ``list_name`` column.

    Returns:
        A DataFrame with the columns ``lisast`` and ``list_name``.
    """
    return pd.DataFrame({'lisast': list}).assign(list_name=list_name)

# Draw the distribution once as a kernel-density plot and once as a histogram.
for plot_kind in ('kde', 'hist'):
    check_denisty_plot(plot_kind)
