In [63]:
# Reload imported modules automatically before each cell execution, so edits
# to the package source are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

import logging

import numpy as np
import pandas as pd

from alphatools import pp, tl

# Surface INFO-level log messages (e.g. those emitted by pp.scale_and_center)
# in the cell output
logging.basicConfig(level=logging.INFO)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Data Loading & adding metadata



In [38]:
# Load protein groups into an AnnData object with index & columns as obs & var.
# TODO: replace with an AlphaBase wrapper that reads report.tsv files from DIANN directly.
# NOTE(review): the input here is a pickle file although the loader is named after
# a DIANN pg matrix — presumably a pickled protein-group table; confirm the expected format.
data_path = "./example_data/HeLa_QC_data.pkl"
adata = pp.load_diann_pg_matrix(data_path)

In [39]:
# Read both annotation tables first: one describing samples, one describing features
sample_metadata = pd.read_pickle("./example_data/HeLa_QC_sample_metadata.pkl")
feature_metadata = pd.read_pickle("./example_data/HeLa_QC_feature_metadata.pkl")

# Attach them to the AnnData object: samples along axis 0, features along axis 1
adata = pp.add_metadata(adata, sample_metadata, axis=0)
adata = pp.add_metadata(adata, feature_metadata, axis=1)

### Scale and center data

In [47]:
# In-place scaling is demonstrated on copies rather than on `adata` itself
adata2, adata3 = adata.copy(), adata.copy()

# Without `to_layer`, the data is scaled inplace (standard, then robust scaler)
pp.scale_and_center(adata2, scaler="standard")
pp.scale_and_center(adata3, scaler="robust")

# With `to_layer`, each scaler's result is written to its own layer of `adata`
for scaler_name in ("standard", "robust"):
    pp.scale_and_center(adata, scaler=scaler_name, to_layer=f"{scaler_name}_scaled")

INFO:root:pp.scale_and_center(): Scaling data with standard scaler inplace.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler inplace.
INFO:root:pp.scale_and_center(): Scaling data with standard scaler to layer 'standard_scaled'.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler to layer 'robust_scaled'.


### Two-sided independent t-test

In [87]:
from scipy.stats import ttest_ind

# Sanity check on complete data: the scipy reference implementation and the
# NaN-safe implementation must agree
scipy_results = ttest_ind((1, 2, 3), (5, 6, 7))
tl_results = tl.nan_safe_ttest_ind((1, 2, 3), (5, 6, 7))

# Compare (statistic, pvalue) with a floating-point tolerance; exact equality
# would be sensitive to harmless rounding differences between implementations
if not np.allclose(tuple(scipy_results)[:2], tuple(tl_results)[:2]):
    raise ValueError("ttest_ind results are not equal")

# use-case: missing values. Called with its default nan_policy, scipy's ttest_ind
# returns NaN as soon as one value is NaN, whereas nan_safe_ttest_ind still
# produces a result for the same input
scipy_results = ttest_ind((1, 2, 3), (5, 6, np.nan))
print(scipy_results)
tl_results = tl.nan_safe_ttest_ind((1, 2, 3), (5, 6, np.nan))
print(tl_results)

# use-case: single-value groups. Note the trailing comma: `(5,)` is a one-element
# tuple, while `(5)` is just the integer 5. scipy's ttest_ind reports a statistic
# even though one group holds a single value; nan_safe_ttest_ind returns
# (nan, nan) instead
scipy_results = ttest_ind((1, 2, 3), (5,))
print(scipy_results)
tl_results = tl.nan_safe_ttest_ind((1, 2, 3), (5,))
print(tl_results)

TtestResult(statistic=nan, pvalue=nan, df=nan)
TtestResult(statistic=-4.2, pvalue=0.02463207817693926, df=3.0)
TtestResult(statistic=-2.598076211353316, pvalue=0.1216899343463201, df=2.0)
(nan, nan)


In [70]:
# With scipy's default nan_policy, a NaN in either sample makes ttest_ind return NaN
ttest_ind((1, 2, 3), (5, np.nan))

TtestResult(statistic=nan, pvalue=nan, df=nan)