In [1]:
%load_ext autoreload
%autoreload 2

import logging

import pandas as pd

import alphatools as at

# Configure root logging at INFO level so the INFO-level status messages
# (e.g. from pp.scale_and_center) are displayed below the cells.
logging.basicConfig(level=logging.INFO)

### Load data and add metadata



In [2]:
# Load the protein-group matrix into an AnnData object; per the original note,
# the index and columns of the input become obs and var.
# TODO: replace with an AlphaBase wrapper that reads DIA-NN report.tsv files directly.
data_path = "./example_data/HeLa_QC_data.pkl"
adata = at.pp.load_diann_pg_matrix(data_path)

In [3]:
# Read both metadata tables first, then attach them to `adata`.
# NOTE(review): pickle files can execute arbitrary code when loaded; only use
# files from a trusted source here.
sample_metadata = pd.read_pickle("./example_data/HeLa_QC_sample_metadata.pkl")
feature_metadata = pd.read_pickle("./example_data/HeLa_QC_feature_metadata.pkl")

# axis=0 attaches the sample-level table, axis=1 the feature-level table.
adata = at.pp.add_metadata(adata, sample_metadata, axis=0)
adata = at.pp.add_metadata(adata, feature_metadata, axis=1)

### Scale and center data

In [5]:
# In-place scaling: each scaler is applied to its own copy of `adata`
# (no `to_layer` given, so the call is logged as scaling "inplace").
adata2 = adata.copy()  # scaled with the standard scaler
adata3 = adata.copy()  # scaled with the robust scaler
for _target, _scaler in ((adata2, "standard"), (adata3, "robust")):
    at.pp.scale_and_center(_target, scaler=_scaler)

# Layer-wise scaling on `adata`: for each scaler, write the result to a first
# layer, then scale that layer again into a second layer.
for _scaler, _first_layer, _second_layer in (
    ("standard", "standard_scaled", "standard_scaled_2"),
    ("robust", "robust_scaled", "robust_scaled_2"),
):
    at.pp.scale_and_center(adata, to_layer=_first_layer, scaler=_scaler)
    at.pp.scale_and_center(adata, from_layer=_first_layer, to_layer=_second_layer, scaler=_scaler)

INFO:root:pp.scale_and_center(): Scaling data with standard scaler inplace.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler inplace.
INFO:root:pp.scale_and_center(): Scaling data with standard scaler to layer 'standard_scaled'.
INFO:root:pp.scale_and_center(): Scaling data with standard scaler to layer 'standard_scaled_2'.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler to layer 'robust_scaled'.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler to layer 'robust_scaled_2'.


In [8]:
# Keep a copy of `adata` (after the layer-wise scaling calls above) under a
# separate name for the comparison in the next cell.
adata1 = adata.copy()

In [12]:
# Element-wise equality between the main matrix of the `adata` copy and the
# main matrix of the copy that was standard-scaled in place.
adata1.X == adata2.X

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])