## 01: Showcase basic data loading and manipulation

In [None]:
%load_ext autoreload
%autoreload 2

import logging

import pandas as pd

from alphatools.pp import add_metadata, load_diann_pg_matrix, transform

# Configure the root logger at INFO level so that INFO-and-above log records
# (e.g. any emitted by the imported packages) are shown in the notebook.
logging.basicConfig(level=logging.INFO)

In [None]:
# Input files for this example (paths are relative to the notebook location).
data_path = "./example_data/HeLa_QC_data.pkl"
sample_metadata_path = "./example_data/HeLa_QC_sample_metadata.csv"
feature_metadata_path = "./example_data/HeLa_QC_feature_metadata.csv"

# Read the protein-group matrix into an AnnData object; the matrix index and
# columns become obs and var, respectively.
# TODO: replace with an AlphaBase wrapper that reads DIANN report.tsv files directly.
adata = load_diann_pg_matrix(data_path)

# Sample-level annotations: first CSV column is the index, attached along axis 0.
sample_metadata = pd.read_csv(sample_metadata_path, index_col=0)
adata = add_metadata(adata, sample_metadata, axis=0)

# Feature-level annotations: first CSV column is the index, attached along axis 1.
feature_metadata = pd.read_csv(feature_metadata_path, index_col=0)
adata = add_metadata(adata, feature_metadata, axis=1)

In [None]:
# TODO: load PSM data (placeholder cell, not yet implemented)

In [None]:
# TODO: apply filters and transformations (placeholder cell, not yet implemented)

In [None]:
# Show transform.nanlog applied to several kinds of input, each derived from
# the same AnnData object: the AnnData itself, its DataFrame view, a single
# column of that DataFrame, and that column's underlying values.
example_protein_group = "A0A024R1R8;Q9Y2S6"

# Untransformed intensity matrix, for reference.
display(adata.X)

# AnnData in; show the .X of the returned object.
log_adata = transform.nanlog(adata)
display(log_adata.X)

# Full DataFrame in.
log_frame = transform.nanlog(adata.to_df())
display(log_frame)

# One protein-group column in.
log_column = transform.nanlog(adata.to_df()[example_protein_group])
display(log_column)

# The same column's raw values in.
log_values = transform.nanlog(adata.to_df()[example_protein_group].values)
display(log_values)

array([[84788000. , 37541000. ,   975192. , ...,   130693. , 11476200. ,
        24149700. ],
       [86609700. , 37697200. ,   886428. , ...,    91687.5, 11217000. ,
        24096300. ],
       [87261000. , 38341900. ,   965717. , ...,    91642.7, 11154100. ,
        27586300. ],
       [86922600. , 38314100. ,  1139240. , ...,   168758. , 11793000. ,
        25791600. ]])

array([[26.33735676, 25.16196374, 19.89532677, ..., 16.99582235,
        23.45214168, 24.52550193],
       [26.36802528, 25.16795403, 19.75764393, ..., 16.48443744,
        23.41918354, 24.5223083 ],
       [26.37883367, 25.19241849, 19.88124095, ..., 16.48373234,
        23.41107077, 24.71744863],
       [26.37322799, 25.19137208, 20.11964028, ..., 17.36459637,
        23.49142743, 24.62039794]])

Protein.Group,A0A024R1R8;Q9Y2S6,A0A024R4E5,A0A024RBG1;Q9NZJ9-2,A0A024RBG1;Q9NZJ9;Q9NZJ9-2,A0A024RCL3;A0A0G2JK11;Q96QC4,A0A075B6E5;Q8N8S7,A0A087WT44,A0A087WT44;P30519;P30519-2,A0A087WUL8;Q6P3W6;P0DPF3;A0A087WVU4;A0A087WZJ2;A0A087WZE1;A0A8V8TMC1;H7BY70;A0A087WTW4;A0A075B6G5,A0A087WV86;Q6PGQ7;Q6PGQ7-2,...,U3KQ91;O60266;A0A0A0MSC1,U3KQP1;Q9NWL6;C9IYZ1,U3KQQ1,V9GY43,V9GY95;Q8IVH8;Q8IVH8-2;Q8IVH8-3,V9GYF0,V9GYM2,X6REB3,X6RLX0,X6RLX0;O15083;H7C4G9
Run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14,26.337357,25.161964,19.895327,20.560146,21.218858,23.734093,17.494496,24.633095,22.837435,,...,20.279903,16.609551,16.413573,,10.567424,22.150192,14.973056,16.995822,23.452142,24.525502
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15,26.368025,25.167954,19.757644,20.279733,21.313716,23.771075,18.592734,24.648801,22.452604,,...,20.09294,16.865673,15.703479,19.739327,10.40663,22.194633,15.104255,16.484437,23.419184,24.522308
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16,26.378834,25.192418,19.881241,20.27776,21.050825,23.795348,,24.710928,23.684167,,...,19.730016,16.729422,16.482433,19.375188,11.07224,22.126469,18.199893,16.483732,23.411071,24.717449
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17,26.373228,25.191372,20.11964,20.279778,21.202558,23.694321,,24.623946,22.765175,,...,19.82235,16.460392,16.210875,,9.724224,22.517685,19.098847,17.364596,23.491427,24.620398


Run
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14    26.337357
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15    26.368025
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16    26.378834
20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17    26.373228
Name: A0A024R1R8;Q9Y2S6, dtype: float64

array([26.33735676, 26.36802528, 26.37883367, 26.37322799])