# Data loading 

In this tutorial, you will learn how to load proteomics measurements and metadata into an :class:`anndata.AnnData` object, the basic data structure in `alphatools`.

In [None]:
# Development convenience: automatically reload edited modules before each cell is executed
%load_ext autoreload
%autoreload 2

import logging

import pandas as pd

import alphatools as at

# Show INFO-level log records so that the progress messages logged by the preprocessing calls are visible
logging.basicConfig(level=logging.INFO)

## Data loading & adding metadata



In [None]:
# Load the protein-group matrix into an AnnData object, with its index & columns as obs & var.
# TODO: replace with an AlphaBase wrapper that reads DIA-NN `report.tsv` files directly.
# NOTE(review): the example file is a `.pkl`, presumably read via pickle - only load such files from trusted sources.
data_path = "./example_data/HeLa_QC_data.pkl"
adata = at.pp.load_diann_pg_matrix(data_path)

In [None]:
# Read both annotation tables first; the first CSV column is used as the index of each table.
sample_metadata = pd.read_csv("./example_data/HeLa_QC_sample_metadata.csv", index_col=0)
feature_metadata = pd.read_csv("./example_data/HeLa_QC_feature_metadata.csv", index_col=0)

# Attach the tables to the AnnData object: samples along axis 0, features along axis 1.
# (presumably these end up in `adata.obs` and `adata.var`, respectively - confirm in the `add_metadata` docs)
adata = at.pp.add_metadata(adata, sample_metadata, axis=0)
adata = at.pp.add_metadata(adata, feature_metadata, axis=1)

## Scale and center data

In [None]:
# The in-place variants are run on copies so that `adata` itself keeps the unscaled values.
adata2 = adata.copy()
adata3 = adata.copy()

# Without `to_layer`, the data of the passed object is scaled in place:
# once with the standard scaler, once with the robust scaler.
at.pp.scale_and_center(
    adata2,
    scaler="standard",
)
at.pp.scale_and_center(
    adata3,
    scaler="robust",
)

# With `to_layer`, the scaled values are written to the named layer instead.
# `from_layer` selects an existing layer as input, so the second call of each
# pair scales the output of the first one again.

# standard scaler, layer-wise
at.pp.scale_and_center(
    adata,
    to_layer="standard_scaled",
    scaler="standard",
)
at.pp.scale_and_center(
    adata,
    from_layer="standard_scaled",
    to_layer="standard_scaled_2",
    scaler="standard",
)

# robust scaler, layer-wise
at.pp.scale_and_center(
    adata,
    to_layer="robust_scaled",
    scaler="robust",
)
at.pp.scale_and_center(
    adata,
    from_layer="robust_scaled",
    to_layer="robust_scaled_2",
    scaler="robust",
)

INFO:root:pp.scale_and_center(): Scaling data with standard scaler inplace.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler inplace.
INFO:root:pp.scale_and_center(): Scaling data with standard scaler to layer 'standard_scaled'.
INFO:root:pp.scale_and_center(): Scaling data with standard scaler to layer 'standard_scaled_2'.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler to layer 'robust_scaled'.
INFO:root:pp.scale_and_center(): Scaling data with robust scaler to layer 'robust_scaled_2'.


In [None]:
# Illustration of an index-on-index merge in pandas when a column name ("B")
# occurs in both frames: the overlapping column is kept twice and told apart
# by the default suffixes `_x` (left frame) and `_y` (right frame).
d1 = pd.DataFrame(
    [[1, 4], [2, 5], [3, 6]],
    index=["a", "b", "c"],
    columns=["A", "B"],
)
d2 = pd.DataFrame(
    [[1, 4], [2, 5], [3, 6]],
    index=["a", "b", "c"],
    columns=["C", "B"],
)

pd.merge(d1, d2, how="inner", left_index=True, right_index=True)

Unnamed: 0,A,B_x,C,B_y
a,1,4,1,4
b,2,5,2,5
c,3,6,3,6
