AnnData annotates a data matrix X of shape (N x D).
**obs** N observations (rows) - samples or cells - one-dimensional annotation
**var** D variables (columns) - features such as genes - one-dimensional annotation
**Layers** different forms of the original core data (same shape as X) - e.g. raw data and normalized data in log form

obs - One-dimensional annotation of observations (pd.DataFrame).
obs_names - Names of observations (alias for .obs.index).
obsm - Multi-dimensional annotation of observations (mutable structured ndarray).
obsp - Pairwise annotation of observations, a mutable mapping with array-like values.

var - One-dimensional annotation of variables/features (pd.DataFrame).
var_names - Names of variables (alias for .var.index).
varm - Multi-dimensional annotation of variables/features (mutable structured ndarray).
varp - Pairwise annotation of variables/features, a mutable mapping with array-like values.

In [None]:
#Importing packages - make sure all required packages are installed in the active environment (conda activate anndata)
import numpy as np
import pandas as pd
import anndata as ad
from scipy.sparse import csr_matrix

In [None]:
#INITIALIZING ANNDATA#

#Reading h5ad data
#ad.read_h5ad already returns an AnnData object, so it can be used directly;
#passing it through ad.AnnData(...) again is redundant.
h5data = ad.read_h5ad("/Users/srivalli/Desktop/Heart/hca_heart_immune_download.h5ad")

#Data structure of anndata file i.e., summary statistics of the data
#To assign
adata = h5data
#To view (a bare expression is only displayed when it is the last line of a notebook cell,
#so print is used here)
print(adata)

#Making matrix with the anndata
matrix = adata.X ##ONLY IF REQUIRED

In [None]:
#SUBSETTING DATA WHEN OBS AND VAR NAMES ARE AVAILABLE#

#Subsetting data when gender is female
#NOTE(review): the filter previously compared cell_source with "Female". cell_source appears to
#hold the sample source (e.g. "Sanger-CD45"), so that mask would select nothing. The gender
#column is used instead - confirm the column name with adata.obs.columns.
bdata = adata[adata.obs["gender"] == "Female"]
bdata

#To view it
#bdata.obs

#Subsetting data when scNym(Semi-supervised adversarial neural networks for single cell classification) is CD4+T_cell
cdata = adata[adata.obs["scNym"] == "CD4+T_cell"]
cdata

#To view it
#cdata.obs

In [None]:
#SUBSETTING DATA WHEN OBS AND VAR NAMES ARE NOT AVAILABLE#

#testdata was never defined before being used; it is created here as a copy of adata
#so that renaming its obs/var names leaves the original names of adata untouched
testdata = adata.copy()

#Indexing file with numbers which can be used as obs/var names
testdata.obs_names = [f"Cell_{i:d}" for i in range(testdata.n_obs)]
testdata.var_names = [f"Gene_{i:d}" for i in range(testdata.n_vars)]

#To view
print(testdata.obs_names[:10])

#Subsetting with obs and var of interest(cells and genes)
#NOTE: the requested names must exist, i.e. this needs more than 10 cells and more than 1900 genes
testdata[["Cell_1", "Cell_10"], ["Gene_5", "Gene_1900"]]

In [None]:
#ADDING ALIGNED METADATA#

#Extra annotations are attached to adata.obs, one value per observation

#Drawing one random label per observation
ct = np.random.choice(
    ["B", "T", "Monocyte"],
    size=adata.n_obs,
)

#Storing the labels as a categorical column (categoricals are preferred for efficiency)
adata.obs["cell_type"] = pd.Categorical(ct)

#To view
adata.obs


In [None]:

#Making a new column cs_short and assigning HN values to it
adata.obs['cs_short'] = "HN"

#When I want to assign SC to the cells whose cell_source is Sanger-CD45
#NOTE(review): 'Sanger-CD45' appears to be a value of the cell_source column, not a cell name.
#Passing it to .loc as a row label raises a KeyError when no cell has that name, so the rows
#are selected with a boolean mask on cell_source instead - confirm against adata.obs.
sanger = ['Sanger-CD45']
adata.obs.loc[adata.obs['cell_source'].isin(sanger), 'cs_short'] = 'SC'
adata.obs

#When I want to assign Apex to specific cells, selected by their cell names (obs index labels)
apex = ['AAGACTCTCAGGACGA-1-H0015_apex' , 'AACAACCGTAATTGGA-1-H0015_apex']
adata.obs.loc[apex,'cs_short'] = 'Apex'
adata.obs

In [None]:
#MAKING LAYERS OF DATA

#log1p (log(1 + x)) of the core matrix, kept as its own layer next to adata.X
log_layer_name = "log_transformed"
adata.layers[log_layer_name] = np.log1p(adata.X)
adata

#The same layer exported as a dataframe of log values
logdata = adata.to_df(layer=log_layer_name)
logdata


In [None]:
#To view the layers in anndata
#The layers live on the adata instance; the AnnData class has no "adata" attribute,
#so ad.AnnData.adata raised an AttributeError
adata.layers

In [None]:
#Saving data into file

#Annotated data object, written as a gzip-compressed h5ad file
h5ad_path = '/Users/srivalli/Documents/GitHub/Single-cell-data-analysis/Anndata/output/my_results.h5ad'
adata.write(h5ad_path, compression="gzip")

#Dataframe of log values, written as text
logdata_path = '/Users/srivalli/Documents/GitHub/Single-cell-data-analysis/Anndata/output/logdata.txt'
logdata.to_csv(logdata_path)