## Notebook for the Khaliq 2022 anndata file creation
### Developed by: Anna Maguza

### Institute of Computational Biology - Computational Health Centre - Helmholtz Munich

### 12 October 2022

#### Load required packages

In [None]:
import os
from pathlib import Path

import anndata as an
import numpy as np
import pandas as pd
import scanpy as sc
import scrublet

#### Setup Cells

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Scanpy log level: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3

# Print the versions of the core packages so they are recorded in the cell output.
sc.logging.print_header()

# Default figure appearance for the plots produced in this notebook.
sc.settings.set_figure_params(
    dpi=80,
    facecolor='white',
)

scanpy==1.9.1 anndata==0.8.0 umap==0.5.3 numpy==1.21.6 scipy==1.8.1 pandas==1.4.2 scikit-learn==1.1.1 statsmodels==0.13.2 python-igraph==0.9.11 louvain==0.7.1 pynndescent==0.5.7


#### Load Data

In [None]:
#Load the raw UMI count matrix (csv)
# Folder holding the GEO GSE200997 files. Set the KHALIQ_2022_DATA_DIR environment
# variable to run the notebook on another machine; the default is the original
# author's local folder, so existing runs are unaffected.
DATA_DIR = Path(os.environ.get('KHALIQ_2022_DATA_DIR', '/Users/annamaguza/Desktop/Desktop-Anna/LMU/Master-Thesis/Anna-Master-Project'))

# The first csv column is used as the row index.
UMI_counts = pd.read_csv(DATA_DIR / 'GSE200997_GEO_processed_CRC_10X_raw_UMI_count_matrix.csv', index_col=[0])

In [None]:
#Checking the file: display the count matrix exactly as it was read from the csv
UMI_counts

#### Creating the anndata file

In [None]:
#AnnData expects barcodes as row names and genes as column names, so the count table is flipped here.
#NOTE: the result is stored under the same name, so running this cell a second time flips the table back.
UMI_counts = UMI_counts.transpose()

In [None]:
#Checking the file: after the transpose the rows should be barcodes and the columns genes
UMI_counts

In [None]:
#Load the per-cell annotation table (csv)
# Folder holding the GEO GSE200997 files. Set the KHALIQ_2022_DATA_DIR environment
# variable to run the notebook on another machine; the default is the original
# author's local folder, so existing runs are unaffected.
DATA_DIR = Path(os.environ.get('KHALIQ_2022_DATA_DIR', '/Users/annamaguza/Desktop/Desktop-Anna/LMU/Master-Thesis/Anna-Master-Project'))

# The first csv column is used as the row index.
meta_data = pd.read_csv(DATA_DIR / 'GSE200997_GEO_processed_CRC_10X_cell_annotation.csv', index_col=[0])

In [None]:
#Checking the file: display the cell annotation table as it was read from the csv
meta_data

In [None]:
#Create anndata file
# X is the barcode-by-gene count table: barcodes are the observations (obs),
# genes are the variables (var).

# The annotation csv is read independently of the count matrix, so nothing guarantees
# that both list the same barcodes in the same order. Fail early with a clear message
# if a barcode of the count matrix has no annotation row.
unannotated_barcodes = UMI_counts.index.difference(meta_data.index)
if len(unannotated_barcodes) > 0:
    raise ValueError(
        f"{len(unannotated_barcodes)} barcodes of the count matrix have no row in meta_data, "
        f"e.g. {list(unannotated_barcodes[:5])}"
    )

khaliq_2022_anndata = an.AnnData(
    X=UMI_counts,
    # Reorder the annotation by barcode so that row i of obs describes row i of X.
    obs=meta_data.loc[UMI_counts.index],
    # Gene names only. The previous UMI_counts.T.iloc[:,:1] transposed the whole
    # matrix again and stored the first cell's counts as a gene annotation column.
    var=pd.DataFrame(index=UMI_counts.columns),
)

In [None]:
#Checking anndata file: display the data matrix X stored in the object
khaliq_2022_anndata.X

In [None]:
#Checking anndata file: display the variable (gene) annotation table
khaliq_2022_anndata.var

In [None]:
#Checking anndata file: display the observation (cell) annotation table
khaliq_2022_anndata.obs

In [None]:
#Save anndata
# Output folder. Set the KHALIQ_2022_DATA_DIR environment variable to run the notebook
# on another machine; the default is the original author's local folder, so existing
# runs write to the same file as before.
DATA_DIR = Path(os.environ.get('KHALIQ_2022_DATA_DIR', '/Users/annamaguza/Desktop/Desktop-Anna/LMU/Master-Thesis/Anna-Master-Project'))

# Write the raw (unfiltered) object as a gzip-compressed h5ad file.
khaliq_2022_anndata.write(DATA_DIR / 'khaliq_2022_anndata_raw.h5ad', compression='gzip')