In [None]:
import scanpy as sc
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
from math import sqrt, ceil, floor
import seaborn as sns
import glob
import os
import sys
import pickle as pkl
import json
from datetime import datetime
import warnings

# Give rendered inline figures a white background (only needed for docs rendering).
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
# Use the 'retina' format for inline figures.
%config InlineBackend.figure_format='retina'

# Render matplotlib figures inline in the notebook (original note: "hpc figures").
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" # display every expression statement of a cell, not just the last one
seed = 250


def set_seed(seed: int = seed) -> None:
    """Seed the global Python and NumPy random number generators.

    Parameters
    ----------
    seed : int
        Value passed to ``random.seed`` and ``np.random.seed``. Defaults to
        the notebook-level ``seed`` defined just above (bound when the
        function is defined).

    Notes
    -----
    The previous signature was ``seed=int``, which made the *type* ``int``
    the default value instead of annotating the parameter; calling
    ``set_seed()`` without an argument therefore failed.
    """
    random.seed(seed)
    np.random.seed(seed)
    # PYTHONHASHSEED is only read at interpreter start-up, so this does not
    # change string hashing in the running kernel; it is inherited by
    # subprocesses started afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)
    print('Seed set to', seed)


set_seed(seed)

### Create loom for MDD

In [None]:
# Read the MDD expression table from a plain-text file into an AnnData object.
mdd_mat = sc.read_text(filename="counts_MDD_only_norm.txt")
mdd_mat

In [3]:
# The file uses the R layout (genes as rows, cells as columns), so after
# reading, `obs` carries the gene names and `var` carries the cell names.
cells = mdd_mat.var.index
genes = mdd_mat.obs.index

In [None]:
import anndata as ad

# Transpose so that observations are cells and variables are genes.
adata_mdd = ad.AnnData(mdd_mat.transpose())
adata_mdd

In [None]:
# Label the transposed matrix: genes on the variable axis, cells on the
# observation axis.
adata_mdd.var.index = genes
adata_mdd.obs.index = cells

adata_mdd

In [None]:
# Tab-separated metadata table; the first column becomes the row index.
metadata_mdd = pd.read_csv(
    "Metadata_MDD.txt",
    sep="\t",
    index_col=0,
)
metadata_mdd.head()
metadata_mdd.shape

In [None]:
# Number of metadata rows per diagnosis label.
metadata_mdd["Diagnosis"].value_counts()

In [None]:
# How many expression-matrix cell names are (True) / are not (False) present
# in the metadata index.
pd.Series(adata_mdd.obs_names.isin(metadata_mdd.index)).value_counts()

In [None]:
# Cell names of the expression matrix that have no entry in the metadata index.
adata_mdd.obs_names[~adata_mdd.obs_names.isin(metadata_mdd.index)]

In [None]:
# Count expression-matrix cell names that start with "Micro".
pd.Series(adata_mdd.obs_names.str.startswith("Micro")).value_counts()

In [None]:
# Count metadata row names that start with "Micro".
pd.Series(metadata_mdd.index.str.startswith("Micro")).value_counts()

Microglial cells are named differently in the expression matrix and in the metadata:
* metadata: `Micro/Macro.*`
* exp. matrix: `Micro.Macro.*`

The `/` was probably converted into a dot (or vice versa) when one of the files was exported.
Since every cell name is prefixed with its cell type, the metadata file is not needed for cell type annotation.

In [None]:
# Distribution of the per-cell sum over all genes.
sns.histplot(data=adata_mdd.X.sum(axis=1))

In [None]:
import loompy as lp

In [18]:
# The loom file is written genes x cells, so transpose once and reuse the result.
mdd_genes_by_cells = adata_mdd.X.transpose()

# Per-cell (column) annotations: cell name, number of genes with a value > 0,
# and the per-cell sum over all genes.
col_attrs = dict(
    CellID=np.array(adata_mdd.obs.index),
    nGene=np.array(np.sum(mdd_genes_by_cells > 0, axis=0)).flatten(),
    nUMI=np.array(np.sum(mdd_genes_by_cells, axis=0)).flatten(),
)

# Per-gene (row) annotations.
row_attrs = dict(Gene=np.array(adata_mdd.var.index))

loom_path = "/kyukon/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC/input/counts_MDD_only_norm.loom"

lp.create(loom_path, mdd_genes_by_cells, row_attrs, col_attrs)

In [None]:
# Draw 500 distinct cells at random (global NumPy RNG) as a small test set.
sampled_cells = np.random.choice(adata_mdd.obs_names, size=500, replace=False)
subset_mdd = adata_mdd[sampled_cells].copy()
subset_mdd

In [None]:
# Keep only the 2000 most variable genes of the test subset (subsets in place).
# NOTE(review): flavor="seurat" is documented to expect log-transformed data;
# confirm that the "norm" input matrix is already logarithmized.
sc.pp.highly_variable_genes(
    subset_mdd,
    n_top_genes=2000,
    flavor="seurat",
    subset=True,
)
subset_mdd

In [15]:
# Genes x cells view of the test subset, computed once and reused below.
subset_genes_by_cells = subset_mdd.X.transpose()

# Per-cell (column) annotations for the test subset.
col_attrs = dict(
    CellID=np.array(subset_mdd.obs.index),
    nGene=np.array(np.sum(subset_genes_by_cells > 0, axis=0)).flatten(),
    nUMI=np.array(np.sum(subset_genes_by_cells, axis=0)).flatten(),
)

# Per-gene (row) annotations for the test subset.
row_attrs = dict(Gene=np.array(subset_mdd.var.index))

loom_path = "/kyukon/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC/input/testing_MDD.loom"

lp.create(loom_path, subset_genes_by_cells, row_attrs, col_attrs)

In [18]:
# Export the test subset as a cells x genes CSV table as well.
testing_csv_path = "/kyukon/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC/input/testing_MDD.csv"
subset_mdd_df = subset_mdd.to_df()
subset_mdd_df.to_csv(testing_csv_path)

### Create loom for AD

In [None]:
# Read the AD expression table from a plain-text file into an AnnData object.
ad_mat = sc.read_text(filename="counts_AD_only_norm.txt")
ad_mat

In [None]:
# The file uses the R layout (genes as rows, cells as columns), so after
# reading, `obs` carries the gene names and `var` carries the cell names.
cells = ad_mat.var.index
genes = ad_mat.obs.index
cells[:10]

In [None]:
# Replace every "." in the cell names with "-".
cells = [cell_name.replace(".", "-") for cell_name in cells]
cells[:10]

In [None]:
# Transpose so that observations are cells and variables are genes.
adata_ad = ad.AnnData(ad_mat.transpose())
adata_ad

In [None]:
# Label the transposed matrix: genes on the variable axis, the dash-separated
# cell names on the observation axis.
adata_ad.var.index = genes
adata_ad.obs.index = cells

adata_ad

In [None]:
# Comma-separated metadata table; the first column becomes the row index.
metadata_ad = pd.read_csv(
    "snRNA_metadta.csv",
    index_col=0,
)
metadata_ad.head()
metadata_ad.shape

In [None]:
# Number of metadata rows per diagnosis label.
metadata_ad["Diagnosis"].value_counts()

In [None]:
# How many expression-matrix cell names are (True) / are not (False) present
# in the metadata index.
pd.Series(adata_ad.obs_names.isin(metadata_ad.index)).value_counts()

In [None]:
# Distribution of the per-cell sum over all genes.
sns.histplot(data=adata_ad.X.sum(axis=1))

In [73]:
# The loom file is written genes x cells, so transpose once and reuse the result.
ad_genes_by_cells = adata_ad.X.transpose()

# Per-cell (column) annotations: cell name, number of genes with a value > 0,
# and the per-cell sum over all genes.
col_attrs = dict(
    CellID=np.array(adata_ad.obs.index),
    nGene=np.array(np.sum(ad_genes_by_cells > 0, axis=0)).flatten(),
    nUMI=np.array(np.sum(ad_genes_by_cells, axis=0)).flatten(),
)

# Per-gene (row) annotations.
row_attrs = dict(Gene=np.array(adata_ad.var.index))

# NOTE(review): unlike the MDD looms, this file is written to `singlecell/`
# rather than `singlecell/SCENIC/input/` - confirm this is intended.
loom_path = "/kyukon/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/counts_AD_only_norm.loom"

lp.create(loom_path, ad_genes_by_cells, row_attrs, col_attrs)

In [None]:
# Shell command: move everything in the current working directory matching
# `*loom` to `SCE`.
# NOTE(review): the destination `SCE` looks truncated - confirm the intended
# target directory.
!mv *loom SCE