In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
import sys

# Global plot styling: seaborn's 'white' style, and a white face colour for saved figures.
sns.set_style('white')
plt.rcParams.update({'savefig.facecolor': 'w'})

In [None]:
# Samples this notebook knows about; exactly one is processed per run.
samples = ['pbmc6k', 'pbmc8k', 'human_brain']
# Position in `samples` of the sample to process. The original cell left the
# subscript empty (`samples[]`), which is a SyntaxError; edit this index to
# switch samples.
sample_index = 0
prefix = samples[sample_index]
prefix

In [None]:
# Switch the working directory to the project root; relative paths built in
# later cells resolve against it.
os.chdir('/B_ALL/')
# Make the project's script directory importable for the two local modules below.
sys.path.append(os.path.abspath('/B_ALL/script/'))
# NOTE(review): star imports hide where names come from. Helpers used later
# (fullpath_closure, plot_silhouette, scran_normalize) presumably come from
# these two modules — confirm and import them explicitly.
from scRNA_package import *
from scran_normalize import *

In [None]:
# Input location for the selected sample. Later cells build file names with
# `data_path + 'matrix.mtx'`, so the value must end with a path separator.
# The original cell left this assignment blank (a SyntaxError); the location is
# now read from the SCRNA_DATA_PATH environment variable so nothing is guessed.
data_path = os.environ.get('SCRNA_DATA_PATH', '')
if not data_path:
    raise ValueError(
        "data_path is not set: export SCRNA_DATA_PATH=<input directory ending in '/'> "
        "or assign data_path directly in this cell"
    )
# Per-sample output directory, relative to the current working directory.
out_dir = os.path.join('CG_project', 'processed_data', prefix)

# NOTE(review): fullpath_closure is a project helper that is not visible here.
# From how `out(...)` is used in later cells it is expected to return a path
# under the given directory — confirm.
data = fullpath_closure(data_path)
out = fullpath_closure(out_dir)

In [None]:
# NOTE(review): scanpy is imported mid-notebook; consider moving it to the
# import cell at the top.
import scanpy as sc
sc.settings.verbosity = 3  # scanpy logging verbosity level
sc.settings.set_figure_params(dpi=150, dpi_save=150)  # same dpi for displayed and saved figures
# Directory scanpy uses for figures saved via `save=`; set to whatever path
# out('fig_supp') resolves to.
sc.settings.figdir = out('fig_supp')

In [None]:
# Read the matrix file and transpose it (presumably stored genes x cells, so
# that rows become cells — confirm against the input format).
matrix_file = data_path + 'matrix.mtx'
adata = sc.read(matrix_file).transpose()

# Variable labels come from column 1 of the tab-separated genes.tsv.
gene_table = pd.read_csv(data_path + 'genes.tsv', sep='\t', header=None)
adata.var_names = gene_table[1]

# Observation labels come from column 0 of barcodes.tsv.
barcode_table = pd.read_csv(data_path + 'barcodes.tsv', header=None)
adata.obs_names = barcode_table[0]

# Disambiguate repeated variable names.
adata.var_names_make_unique()
adata

In [None]:
# Drop cells with fewer than 500 detected genes and genes detected in fewer
# than 5 cells. filter_cells also records obs['n_genes'], which is plotted below.
sc.pp.filter_cells(adata, min_genes=500)
sc.pp.filter_genes(adata, min_cells=5)
# Total counts per cell. Summing a scipy sparse matrix along an axis returns an
# (n, 1) np.matrix rather than a 1-D vector; flatten it so the assignment
# yields a proper column whether X is sparse or dense.
adata.obs['n_counts'] = np.asarray(adata.X.sum(axis=1)).ravel()
# Total counts vs. detected genes, for choosing the upper QC cut-offs.
sc.pl.scatter(adata, x='n_counts', y='n_genes')

In [None]:
# Upper QC cut-offs. The original cell left both thresholds blank (a
# SyntaxError). np.inf keeps every cell, so the notebook runs unchanged until
# real values, read off the n_counts / n_genes scatter plot, are filled in.
max_genes = np.inf
max_counts = np.inf
keep_cells = (adata.obs['n_genes'] < max_genes) & (adata.obs['n_counts'] < max_counts)
# .copy() materialises the subset so later writes to adata.obs do not go
# through a view of the unfiltered object.
adata = adata[keep_cells, :].copy()

In [None]:
# Brain data cell type assignment.
# The annotation file below lives under the human_brain dataset directory, so
# it is only loaded for that sample. Previously it was read and index-aligned
# against whatever sample was selected.
if prefix == 'human_brain':
    metadata = pd.read_csv(
        "/B_ALL/datasets/processed_data/human_brain/Front_metadata.csv",
        index_col=0,
    )
    # Series assignment aligns on the index (presumably cell barcodes — confirm);
    # cells missing from the file get NaN.
    adata.obs['celltype'] = metadata['Celltype']
else:
    # Keep the column (all missing) so later cells that read obs['celltype'] still work.
    adata.obs['celltype'] = np.nan

In [None]:
# Normalise with the project's scran_normalize helper (star-imported; its
# behaviour is not visible in this notebook).
adata = scran_normalize(adata)
# NOTE(review): this stores a reference to adata.X, not a copy. If sc.pp.log1p
# below transforms X in place, layers['sf'] ends up holding the log-transformed
# values as well — confirm which was intended and add .copy() if needed.
adata.layers['sf'] = adata.X
sc.pp.log1p(adata)
# X is replaced by layers['counts'] (presumably created by scran_normalize —
# verify), so the CSV written next contains that layer, not the log1p values.
adata.X = adata.layers['counts']
adata.to_df().to_csv(out(f'{prefix}.qc.csv'))
# Export every obs column as the per-cell metadata table.
sc.get.obs_df(adata, keys=adata.obs_keys()).to_csv(
    out(f'{prefix}.metadata.csv')
)

In [None]:
import subprocess

# Run the LIGER integration script. check=True raises CalledProcessError when
# Rscript exits non-zero, instead of silently continuing and reading a missing
# or stale result file. Arguments are passed as a list, so no shell is involved.
# NOTE(review): the script receives no arguments; confirm how it learns which
# sample to process and that it writes the CSV read below.
subprocess.run(["Rscript", "/B_ALL/script/01.integration_liger.R"], check=True)
df_liger = pd.read_csv(
    out(f'{prefix}.liger.csv'), index_col=0,
)
# Reorder rows to adata's cell order before storing the table as an embedding.
adata.obsm['X_liger'] = df_liger.loc[adata.obs_names, :].to_numpy()

In [None]:
# Per-sample value passed to plot_silhouette further down. 'pbmc8k' is listed
# in `samples` but was missing here, so selecting it raised KeyError; it gets
# the same value as every other entry.
n_pcs = {
    'pbmc': 50,
    'pbmc6k': 50,
    'pbmc8k': 50,
    'human_brain': 50,
}[prefix]
# NOTE(review): neither n_pcs nor the 'X_liger' embedding is passed here, so the
# graph is built from scanpy's default representation — confirm whether
# use_rep='X_liger' was intended.
sc.pp.neighbors(
    adata,
    random_state=42,
)

#%%
# Compute the UMAP embedding with a fixed seed.
sc.tl.umap(adata, random_state=42)
# NOTE(review): the figure is saved with a '.batch.pdf' suffix but no `color=`
# is given, so nothing is coloured by batch — confirm the intent.
sc.pl.umap(adata, save='.batch.pdf')

# Resolution grid for the clustering sweep: 0.01-0.09 in steps of 0.01, then
# 0.1-3.0 in steps of 0.1, each rounded to two decimals.
fine_steps = np.linspace(.01, .09, 9)
coarse_steps = np.linspace(.1, 3, 30)
res_array = [round(value, 2) for value in (*fine_steps, *coarse_steps)]
res_array
# Run Leiden clustering once per resolution in res_array; each result is stored
# under its own key, 'leiden_<resolution>'.
for res in res_array:
    sc.tl.leiden(adata, resolution=res, key_added=f'leiden_{res}')

In [None]:
# UMAP panels, one per swept resolution, six per row, with cluster labels
# drawn on the data.
sweep_columns = [f'leiden_{r}' for r in res_array]
sc.pl.umap(adata, color=sweep_columns, legend_loc='on data', ncols=6, save='.leiden.pdf')

# Hand the whole sweep to the project's plot_silhouette helper (presumably
# writes cluster-quality metrics to `fname` — confirm), using the LIGER embedding.
fname = out(f'fig_supp/{prefix}.cluster_metrics.pdf')
plot_silhouette(
    adata,
    n_pcs,
    fname,
    rep='X_liger',
    algs=['leiden'],
    res_array=res_array,
)


In [None]:
# Resolution(s) to highlight on the UMAP, per sample. This replaces an
# `if False / elif` chain that left `selected` undefined (NameError) for any
# sample without a branch, e.g. 'human_brain'.
selected_by_prefix = {
    'pbmc6k': ['0.6'],
    'pbmc8k': ['0.6'],
}
# Resolution used by the downstream cells, per sample.
# NOTE(review): pbmc8k highlights 0.6 above but uses 0.3 here — confirm which
# one is intended.
res_prefix = {
    'pbmc6k': 0.6,
    'pbmc8k': 0.3,
}
if prefix not in selected_by_prefix or prefix not in res_prefix:
    raise KeyError(
        f"no clustering resolution has been chosen for sample {prefix!r}; "
        "add it to selected_by_prefix and res_prefix"
    )
selected = selected_by_prefix[prefix]
sc.pl.umap(
    adata,
    color=[f'leiden_{r}' for r in selected],
    save='.leiden.selected.pdf',
    legend_loc='on data',
)
res = res_prefix[prefix]
prefix, res

In [None]:
# Fraction of cells in each cluster at the chosen resolution.
chosen_clusters = adata.obs[f'leiden_{res}']
chosen_clusters.value_counts(normalize=True)

In [None]:
# Rank genes for cluster '0' of the chosen clustering. The sweep only creates
# 'leiden_<resolution>' columns, so the original groupby key 'leiden' did not
# exist in adata.obs.
cluster_key = f'leiden_{res}'
sc.tl.rank_genes_groups(
    adata, cluster_key, groups=['0'], method='wilcoxon', n_genes=200,
)
top_genes = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
# to_csv() with no path only returns the CSV text and writes nothing; save the
# table next to the other per-sample outputs instead.
top_genes.to_csv(out(f'{prefix}.{cluster_key}.top_genes.csv'))

In [None]:
# One UMAP panel per gene in this list.
marker_genes = [
    'IL7R', 'LYZ', 'MS4A1',
    'GNLY', 'FCER1A', 'FCGR3A',
    'CST3', 'CD8A', 'CCL5',
]
sc.pl.umap(adata, color=marker_genes)

In [None]:
# Collect every obs column and pull out the cell-type labels.
metadata = sc.get.obs_df(adata, keys=adata.obs_keys())
celltype = metadata['celltype']
# to_csv() with no path only returns the CSV text and writes nothing; save the
# labels next to the other per-sample outputs instead.
celltype.to_csv(out(f'{prefix}.celltype.csv'))