In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

# khxl9wph 

In [None]:
# Scanpy verbosity levels: errors (0), warnings (1), info (2), hints (3), debug (4).
sc.settings.verbosity = 4
sc.logging.print_header()
sc.settings.set_figure_params(facecolor='white', dpi=80)

In [None]:
# Path of the .h5ad file that will store the analysis results.
results_file = "write/kidney2.h5ad"

In [None]:
# Alternative input (disabled): read a pre-built AnnData file instead of the 10x matrix.
# adata = sc.read_h5ad('../human-other-kidney2.h5ad')

adata = sc.read_10x_mtx(
    '../kidney-data/',          # the directory with the `.mtx` file
    var_names='gene_symbols',   # use gene symbols for the variable names (variables-axis index)
    cache=True,                 # write a cache file for faster subsequent reading
)

In [None]:
# Display the freshly loaded object.
adata

In [None]:
# De-duplicate the variable names; this is unnecessary if using
# `var_names='gene_ids'` in `sc.read_10x_mtx`.
adata.var_names_make_unique()

In [None]:
# Plot the 15 most expressed genes across all cells.
sc.pl.highest_expr_genes(adata, n_top=15)

In [None]:
# Display the object before preprocessing.
adata

## Preprocessing

In [None]:
sc.pp.normalize_total(adata, target_sum=1e4)

In [None]:
sc.pp.log1p(adata)

In [None]:
adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

## PCA

In [None]:
# Run PCA on `adata` using the ARPACK SVD solver.
sc.tl.pca(adata, svd_solver='arpack')

In [None]:
# PCA scatter plot coloured by HAVCR2.
sc.pl.pca(adata, color='HAVCR2')

In [None]:
# Variance ratio of the principal components, drawn on a log scale.
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Save the current state of `adata` to `results_file`.
adata.write(results_file)

In [None]:
# Display the object after PCA and saving.
adata

## Neighborhood + Clustering

In [None]:
# Build the neighbourhood graph with 10 neighbours, using 40 principal components.
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)

In [None]:
# Compute the UMAP embedding for `adata`.
sc.tl.umap(adata)

In [None]:

# sc.tl.umap(adata)
# sc.pl.umap(adata, color=['CST3', 'NKG7', 'PPBP'])

In [None]:
# sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)

In [None]:
# Cluster the cells with the Leiden algorithm (default parameters).
sc.tl.leiden(adata)

In [None]:
# UMAP plots coloured by Leiden cluster and by HAVCR2.
sc.pl.umap(adata, color=['leiden', 'HAVCR2'])

In [None]:
# Display the object after clustering.
adata

In [None]:
# Rank genes for each Leiden cluster with a t-test ...
sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test')

# ... and plot the 25 top-ranked genes per cluster, each panel with its own y-axis.
sc.pl.rank_genes_groups(adata, sharey=False, n_genes=25)

## Top expressed genes (cholangiocytes)

In [None]:
# Subset of cells annotated as "Cholangiocytes".
# NOTE(review): no visible cell creates `adata.obs['cell_type']` — confirm the
# annotation is present in the input data.
cholangiocytes = adata[adata.obs['cell_type'] == "Cholangiocytes", :]

In [None]:
# Display the cholangiocyte subset.
cholangiocytes

In [None]:
# Plot the 30 most expressed genes within the cholangiocyte subset.
sc.pl.highest_expr_genes(cholangiocytes, n_top=30)

In [None]:
# Operates on `adata`: the name `data` is not defined in any visible cell, so
# the call could not run on a fresh kernel.
# `sc.pp.log1p` leaves a 'log1p' entry in `.uns`; skip the transform when the
# matrix has already been log-transformed to avoid applying it twice.
if 'log1p' not in adata.uns:
    sc.pp.log1p(adata)

In [None]:
# Rank genes per annotated cell type with the Wilcoxon test.
# Operates on `adata`: the name `data` is not defined in any visible cell.
# NOTE(review): no visible cell creates `adata.obs['cell_type']` — confirm the
# annotation is present in the input data.
sc.tl.rank_genes_groups(adata, 'cell_type', method='wilcoxon')

In [None]:
# Cells whose `cell_type2` annotation is "Cholangiocytes".
# NOTE(review): no visible cell creates `adata.obs['cell_type2']` — confirm it
# is present in the input data.
data2 = adata[adata.obs['cell_type2'] == "Cholangiocytes", :]
data2

## Marker Genes

In [None]:
# Genes shown in the stacked violin plots at the end of the notebook.
marker_genes = [
    'HAVCR2',
    'C1QA',
    'C1QB',
    'CD63',
    'LYVE1',
    'TREM2',
    'DAB2',
    'CD1C',
    'CD1D',
]

In [None]:
# Reload the object saved to `results_file`, replacing the in-memory `adata`.
# The file was written before the neighbours/UMAP/Leiden cells ran, so those
# results are not part of the reloaded object.
adata = sc.read(results_file)

## Mitochondrial QC and filtering

In [None]:
# Flag mitochondrial genes and recompute per-cell / per-gene QC metrics.
# NOTE(review): `adata` was reloaded from `results_file`, which was written
# after `normalize_total` and `log1p`, so these metrics (e.g. `pct_counts_mt`)
# are derived from transformed values rather than raw counts — confirm that the
# thresholds applied afterwards are still meaningful.
adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

In [None]:
# Keep cells with `n_genes_by_counts` below 2500 and `pct_counts_mt` below 5.
keep_cells = (adata.obs.n_genes_by_counts < 2500) & (adata.obs.pct_counts_mt < 5)

# Slicing an AnnData returns a view; `.copy()` materialises the subset so the
# in-place steps that follow operate on a real object instead of a view.
adata = adata[keep_cells, :].copy()

In [None]:
# Display the object after filtering.
adata

In [None]:
# `adata` was reloaded from `results_file`, which was written after
# normalisation and log-transformation. `sc.pp.log1p` leaves a 'log1p' entry in
# `.uns`, so only transform data that has not been processed yet; otherwise the
# matrix would be normalised and log-transformed a second time.
if 'log1p' not in adata.uns:
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

In [None]:
# Build the neighbourhood graph for the filtered cells (10 neighbours, 40 PCs).
# NOTE(review): no PCA is recomputed after reloading/filtering; presumably the
# PCA stored in `results_file` is reused here — confirm this is intended.
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)

In [None]:
# Embed and cluster the filtered cells.
sc.tl.umap(adata)
sc.tl.leiden(adata)

# Rank genes per annotated cell type (t-test) and plot the 25 top-ranked genes
# per group, each panel with its own y-axis.
# NOTE(review): no visible cell creates `adata.obs['cell_type']` — confirm the
# annotation is present in the reloaded data.
sc.tl.rank_genes_groups(adata, 'cell_type', method='t-test')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

# 15 most expressed genes within the cholangiocyte subset of the filtered data.
cholangiocytes2 = adata[adata.obs['cell_type'] == "Cholangiocytes"]
sc.pl.highest_expr_genes(cholangiocytes2, n_top=15)

In [None]:
# Display the object after re-clustering and gene ranking.
adata

In [None]:
# Stacked violin plot of the marker genes grouped by `cell_type2`; the figure
# is also saved using the given file-name suffix.
cell2_ax = sc.pl.stacked_violin(
    adata,
    marker_genes,
    groupby='cell_type2',
    save="celltype2-featureplot-marker-violin.png",
)

In [None]:
# Display the object again before the remaining violin plots.
adata

In [None]:
# Stacked violin plot of the marker genes grouped by Leiden cluster; the figure
# is also saved using the given file-name suffix.
cell2_ax = sc.pl.stacked_violin(
    adata,
    marker_genes,
    groupby='leiden',
    save="celltype-listing.png",
)

In [None]:
# Stacked violin plot of the marker genes grouped by `batch`; the figure is
# also saved using the given file-name suffix.
# NOTE(review): no visible cell creates `adata.obs['batch']` — confirm it is
# present in the reloaded data.
cell2_ax = sc.pl.stacked_violin(
    adata,
    marker_genes,
    groupby='batch',
    save="batch-featureplot-marker-violin.png",
)