In [None]:
!pip install scanpy
!pip install scikit-misc --force 
!pip install matplotlib==3.5.3
!pip install leidenalg numpy python-igraph

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.tools as tl
import os

# Mount Google Drive before touching the data directory: `bladder_path` lives
# under /content/drive, so listing it on a fresh runtime fails until the drive
# has been mounted. Calling mount again on an already-mounted drive is harmless.
from google.colab import drive
drive.mount('/content/drive')

bladder_path = "/content/drive/MyDrive/zip_python/"  # directory holding the 10x matrix files
bladder_fl = os.listdir(bladder_path)                # names of the entries in that directory

sc.settings.verbosity = 3  # scanpy verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')

In [None]:
# Mount Google Drive at /content/drive so files under MyDrive are reachable
# from the Colab runtime.
# NOTE(review): `bladder_path` is listed with os.listdir in an earlier cell, so
# this mount has to have happened before that cell runs - confirm cell order.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the 10x count matrix from `bladder_path` into an AnnData object.
adata_bladder = sc.read_10x_mtx(
    bladder_path,              # directory containing the `.mtx` file
    var_names='gene_symbols',  # index the variables axis by gene symbol
    cache=True,                # write a cache file so later reads are faster
)
    

In [None]:
# Gene symbols were chosen as variable names when loading; make them unique
# in place so that name-based lookups are unambiguous.
adata_bladder.var_names_make_unique()

In [None]:
# Show the genes that yield the highest fraction of counts in each single
# cell, across all cells (top 500 genes).
sc.pl.highest_expr_genes(adata_bladder, n_top=500)

In [None]:
#sc.pp.filter_cells(adata_bladder, min_genes=200)
#sc.pp.filter_genes(adata_bladder, min_cells=3)

In [None]:
# Flag mitochondrial genes (symbols beginning with 'MT-') in a boolean
# `.var['mt']` column, then compute QC metrics that include that gene group.
adata_bladder.var['mt'] = adata_bladder.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(
    adata_bladder,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

In [None]:
# Violin plots of the per-cell QC fields stored in .obs, one panel per metric.
sc.pl.violin(
    adata_bladder,
    ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
# Scatter each QC metric against the total counts per cell:
# first the mitochondrial percentage, then the number of detected genes.
for y_key in ('pct_counts_mt', 'n_genes_by_counts'):
    sc.pl.scatter(adata_bladder, x='total_counts', y=y_key)

In [None]:
# Keep only cells with fewer than 2500 detected genes and less than 5%
# mitochondrial counts.
# Slicing an AnnData yields a view of the parent object; `.copy()` turns the
# filtered result into an independent object so the in-place preprocessing
# that follows does not operate on a view.
adata_bladder = adata_bladder[adata_bladder.obs.n_genes_by_counts < 2500, :]
adata_bladder = adata_bladder[adata_bladder.obs.pct_counts_mt < 5, :].copy()

In [None]:
# Total-count normalize (library-size correct) the data matrix X to 10,000
# reads per cell, so that counts become comparable among cells.
sc.pp.normalize_total(
    adata_bladder,
    target_sum=1e4,
)

In [None]:
# Logarithmize the normalized data matrix in place (log(1 + x)).
sc.pp.log1p(adata_bladder)

In [None]:
# Annotate highly variable genes using mean-expression and dispersion cutoffs;
# the result is read later from `.var.highly_variable`.
sc.pp.highly_variable_genes(
    adata_bladder,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
)

In [None]:
# Plot the highly-variable-gene annotation computed by
# sc.pp.highly_variable_genes.
sc.pl.highly_variable_genes(adata_bladder)

In [None]:
# Store the current (normalized, log-transformed, all-genes) state in `.raw`
# before the object is restricted to highly variable genes and scaled.
adata_bladder.raw = adata_bladder

In [None]:
# Restrict the object to the genes flagged as highly variable.
# Slicing an AnnData yields a view; `.copy()` makes the subset an independent
# object because the following regress_out / scale steps modify X in place.
adata_bladder = adata_bladder[:, adata_bladder.var.highly_variable].copy()

In [None]:
# Regress out the effects of total counts per cell and of the percentage of
# mitochondrial counts (both are .obs columns from calculate_qc_metrics).
sc.pp.regress_out(adata_bladder, ['total_counts', 'pct_counts_mt'])

In [None]:
# Scale each gene; values above 10 after scaling are clipped (max_value=10).
sc.pp.scale(adata_bladder, max_value=10)

In [None]:
# Principal component analysis with the ARPACK solver.
sc.tl.pca(
    adata_bladder,
    svd_solver='arpack',
)

# PCA scatter plot coloured by ACE2.
sc.pl.pca(
    adata_bladder,
    color='ACE2',
)

# Variance ratio per principal component, on a log scale.
sc.pl.pca_variance_ratio(
    adata_bladder,
    log=True,
)

In [None]:
# adata_bladder.write("drive/MyDrive/bladder_results/bladder_results_file")

In [None]:
# Build the neighborhood graph (10 neighbors, first 40 principal components).
sc.pp.neighbors(
    adata_bladder,
    n_neighbors=10,
    n_pcs=40,
)

# Leiden clustering on that graph; the plots below read the labels via the
# 'leiden' key.
sc.tl.leiden(adata_bladder)

In [None]:
# PAGA on the clustered graph; its layout is then used to initialise UMAP.
sc.tl.paga(adata_bladder)
# remove `plot=False` if you want to see the coarse-grained graph
sc.pl.paga(
    adata_bladder,
    plot=False,
)
sc.tl.umap(
    adata_bladder,
    init_pos='paga',
)

# UMAP coloured by cluster label and marker genes.
sc.pl.umap(
    adata_bladder,
    color=['leiden', 'CLDN4', 'ACE2'],
)
sc.pl.umap(
    adata_bladder,
    color=["SPINK1", 'CLDN4', 'ACE2'],
)

In [None]:
# Violin plots of the marker genes, grouped by Leiden cluster.
sc.pl.violin(
    adata_bladder,
    ['SPINK1', 'CLDN4', 'ACE2'],
    groupby='leiden',
)