In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import scanpy as sc

In [2]:
# Global scanpy configuration for this notebook: logging level, a printed
# header (the package-version line shown in this cell's output), and figure defaults.
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
# Figure defaults: 80 dpi with a white figure background.
sc.settings.set_figure_params(dpi=80, facecolor='white')

scanpy==1.7.2 anndata==0.7.5 umap==0.5.1 numpy==1.19.5 scipy==1.6.2 pandas==1.1.5 scikit-learn==0.24.1 statsmodels==0.12.2 python-igraph==0.8.3 louvain==0.7.0 leidenalg==0.8.3


In [3]:
# Relative path of the .h5ad file that the analysis checkpoint is written to
# (consumed later by `adata.write(results_file)`).
results_file = 'write/220301_lung_cancer_analysis.h5ad'

In [4]:
# Load the input AnnData object from an .h5ad file on disk.
# NOTE(review): this is an absolute, machine-specific path — the notebook will
# only run where this mount exists; consider making it configurable.
processing_file = '/mnt/d/Nishino/DRY/data/analysis/Omar_san/220224_lung_cancer_scanpy/write/220224_lung_cancer_processing.h5ad'
adata = sc.read_h5ad(processing_file)

KeyboardInterrupt: 

In [None]:
# Display the summary of the freshly loaded object as the cell output.
adata

In [None]:
# Normalize every cell to a total of 10,000 counts, then apply log1p.
# `sc.pp.normalize_per_cell` is deprecated in scanpy; `sc.pp.normalize_total`
# with `target_sum` is the supported replacement and yields the same scaling.
# Unlike the deprecated call it does not drop zero-count cells and does not
# add an `n_counts` column to `adata.obs`.
# NOTE(review): assumes the loaded matrix still holds raw counts — confirm
# against the notebook that produced the input file.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
# Flag highly variable genes using mean / dispersion cut-offs, then plot the
# outcome of the selection.
hvg_cutoffs = dict(min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pp.highly_variable_genes(adata, **hvg_cutoffs)
sc.pl.highly_variable_genes(adata)

In [None]:
# Keep the current state (all genes, before the highly-variable-gene subset,
# regression and scaling performed in the following cells) in `adata.raw`.
adata.raw = adata

In [None]:
# Keep only the genes flagged as highly variable.
# Slicing an AnnData returns a view of the parent object; `.copy()` turns it
# into an independent object so the in-place steps that follow (regress_out,
# scale) work on real data instead of triggering an implicit copy of a view.
adata = adata[:, adata.var.highly_variable].copy()

In [None]:
# Display the object summary after restricting to highly variable genes.
adata

In [None]:
# Display the observation annotation table (`adata.obs`).
adata.obs

In [None]:
# Display the variable annotation table (`adata.var`).
adata.var

In [None]:
# Regress out the 'total_counts' and 'pct_counts_mt' annotations.
# NOTE(review): assumes both keys already exist in `adata.obs` of the loaded
# file (they are not computed in this notebook) — confirm.
sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])

In [None]:
# Scale the expression matrix with scanpy's default settings; no `max_value`
# is passed, so no explicit clipping threshold is requested here.
sc.pp.scale(adata)

In [None]:
# Run PCA on the processed matrix using the ARPACK SVD solver.
sc.tl.pca(adata, svd_solver='arpack')

In [None]:
# Plot the PCA variance ratio with the `log=True` option, as a guide for how
# many PCs to pass to the neighbor graph below.
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Checkpoint the object after PCA.
# Make sure the output directory exists first: creating the HDF5 file does not
# create missing parent folders, so a fresh checkout without `write/` would
# otherwise fail at this point.
Path(results_file).parent.mkdir(parents=True, exist_ok=True)
adata.write(results_file)
adata

In [None]:
# Build the neighborhood graph from the first 30 PCs with 10 neighbors per cell.
sc.pp.neighbors(
    adata,
    n_neighbors=10,
    n_pcs=30,
)

In [None]:
# Compute the UMAP embedding with scanpy's default parameters.
sc.tl.umap(adata)

In [None]:
# Leiden clustering at resolution 0.6; the labels are stored under the
# 'leiden_0.6' key so the resolution is visible in the column name.
sc.tl.leiden(
    adata,
    resolution=0.6,
    key_added='leiden_0.6',
)

In [None]:
# UMAP colored by the Leiden (resolution 0.6) cluster labels.
sc.pl.umap(adata, color='leiden_0.6')

In [None]:
# UMAP colored by the 'MAFB' key.
# NOTE(review): presumably the MAFB gene's expression — this raises if the
# gene is not present in the object; confirm it survives the gene filtering.
sc.pl.umap(adata, color='MAFB')

In [None]:
# Rank genes per Leiden cluster with the logistic-regression method, then plot
# the top 25 genes of each cluster on independent y-axes.
sc.tl.rank_genes_groups(
    adata,
    'leiden_0.6',
    method='logreg',
)
sc.pl.rank_genes_groups(
    adata,
    n_genes=25,
    sharey=False,
)

In [None]:
# Marker genes grouped by the cell type they are used to identify.
marker_genes_by_cell_type = {
    'Epithelial cells': ['EPCAM', 'KRT19', 'KRT18', 'CDH1'],
    'Fibroblasts': ['DCN', 'THY1', 'COL1A1', 'COL1A2'],
    'Endothelial cells': ['PECAM1', 'CLDN5', 'FLT1', 'RAMP2'],
    'T lymphocytes': ['CD3D', 'CD3E', 'CD3G', 'TRAC'],
    'NK cells': ['NKG7', 'GNLY', 'NCAM1', 'KLRD1'],
    'B lymphocytes': ['CD79A', 'IGHM', 'IGHG3', 'IGHA2'],
    'Myeloid cells': ['LYZ', 'MARCO', 'CD68', 'FCGR3A'],
    'MAST cells': ['KIT', 'MS4A2', 'GATA2'],
    'oligodendrocytes': ['OLIG1', 'OLIG2', 'MOG', 'CLDN11'],
}

# Flat list of all markers, in the same group order as the mapping above.
marker_genes = [
    gene
    for genes in marker_genes_by_cell_type.values()
    for gene in genes
]

In [None]:
# One UMAP panel per entry of `marker_genes`.
sc.pl.umap(
    adata,
    color=marker_genes,
)

In [None]:
# Dot plot of the marker genes, grouped by Leiden (resolution 0.6) cluster.
sc.pl.dotplot(adata, marker_genes, groupby='leiden_0.6')

In [None]:
# Same cluster-colored UMAP as plotted earlier in the notebook, shown again
# next to the marker plots for reference when picking cluster IDs below.
sc.pl.umap(adata, color='leiden_0.6')

In [None]:
# UMAP colored by 'CCR2', drawn with the reversed red-grey ('RdGy_r') colormap.
sc.pl.umap(
    adata,
    color='CCR2',
    color_map='RdGy_r',
)

In [None]:
# Leiden (resolution 0.6) cluster labels treated as the myeloid compartment.
# NOTE(review): these IDs are specific to this clustering run — re-check them
# against the marker plots whenever the clustering is regenerated.
myeloid_clusters = ['4', '5', '6', '25', '30', '33']

# Boolean mask over cells: True where the cell's cluster label is in the list.
# A single `isin` replaces six per-cluster masks OR-ed together with np.add.
myeloid = adata.obs['leiden_0.6'].isin(myeloid_clusters)

# Take a real copy rather than a view so the subset is independent of the
# parent `adata` when it is inspected, modified, or written to disk.
adata_myelo = adata[myeloid, :].copy()
adata_myelo

In [None]:
# Display the variable annotation table of the myeloid subset.
adata_myelo.var

In [None]:
# Display the observation annotation table of the myeloid subset.
adata_myelo.obs

In [None]:
# Save the myeloid subset to its own .h5ad file.
# Create the output directory if it is missing: creating the HDF5 file does
# not create parent folders, so the write would otherwise fail on a fresh
# checkout without `write/`.
myeloid_file = Path('write/220301_myeloids.h5ad')
myeloid_file.parent.mkdir(parents=True, exist_ok=True)
adata_myelo.write(myeloid_file)