In [1]:
import pandas as pd
import anndata as ad
import scanpy as sc
import matplotlib.pyplot as plt
import scanpy as sc

In [None]:
# Load the healthy-kidney expression matrix and its per-cell metadata.
# The matrix columns are matched against the metadata index below and the
# matrix is transposed before being handed to AnnData, so the file is read
# with cells as columns.
exp_matrix = pd.read_csv("exprMatrix.tsv.gz", sep="\t", index_col=0)
metadata = pd.read_csv("meta.tsv", sep="\t", index_col=0)

# Index.equals returns False when the two indexes differ in length, whereas
# an element-wise `==` raises ValueError before the assertion message can be
# shown.
assert exp_matrix.columns.equals(metadata.index), "Mismatch between expression matrix and metadata."

# Transpose so that rows are the entries of `metadata` (observations).
healthy_kidney = ad.AnnData(X=exp_matrix.T, obs=metadata)

In [None]:
# Load the second dataset: a count table plus per-cell metadata, both of
# which carry a leading "SeuratProject" prefix on their cell identifiers.
counts_data = pd.read_csv("counts_data.csv", index_col=0)
counts_data.columns = counts_data.columns.str.replace('^SeuratProject', '', regex=True)
counts_data = counts_data.T  # rows now correspond to the original columns (cells)
metadata = pd.read_csv("metadata.csv", index_col=0)
metadata.index = metadata.index.str.replace('^SeuratProject', '', regex=True)

# X is passed to AnnData as a bare numpy array, so obs rows are paired with
# matrix rows purely by position. Make sure the metadata really follows the
# barcode order of the count matrix instead of silently mislabelling cells.
if not metadata.index.equals(counts_data.index):
    missing_barcodes = counts_data.index.difference(metadata.index)
    assert missing_barcodes.empty, (
        f"{len(missing_barcodes)} barcodes in counts_data.csv have no row in metadata.csv."
    )
    metadata = metadata.loc[counts_data.index]

adata = ad.AnnData(X=counts_data.values, obs=metadata, var=pd.DataFrame(index=counts_data.columns))

# Keep a single sample. `.copy()` turns the subset into a standalone object,
# because later cells write to `adata.obs` and `adata.X`, which a view does
# not support without an implicit copy and a warning.
# NOTE(review): assumes the 'sample' column is parsed as an integer — confirm.
adata = adata[adata.obs['sample'] == 222107].copy()

In [None]:
# Attach per-cell cluster labels from the clustering export to `adata`.
types = pd.read_csv('adpkd_clustering.csv')

# Append '_2' so the barcodes line up with `adata.obs` index values.
# NOTE(review): presumably '_2' is the per-sample suffix of the merged
# object — confirm against the obs index.
types['Barcode'] = types['Barcode'] + '_2'
types = types.set_index('Barcode')

# Assignment aligns on the index; cells without a label end up as NaN.
adata.obs['cell_types'] = types['Kidney group 222107 analysis']

# Keep only labelled cells, as a real copy, since a later cell assigns to
# `adata.X`.
adata = adata[adata.obs['cell_types'].notna()].copy()

In [None]:
# Drop cells without a 'sampletype' annotation. `.copy()` materialises the
# subset so the in-place preprocessing that follows does not act on a view.
healthy_kidney = healthy_kidney[healthy_kidney.obs['sampletype'].notna()].copy()

In [None]:
# Library-size normalisation followed by log1p, both applied in place with
# scanpy's default arguments.
for transform in (sc.pp.normalize_total, sc.pp.log1p):
    transform(healthy_kidney)

In [None]:
# Build the embedding used for plotting: PCA, then the neighbour graph, then
# UMAP. The order matters because each step is run on the output of the
# previous one.
embedding_steps = (sc.tl.pca, sc.pp.neighbors, sc.tl.umap)
for run_step in embedding_steps:
    run_step(healthy_kidney)

In [None]:
# Preprocess `adata` the same way as `healthy_kidney`: normalise, log1p, then
# PCA -> neighbours -> UMAP.
adata.X = adata.X.astype(float)  # normalisation needs a floating-point matrix

# `sc.pp.normalize_per_cell` is deprecated in favour of `normalize_total`.
# The old function also removed cells with no counts and stored the totals in
# obs['n_counts']; `filter_cells(min_counts=1)` keeps both of those effects.
sc.pp.filter_cells(adata, min_counts=1)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

sc.tl.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

In [None]:
# Restrict both datasets to the genes they share, keeping each dataset's own
# gene order.
common_genes = set(healthy_kidney.var.index) & set(adata.var.index)

healthy_gene_mask = healthy_kidney.var.index.isin(common_genes)
healthy_kidney = healthy_kidney[:, healthy_gene_mask]

adata_gene_mask = adata.var.index.isin(common_genes)
adata = adata[:, adata_gene_mask]

In [None]:
# Genes to plot. For each one a two-panel figure is written to
# "<gene>_comparison.jpg": `healthy_kidney` on top, `adata` below, both on
# the same 0-5 colour scale.
genes = [
    "KRT7", "KRT17", "SLPI", "TACSTD2", "ITGB6", "MMP7", "COL1A1",
    "KRT19", "CLDN4", "NNMT"
]

# The lower panel's axis limits depend only on the embedding, not on the
# gene, so compute them once instead of on every iteration.
umap_coords = adata.obsm["X_umap"]
adata_xlim = (umap_coords[:, 0].min() - 1, umap_coords[:, 0].max() + 1)
adata_ylim = (umap_coords[:, 1].min() - 1, umap_coords[:, 1].max() + 1)

for gene in genes:
    # A gene missing from either dataset would make sc.pl.umap raise and
    # abort the remaining figures; report it and move on instead.
    absent_from = [
        label
        for label, dataset in (("healthy_kidney", healthy_kidney), ("adata", adata))
        if gene not in dataset.var_names
    ]
    if absent_from:
        print(f"Skipping {gene}: not found in {', '.join(absent_from)}")
        continue

    fig, axs = plt.subplots(2, 1, figsize=(8, 10))

    # Top panel: healthy_kidney.
    sc.pl.umap(healthy_kidney, color=gene, ax=axs[0], show=False, color_map="viridis", vmin=0, vmax=5, size=100)
    axs[0].set_title(f" {gene}", fontweight='bold', fontsize=35)
    axs[0].set_aspect('auto')

    # Bottom panel: adata, padded by one unit around the embedding.
    sc.pl.umap(adata, color=gene, ax=axs[1], show=False, color_map="viridis", vmin=0, vmax=5, size=200)
    axs[1].set_title(f"{gene}", fontweight='bold', fontsize=35)
    axs[1].set_aspect('auto')
    axs[1].set_xlim(*adata_xlim)
    axs[1].set_ylim(*adata_ylim)

    fig.tight_layout()
    fig.savefig(f"{gene}_comparison.jpg", dpi=600)
    plt.close(fig)  # release the figure; many are created in this loop