# **Global settings**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
import scanpy as sc
import scrublet as scr
import scvelo as scv
import seaborn as sns
import scipy.io
import os
import dotplot
import dotplot.utils
import math
import gseapy as gp
from gseapy.plot import barplot, dotplot, gseaplot
from gseapy.scipalette import SciPalette
from pylab import *
from matplotlib.colors import ListedColormap,LinearSegmentedColormap 

# Data

In [None]:
# Make the project directory the working directory for this session.
project_dir = '/disk213/xieqq/JINHUA138.sc'
os.chdir(project_dir)

In [None]:
# Switch into the scTenifoldKnk sub-directory; files written below with a
# relative path (e.g. the exported count table) end up here.
knk_dir = '/disk213/xieqq/JINHUA138.sc/scTenifoldKnk'
os.chdir(knk_dir)

In [None]:
# Load the raw-count object and the lineage-annotated object, keep only the
# cells present in both, and store the raw counts as a layer on `adata`.
concat = sc.read_h5ad('/disk213/xieqq/JINHUA138.sc/adata_rowcounts.h5ad')
adata = sc.read_h5ad('/disk213/xieqq/JINHUA138.sc/adata_CellLineage.h5ad')
common_cells = set(concat.obs_names) & set(adata.obs_names)
# .copy() turns the subset into a real AnnData, so adding a layer below does
# not write into a view.
adata = adata[adata.obs_names.isin(common_cells), :].copy()
# Index `concat` by adata's cell names rather than by a boolean mask: a mask
# keeps each file's own row order, so the count rows would be attached to the
# wrong cells whenever the two files list their cells in a different order.
# NOTE(review): the genes (columns) are assumed to match between the two
# files; the layer assignment raises if the shapes differ.
concat = concat[adata.obs_names, :]
adata.layers['counts'] = concat.X

In [None]:
# Restrict to the epithelial lineage and bring over the cell-type labels from
# the separately saved epithelial object.
Epithelial = sc.read_h5ad('/disk213/xieqq/JINHUA138.sc/Epithelial_CellType.h5ad')
# Copy the subset explicitly: assigning an obs column on a view would make
# AnnData copy implicitly (with a warning) behind the scenes.
adata = adata[adata.obs['CellLineage'].isin(['Epithelial'])].copy()
# The assignment aligns on cell names (the obs index); cells that are missing
# from `Epithelial` get a missing CellType.
adata.obs['CellType'] = Epithelial.obs['CellType']

In [None]:
# Keep only the cells labelled 'Enterocytes' and grab their raw-count layer.
is_enterocyte = adata.obs['CellType'].isin(['Enterocytes'])
Enterocytes = adata[is_enterocyte]
counts = Enterocytes.layers['counts']

In [None]:
# Write the enterocyte counts as a cells x genes CSV (relative path, so it is
# created in the current working directory) for the R section below.
# NOTE(review): .toarray() assumes `counts` is a sparse matrix - confirm.
dense_counts = counts.toarray()
counts_df = pd.DataFrame(
    dense_counts,
    index=Enterocytes.obs_names,
    columns=Enterocytes.var_names,
)
counts_df.to_csv("Enterocytes_counts.csv")

# scTenifoldKnk

In [None]:
library(dplyr)
library(tidyr)
library(qvalue)
library(ggplot2)
library(ggbreak)
library(stringr)
library(Seurat)
library(ggrepel)
library(scTenifoldNet)
library(scTenifoldKnk)

In [None]:
# Work inside the scTenifoldKnk directory; every file below is read or
# written with a path relative to it.
knk_dir <- "/disk213/xieqq/JINHUA138.sc/scTenifoldKnk"
setwd(knk_dir)

In [None]:
# Read the exported count table; the first column becomes the row names.
# check.names=FALSE keeps the column headers exactly as written: the default
# would rewrite names that are not syntactic R names (e.g. "-" becomes "."),
# so they would no longer match the names in the exported file.
countMatrix <- read.csv("Enterocytes_counts.csv", row.names=1, check.names=FALSE)

In [None]:
# Split the count table by the suffix of the row names and write one CSV per
# subset. Each entry maps an output file to the regular expression that
# selects its rows.
# NOTE(review): the suffix presumably encodes the sampling day - confirm.
subset_files <- c(
  "Enterocytes_0d_counts.csv"       = "_0$",
  "Enterocytes_60d_counts.csv"      = "_60$",
  "Enterocytes_90d_counts.csv"      = "_90$",
  "Enterocytes_180d_counts.csv"     = "_180$",
  "Enterocytes_240d_counts.csv"     = "_240$",
  "Enterocytes_60d-180d_counts.csv" = "_60$|_90$|_180$"
)
for (out_file in names(subset_files)) {
  keep_rows <- grepl(subset_files[[out_file]], rownames(countMatrix))
  # After the loop, countMatrix1 holds the last subset (60d-180d).
  countMatrix1 <- countMatrix[keep_rows, ]
  write.csv(countMatrix1, file=out_file)
}

In [None]:
# count
# Load the count table of the subset to analyse (here: 90d) and transpose it
# so that the CSV columns become the rows of the matrix.
# The former `countMatrix <- countMatrix1` line was dropped: it was
# overwritten immediately by the read below and only made this cell fail
# when the splitting cell had not been run first.
# check.names=FALSE keeps the column headers exactly as stored in the file.
countMatrix <- read.csv("Enterocytes_90d_counts.csv", row.names=1, check.names=FALSE)
countMatrix <- as.matrix(t(countMatrix))

In [None]:
# Gene passed to scTenifoldKnk as the knockout target (gKO).
genename <- "PCK1"

In [None]:
# Run the virtual knockout of `genename` on the loaded count matrix and save
# the differential-regulation table with an added -log10(adjusted p) column.
# Fail early, before the long-running call, if the knockout gene is not a
# row of the matrix.
if (!(genename %in% rownames(countMatrix))) {
  stop("Knockout gene '", genename, "' is not a row of countMatrix")
}
result <- scTenifoldKnk(countMatrix=countMatrix, gKO=genename)
# log_pval is Inf wherever p.adj is exactly 0.
df <- result$diffRegulation %>% mutate(log_pval=-log10(p.adj))
# NOTE(review): the plotting cell reads "scTenifoldKnk.diffRegulation.<time>.csv";
# this generic file name has to be renamed per run for that cell to find it.
write.csv(df, file="scTenifoldKnk.diffRegulation.csv", row.names=F)

In [None]:
# Gather the rows with p.value < 0.05 from each per-timepoint result file,
# tag them with their time point, and stack them into one table.
# Note: `df` is reassigned on every iteration, so after the loop it holds
# the filtered table of the last time point.
per_time <- list()
for (i in c("0d","60d","90d","180d","240d")){
  result_file <- paste0("scTenifoldKnk.diffRegulation.",i,".csv")
  df <- read.csv(result_file, check.names=F)
  df <- mutate(filter(df, p.value<0.05), time=i)
  per_time[[length(per_time) + 1]] <- df
}
out <- do.call(rbind, per_time)
write.csv(out, file="scTenifoldKnk.diffRegulation.alltime.csv", row.names=F)

# Z-score vs -log10(p) scatter for all time points, coloured by time point.

# Drop rows whose log_pval is infinite.
not_infinite <- !is.infinite(out$log_pval)
out <- out[not_infinite, ]

# Label candidates: the 10 highest-Z rows within each time point, minus genes
# whose name starts with "ENSSSCG" (presumably unnamed Ensembl IDs).
label_genes <- out %>%
  group_by(time) %>%
  arrange(desc(Z)) %>%
  slice_head(n=10) %>%
  ungroup() %>%
  filter(!str_starts(gene, "ENSSSCG"))

# Fix the time-point order used by the colour legend.
out <- out %>% mutate(time=factor(time, levels=c("0d","60d","90d","180d","240d")))

time_colors <- c("0d"="#FF595E","60d"='#FFCA3A',"90d"='#8AC926',"180d"='#1982C4',"240d"="#6A4C93")
black_10 <- element_text(color="black",size=10)
black_12 <- element_text(color="black",size=12)

P1 <- ggplot(out, aes(x=Z,y=log_pval,color=time)) +
  geom_point(alpha=0.5) +
  # Dashed guides at p = 0.05 and Z = 2.
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color="black") +
  geom_vline(xintercept=2, linetype="dashed", color="black") +
  geom_text_repel(data=label_genes, aes(label=gene),size=3, max.overlaps=50) +
  scale_color_manual(values=time_colors) +
  labs(title="", x="Z-score", y="-log10(p-value)") +
  theme_bw() +
  theme(axis.text.x=black_10,
        axis.text.y=black_10,
        axis.title.x=black_12,
        axis.title.y=black_12,
        legend.text=black_12,
        legend.title=black_10,
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())

pdf(file="scTenifoldKnk_Z&P.pdf", width=6.5, height=6)
P1
dev.off()

# Z-score vs -log10(p) scatter for the pooled 60d-180d knockout run.
# The table is read from its own result file instead of reusing `df`: the
# for-loop earlier in this cell reassigns `df`, so at this point `df` holds
# the 240d rows and the figure saved as "..._60d-180d.pdf" would show the
# wrong data.
# NOTE(review): assumes scTenifoldKnk.diffRegulation.60d-180d.csv was saved
# from the 60d-180d run in the same way as the per-timepoint files - confirm.
out <- read.csv("scTenifoldKnk.diffRegulation.60d-180d.csv",check.names=F) %>%
  filter(p.value<0.05) %>%
  filter(!str_starts(gene, "PCK1"))  # drop the knocked-out gene itself
# Label the 10 highest-Z genes, skipping names that start with "ENSSSCG".
label_genes <- out %>% filter(!str_starts(gene, "ENSSSCG")) %>% arrange(desc(Z)) %>% slice_head(n=10) %>% ungroup()

P2 <- ggplot(out, aes(x=Z,y=log_pval)) +
  geom_point(alpha=0.5) +
  # Dashed guides at p = 0.05 and Z = 2.
  geom_hline(yintercept=-log10(0.05), linetype="dashed", color="black") +
  geom_vline(xintercept=c(2), linetype="dashed", color="black") +
  geom_text_repel(data=label_genes, aes(label=gene),size=3, max.overlaps=50) +
  labs(title="", x="Z-score", y="-log10(p-value)") +
  theme_bw()+ 
  theme(axis.text.x=element_text(color="black",size=10),
        axis.text.y=element_text(color="black",size=10),
        axis.title.x=element_text(color="black",size=12),
        axis.title.y=element_text(color="black",size=12),
        legend.text=element_text(color="black",size=12),
        legend.title=element_text(color="black",size=10),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())

pdf(file="scTenifoldKnk_Z&P_60d-180d.pdf", width=6, height=6)
P2
dev.off()