# Identifying ligand effects in PDAC validation with batch effects regressed out
#### _CK 2022_

### What this does:
- run_module_lm_by_ligand_and_day: function to identify ligand effects on cNMF modules with batch effects regressed out
- Runs on all single ligands for the validation cNMF modules, saving the full regression results as an RDS file and the significance-filtered coefficients as a CSV file

In [None]:
library(mltools)
library(data.table)
library(Seurat)
library(tidyverse)
library(SeuratDisk)

In [None]:
# Function to assess validation ligand effects while regressing out the effect of batch

#' Fit one linear model per module with ligand and day indicators as covariates.
#'
#' For every column named in `cols_to_predict`, fits
#' `lm(module ~ ligand indicators + day indicators)`. The ligand indicators are
#' a one-hot encoding of `sobj_meta$sample` with the "Control" level dropped,
#' and the day indicators are a one-hot encoding of `sobj_meta$day` with the
#' "cs" level dropped, so every coefficient is relative to Control / cs.
#'
#' @param sobj_meta Data frame with `sample` and `day` columns plus the columns
#'   named in `cols_to_predict`. `sample` must contain a "Control" level and
#'   `day` must contain a "cs" level.
#' @param cols_to_predict Character vector of response column names.
#' @param threshold Adjusted p-value cutoff; coefficients whose adjusted p-value
#'   is above it (or cannot be computed) are set to 0 in `sig_coefs`. Defaults
#'   to 0.001, the cutoff that used to be hard-coded in the body.
#' @return A list with four tables, each with a `ligand` column holding the
#'   model term names followed by one column per entry of `cols_to_predict`:
#'   `coefs` (estimates), `pvals` (raw p-values), `adjusted_pvals` (Bonferroni
#'   adjusted across the modules of each term) and `sig_coefs` (estimates with
#'   non-significant entries zeroed).
run_module_lm_by_ligand_and_day = function(sobj_meta, cols_to_predict, threshold = 0.001){
  stopifnot(
    length(cols_to_predict) > 0,
    is.numeric(threshold), length(threshold) == 1, !is.na(threshold)
  )

  # one_hot() names columns "<column name>_<level>"; the gsub strips that prefix
  # so each indicator column is named after its factor level.
  design_matrix = one_hot(as.data.table(data.frame(as.factor(sobj_meta$sample))))
  colnames(design_matrix) = gsub("as.factor.sobj_meta.sample._","",colnames(design_matrix))
  design_matrix = design_matrix %>% select(-Control)

  days = one_hot(as.data.table(data.frame(as.factor(sobj_meta$day))))
  colnames(days) = gsub("as.factor.sobj_meta.day._","",colnames(days))
  # remove the cs column since everything needs to be relative to something
  days = days %>% select(-cs)
  design_matrix = cbind(design_matrix, days)

  # Covariates (plus intercept) as rows, prediction variables as columns.
  # Initialised to NA so that a term lm() cannot estimate stays visibly missing.
  n_terms = ncol(design_matrix) + 1
  n_modules = length(cols_to_predict)
  coefs = matrix(NA_real_, nrow = n_terms, ncol = n_modules)
  pvals = matrix(NA_real_, nrow = n_terms, ncol = n_modules)
  term_names = NULL

  for(j in seq_along(cols_to_predict)){
    data = data.frame(y = sobj_meta[[cols_to_predict[j]]], design_matrix,
                      check.names = FALSE)
    model = lm(y ~ . , data = data)
    fit_table = summary(model)$coefficients
    if(is.null(term_names)){
      # coef() keeps aliased terms (as NA) whereas summary() drops them, so it
      # gives the complete, stable set of row labels.
      term_names = names(coef(model))
    }
    rows = match(rownames(fit_table), term_names)
    coefs[rows, j] = fit_table[, "Estimate"]
    pvals[rows, j] = fit_table[, "Pr(>|t|)"]
  }

  # Bonferroni correction is applied per term, across the modules.
  adjusted = pvals
  for(i in seq_len(nrow(pvals))){
    adjusted[i, ] = p.adjust(pvals[i, ], method = "bonferroni")
  }

  # Zero every coefficient that is not significant at the caller's threshold;
  # entries without a usable p-value are treated as not significant.
  sig = coefs
  sig[is.na(adjusted) | adjusted > threshold] = 0

  # Turn a terms-by-modules matrix into a table with a leading `ligand` column.
  as_result_table = function(values){
    out = data.frame(values)
    colnames(out) = cols_to_predict
    out = cbind(term_names, out)
    colnames(out)[1] = "ligand"
    out
  }

  return(list(coefs = as_tibble(as_result_table(coefs)),
              pvals = as_tibble(as_result_table(pvals)),
              adjusted_pvals = as_result_table(adjusted),
              sig_coefs = as_tibble(as_result_table(sig))))
}



Identifying ligand effects while regressing out batch in the validation dataset

In [None]:
# Settings: k appears in the input/output file names and sets how many
# GEP_Module_* columns are modelled; timepoint selects rows via the `time` column.
k = 26
timepoint = 7

# Load the metadata / cNMF usage table and keep only the requested time point.
sobj_meta = read.csv(paste0("single_ligand_meta_and_cNMF_usages_k", k, ".csv")) %>%
  filter(time == timepoint)

# Use column X as the row names, then drop the first column.
# NOTE(review): presumably X is the first column (the CSV's index) -- confirm.
rownames(sobj_meta) = sobj_meta$X
sobj_meta = sobj_meta[, -1]
cols_to_predict = paste0("GEP_Module_", seq_len(k))

# Fit the per-module models; 1e-5 is passed as the function's threshold argument.
cnmf_reg_results = run_module_lm_by_ligand_and_day(sobj_meta, cols_to_predict, 1e-5)
cnmf_coefs = cnmf_reg_results[["sig_coefs"]]

# Persist the full result list (RDS) and the significance-filtered coefficients (CSV).
saveRDS(cnmf_reg_results, file = paste0("cnmf_regression_results_d", timepoint, "_k", k, ".rds"))
write.csv(cnmf_coefs, file = paste0("cnmf_linear_model_coeficients_d", timepoint, "_k", k, ".csv"))