In [1]:
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [136]:
# Read the ligand-receptor database into a data frame called LR_database.
# An .rda/.RData file is R's binary save() format and cannot be parsed by
# read.csv(), so the loader is chosen from the file extension: load() for
# .rda/.RData, read.csv() for anything else (e.g. L_R_OmniPathFull.csv).
# NOTE(review): the later cells expect columns Pair.Name, Ligand, Receptor,
# Ligand.Name, Receptor.Name and consensus_direction -- confirm the stored
# object carries these names.
LR_database <- local({
  lr_path <- "../../data/LR_database.rda"
  if (grepl("\\.(rda|rdata)$", lr_path, ignore.case = TRUE)) {
    # Load into a scratch environment so nothing else is overwritten, then
    # return the first object stored in the file.
    rda_env <- new.env()
    obj_names <- load(lr_path, envir = rda_env)
    get(obj_names[1], envir = rda_env)
  } else {
    read.csv(lr_path)
  }
})

In [137]:
# Add a `dup` column holding the reversed pair name ("Receptor_Ligand").
# Comparing it against Pair.Name later reveals interactions that are also
# listed in the swapped orientation.
LR_database$dup <- with(LR_database, paste(Receptor, Ligand, sep = "_"))

In [138]:
# Keep only the rows whose Pair.Name also occurs as a reversed pair name
# (`dup`), i.e. the interactions that are listed in both orientations.
subset_lr <- LR_database[is.element(LR_database$Pair.Name, LR_database$dup), ]

In [139]:
# Show how many rows take part in swapped interactions.
dim(subset_lr)[1L]

In [140]:
# Drop from the main table every row whose Pair.Name is part of subset_lr;
# the cleaned-up subset is appended back later.
LR_database <- LR_database[
  is.na(match(LR_database$Pair.Name, subset_lr$Pair.Name)),
]

In [141]:
# Gather the Receptor symbols of swapped pairs whose Receptor.Name contains
# "plexin", "neuroligin" or "ADAM" (case-sensitive match). These symbols are
# later added to the ligand list.
plexin_family <- as.vector(
  subset_lr$Receptor[grepl("plexin", subset_lr$Receptor.Name)]
)

neuroligin_family <- as.vector(
  subset_lr$Receptor[grepl("neuroligin", subset_lr$Receptor.Name)]
)

adam_family <- as.vector(
  subset_lr$Receptor[grepl("ADAM", subset_lr$Receptor.Name)]
)

# Gather the Ligand symbols whose Ligand.Name contains "receptor"; these are
# later added to the receptor list.
receptor_anno <- as.vector(
  subset_lr$Ligand[grepl("receptor", subset_lr$Ligand.Name)]
)

In [142]:
# Hard-coded gene symbols to be treated as ligands.
ligand <- c(
  "AGRN", "BMP2", "BMP4", "VTCN1", "CD244", "CD38",
  "GAS6", "GDNF", "GUCA2A", "HHLA2", "IHH", "PSEN1",
  "NLGN", "NRTN", "RPH3A", "SHH", "FLT3LG"
)

# Hard-coded gene symbols to be treated as receptors.
receptor <- c(
  "CD2", "CD27", "CD80", "CD86", "SELL", "CD44",
  "CD81", "CD8A", "CLEC1B", "GLG1", "TYROBP", "FLT3"
)

In [143]:
# Extend the ligand list with the plexin, neuroligin and ADAM family symbols,
# and the receptor list with the symbols annotated as "receptor"; duplicates
# are removed, keeping the first occurrence.
ligand <- unique(unlist(
  list(ligand, plexin_family, neuroligin_family, adam_family),
  use.names = FALSE
))
receptor <- unique(unlist(list(receptor, receptor_anno), use.names = FALSE))

In [144]:
# Swapped pairs with a consensus direction (consensus_direction equal to 1).
dir <- filter(
  subset_lr,
  Pair.Name %in% subset_lr$dup,
  consensus_direction == 1
)

In [145]:
# Swapped pairs without a consensus direction (consensus_direction equal to 0).
no_dir <- filter(
  subset_lr,
  Pair.Name %in% subset_lr$dup,
  consensus_direction == 0
)

In [146]:
# Discard undirected rows whose Pair.Name equals the reversed name (`dup`) of
# a directed row: the directed orientation in `dir` takes precedence.
no_dir <- no_dir[is.na(match(no_dir$Pair.Name, dir$dup)), ]

In [147]:
# Discard rows whose Receptor symbol is on the ligand list.
no_dir <- no_dir[!is.element(no_dir$Receptor, ligand), ]

In [148]:
# Discard rows whose Ligand symbol is on the receptor list.
no_dir <- no_dir[match(no_dir$Ligand, receptor, nomatch = 0L) == 0L, ]

In [149]:
# Start an empty (zero-row) accumulator data frame `df` that carries the same
# column names as no_dir.
x <- colnames(no_dir)
df <- as.data.frame(matrix(nrow = 0, ncol = length(x)))
colnames(df) <- x

In [150]:
# Collapse reciprocal (swapped) pairs among the undirected interactions.
# For every unique ligand, take the rows in which it appears as Ligand or as
# Receptor and keep only the first row of each unordered {Ligand, Receptor}
# pair, so A_B and B_A are represented once (the orientation seen first wins).
#
# The unordered pair is built from the Ligand and Receptor columns rather than
# by splitting Pair.Name on "_": splitting keeps only the first two tokens, so
# pairs whose symbols contain an underscore never matched and were dropped.
# The per-ligand pieces are bound in a single rbind() call instead of growing
# `df` inside the loop; row order is the same as in the loop-based version.
kept_per_ligand <- lapply(unique(no_dir$Ligand), function(lig) {
  # dplyr::filter() also drops rows for which the condition is NA.
  r1 <- filter(no_dir, Ligand == lig | Receptor == lig)

  lig_chr <- as.character(r1$Ligand)
  rec_chr <- as.character(r1$Receptor)

  # Orientation-independent key: (smaller symbol, larger symbol).
  pair_key <- data.frame(
    first = pmin(lig_chr, rec_chr),
    second = pmax(lig_chr, rec_chr)
  )

  r1[!duplicated(pair_key), , drop = FALSE]
})

# Rows shared by several ligands appear more than once here; they are expected
# to be de-duplicated on Pair.Name afterwards.
df <- do.call(rbind, c(list(df), kept_per_ligand))

In [151]:
# Keep the first row for every distinct Pair.Name collected in `df`.
no_dir_df <- df[match(unique(df$Pair.Name), df$Pair.Name), ]

In [152]:
# Stack the directed rows on top of the de-duplicated undirected rows.
subset_lr <- do.call(rbind, list(dir, no_dir_df))

In [153]:
# Put the cleaned swapped-pair rows first, followed by the untouched remainder
# of LR_database.
LR_database <- do.call(rbind, list(subset_lr, LR_database))

In [154]:
# Reset the row index to the default 1..n sequence. Assigning NULL restores
# automatic row names and also works for a zero-row data frame, where
# 1:nrow(x) would evaluate to c(1, 0) and fail.
rownames(LR_database) <- NULL

In [162]:
# Persist the cleaned database in R's binary format (object name LR_database).
save(list = "LR_database", file = "LR_database.rda")

In [163]:
# Also export the cleaned database as CSV (row names are written as the first
# column, write.csv's default).
write.csv(x = LR_database, file = "LR_database.csv")