In [2]:
library(data.table)
library(dplyr)
library(stringr)
library(seqinr)

In [3]:
# Read predefined partner data.
#
# Loads the predefined table given by the Snakemake `predefined` parameter,
# upper-cases its PDB column and keeps only the rows whose PDB id equals the
# (upper-cased) `stem` wildcard. The Partner1 / Partner2 values of the match
# are stored in `partner1d` / `partner2d`; each is NA when no row matches.

stem <- toupper(snakemake@wildcards$stem)

df <- fread(snakemake@params$predefined) %>%
    mutate(PDB = toupper(PDB)) %>%
    filter(PDB == stem)

# Later code tests these values in a scalar `if` condition, so each one must
# have length one. When several rows match the same PDB id, only the first
# row is used and a warning is raised instead of failing further down.
if (nrow(df) > 1) {
    warning(
        sprintf("%d predefined rows match PDB id %s; using the first one", nrow(df), stem),
        call. = FALSE
    )
}

# Return the first element of `x`, or NA when `x` has no elements.
first_or_na <- function(x) {
    if (length(x) == 0) {
        NA
    } else {
        x[[1]]
    }
}

partner1d <- first_or_na(df$Partner1)
partner2d <- first_or_na(df$Partner2)



In [4]:
# Read antibody (Ig) matches.
#
# Lines containing '#' are removed with grep before parsing. The remaining
# rows are read as space-separated fields without a header; the third field
# (V3) is split on '|' and its second component is collected into `igchains`
# (used later as the set of chains having an Ig motif).
#
# The input path is shell-quoted so that paths containing spaces or shell
# metacharacters are passed to grep as a single argument. path.expand() keeps
# a leading '~' working, since quoting would otherwise disable the shell's
# own tilde expansion.
ig_path <- shQuote(path.expand(snakemake@input$ig_data))
df <- fread(
    cmd = sprintf("grep -v '#' %s", ig_path),
    header = FALSE,
    sep = " ",
    fill = TRUE
)

igchains <- list()
if (nrow(df) > 0) {
    igchains <- unlist(lapply(df$V3, function(hit_id) {
        # Second '|'-delimited component of the hit identifier.
        strsplit(hit_id, split = "|", fixed = TRUE)[[1]][[2]]
    }))
}

# Final result: a list of unique ids (an empty list when there are no hits).
igchains <- unique(as.list(igchains))
igchains

In [5]:
# Read the amino-acid sequences from the FASTA input, one string per record.
seqs <- read.fasta(
    snakemake@input$fa,
    as.string = TRUE,
    strip.desc = TRUE,
    seqtype = "AA"
)

# One row per sequence: the record names become row names and the sequence
# string ends up in column V1.
sdf <- as.data.frame(t(as.data.table(seqs)))

# The chain id is the second '|'-delimited component of each record name.
ids <- lapply(rownames(sdf), function(record_name) {
    strsplit(record_name, split = "|", fixed = TRUE)[[1]][[2]]
})
sdf$chain <- ids
rownames(sdf) <- NULL

# Keep only chain id and sequence length, ordered from shortest to longest.
sdf <- sdf %>%
    select(chain, sequence = V1) %>%
    rowwise() %>%
    mutate(length = nchar(sequence)) %>%
    select(chain, length) %>%
    arrange(length)

In [6]:
# Chain bookkeeping derived from the length-sorted table `sdf` and the Ig
# hits in `igchains`.
all_chains <- sdf$chain
number_of_chains <- nrow(sdf)

# `sdf` is sorted by ascending length, so the first chain is the shortest.
shortest_chain <- all_chains[[1]]
other_than_shortest_chans <- setdiff(all_chains, shortest_chain)

# Split the chains by whether they had an Ig match.
chains_having_Ig_motif <- igchains
chains_nothaving_Ig_motif <- setdiff(all_chains, chains_having_Ig_motif)

In [21]:
# Infer the two interaction partners from the chain count and the Ig hits,
# then write one tab-separated line "<partner1>\t<partner2>\t<comment>" to
# the first Snakemake output. Values still NA are written as the text "NA".
#
# Rules:
#   1 chain              -> no partners, comment only.
#   2 chains             -> shortest chain vs. the other chain.
#   3 chains, 2 with Ig  -> non-Ig chain vs. the two Ig chains (comment stays NA).
#   3 chains, 1 or 3 Ig  -> no partners, warning comment only.
#   3 chains, 0 with Ig  -> shortest chain vs. the other chains.
#   > 3 chains           -> no partners, warning comment only.
# Predefined partners, when both are available, replace the inferred ones.
partner1 <- NA
partner2 <- NA
comment <- NA
n_ig_chains <- length(chains_having_Ig_motif)

# `&&` is used because these are scalar conditions inside `if`.
if (number_of_chains == 1) {
    comment <- "One chain. Impossoble to define interacting chains"
} else if (number_of_chains == 2) {
    partner1 <- shortest_chain
    partner2 <- paste(other_than_shortest_chans, collapse = ",")
    if (n_ig_chains == number_of_chains) {
        comment <- "WARNING. All two chains has Ig related sequences"
    } else {
        comment <- "Clear! Two chains"
    }
} else if (number_of_chains == 3 && n_ig_chains > 0) {
    if (n_ig_chains == 3) {
        comment <- "WARNING. All three chains has Ig related sequences"
    } else if (n_ig_chains == 1) {
        comment <- "WARNING. Only one out of three chains has Ig related sequences"
    } else if (n_ig_chains == 2) {
        partner1 <- paste(chains_nothaving_Ig_motif, collapse = ",")
        partner2 <- paste(chains_having_Ig_motif, collapse = ",")
    }
} else if (number_of_chains == 3 && n_ig_chains == 0) {
    # This branch runs when no chain has an Ig hit (the message text is kept
    # verbatim because it is part of the output file).
    comment <- "WARNING. Any of the three chans has an Ig related motif. Choosing based on short sequence"
    partner1 <- shortest_chain
    partner2 <- paste(other_than_shortest_chans, collapse = ",")
} else if (number_of_chains > 3) {
    comment <- "WARNING. More than three chains. Unable to infer interactions"
}

# If predefined data is available, take the partners from it.
# Only the first predefined value is tested and used: `strsplit(...)[[1]]`
# below only ever looked at the first element, and a condition longer than
# one element is an error in `if`.
predefined1 <- partner1d[1]
predefined2 <- partner2d[1]

if (!is.na(predefined1) && !is.na(predefined2)) {
    # Keep the inferred result in the comment for traceability.
    comment <- paste("Predifined data superseded this:", partner1, partner2, comment, sep = " ")
    # Each character of the predefined string becomes one comma-separated chain id.
    partner1 <- paste(strsplit(predefined1, split = "")[[1]], collapse = ",")
    partner2 <- paste(strsplit(predefined2, split = "")[[1]], collapse = ",")
}

outl <- paste(c(partner1, partner2, comment), collapse = "\t")
writeLines(outl, snakemake@output[[1]])
#system(sprintf("cat %s",snakemake@output[[1]]), intern = T)
print(outl)
