In [1]:
library(tidyr)
library(data.table)
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
allsv <- read.csv("allcis.csv")
chromo <- read.csv("chromo.csv")

In [3]:
allsv <- allsv %>% select('chrom1', 'donor_id', 'start', 'end')
chromo <- chromo %>% select('chrom1', 'donor_id', 'Start_cl','End_cl', 'start', 'end')
setnames(allsv, "chrom1", "chrom")
setnames(chromo, "chrom1", "chrom")
allsv$chrom <- sub("^", "chr", allsv$chrom)
chromo$chrom <- sub("^", "chr", chromo$chrom)

Discard clusters informations into separate dataframe (will be added later):

In [4]:
clusters <- chromo %>% select('donor_id', 'chrom', 'Start_cl', 'End_cl')

In [None]:
write.csv(clusters, "clusters.csv", row.names = FALSE)

In [5]:
chromo <- chromo %>% select('chrom', 'donor_id', 'start', 'end')
chromo

chrom,donor_id,start,end
<chr>,<fct>,<int>,<int>
chr12,EOPC-01,112938845,115173997
chr6,EOPC-011,145595779,155524408
chr6,EOPC-011,146079806,162656105
chr6,EOPC-011,157841816,168829759
chr6,EOPC-011,154619263,163736198
chr2,EOPC-018,198293468,198898130
chr2,EOPC-018,198255510,199873273
chr2,EOPC-018,193357450,196205886
chr2,EOPC-018,195688807,198591429
chr1,EOPC-021,6735716,10338062


Discard chromo-SV from all-cis file: leave only cis-SV

In [6]:
sv <- setdiff(allsv, chromo)
sv

“Column `donor_id` joining factors with different levels, coercing to character vector”


chrom,donor_id,start,end
<chr>,<chr>,<int>,<int>
chr1,EOPC-057,106653565,106904695
chr12,EOPC-057,40812118,40848670
chr13,EOPC-057,21519008,45907846
chr13,EOPC-057,22053868,45907625
chr13,EOPC-057,26555256,35985073
chr13,EOPC-057,26555552,36074012
chr13,EOPC-057,26556133,43302509
chr13,EOPC-057,26557574,26565008
chr13,EOPC-057,26564468,35975490
chr13,EOPC-057,35934582,36086709


In [7]:
sv <- sv %>% mutate(label = 'sv')
chromo <- chromo %>% mutate(label = 'chromo')

In [8]:
all <- rbind(chromo, sv)
all

chrom,donor_id,start,end,label
<chr>,<fct>,<int>,<int>,<chr>
chr12,EOPC-01,112938845,115173997,chromo
chr6,EOPC-011,145595779,155524408,chromo
chr6,EOPC-011,146079806,162656105,chromo
chr6,EOPC-011,157841816,168829759,chromo
chr6,EOPC-011,154619263,163736198,chromo
chr2,EOPC-018,198293468,198898130,chromo
chr2,EOPC-018,198255510,199873273,chromo
chr2,EOPC-018,193357450,196205886,chromo
chr2,EOPC-018,195688807,198591429,chromo
chr1,EOPC-021,6735716,10338062,chromo


Making labels to each position of rearrangement:

In [9]:
all$label1 <- all$label
all$type <- all$label
all$donor <- all$donor_id
all

chrom,donor_id,start,end,label,label1,type,donor
<chr>,<fct>,<int>,<int>,<chr>,<chr>,<chr>,<fct>
chr12,EOPC-01,112938845,115173997,chromo,chromo,chromo,EOPC-01
chr6,EOPC-011,145595779,155524408,chromo,chromo,chromo,EOPC-011
chr6,EOPC-011,146079806,162656105,chromo,chromo,chromo,EOPC-011
chr6,EOPC-011,157841816,168829759,chromo,chromo,chromo,EOPC-011
chr6,EOPC-011,154619263,163736198,chromo,chromo,chromo,EOPC-011
chr2,EOPC-018,198293468,198898130,chromo,chromo,chromo,EOPC-018
chr2,EOPC-018,198255510,199873273,chromo,chromo,chromo,EOPC-018
chr2,EOPC-018,193357450,196205886,chromo,chromo,chromo,EOPC-018
chr2,EOPC-018,195688807,198591429,chromo,chromo,chromo,EOPC-018
chr1,EOPC-021,6735716,10338062,chromo,chromo,chromo,EOPC-021


In [10]:
all <- all %>% unite(x, donor_id, label, start, sep = "//", remove = TRUE)
all <- all %>% unite(y, donor, label1, end, sep = "//", remove = TRUE)
all

chrom,x,y,type
<chr>,<chr>,<chr>,<chr>
chr12,EOPC-01//chromo//112938845,EOPC-01//chromo//115173997,chromo
chr6,EOPC-011//chromo//145595779,EOPC-011//chromo//155524408,chromo
chr6,EOPC-011//chromo//146079806,EOPC-011//chromo//162656105,chromo
chr6,EOPC-011//chromo//157841816,EOPC-011//chromo//168829759,chromo
chr6,EOPC-011//chromo//154619263,EOPC-011//chromo//163736198,chromo
chr2,EOPC-018//chromo//198293468,EOPC-018//chromo//198898130,chromo
chr2,EOPC-018//chromo//198255510,EOPC-018//chromo//199873273,chromo
chr2,EOPC-018//chromo//193357450,EOPC-018//chromo//196205886,chromo
chr2,EOPC-018//chromo//195688807,EOPC-018//chromo//198591429,chromo
chr1,EOPC-021//chromo//6735716,EOPC-021//chromo//10338062,chromo


Duplicate df with swaped columns x & y:

In [11]:
aly <- select(all, chrom, y, x)
colnames(aly) <- c("chrom", "x", "y")
aly

chrom,x,y
<chr>,<chr>,<chr>
chr12,EOPC-01//chromo//115173997,EOPC-01//chromo//112938845
chr6,EOPC-011//chromo//155524408,EOPC-011//chromo//145595779
chr6,EOPC-011//chromo//162656105,EOPC-011//chromo//146079806
chr6,EOPC-011//chromo//168829759,EOPC-011//chromo//157841816
chr6,EOPC-011//chromo//163736198,EOPC-011//chromo//154619263
chr2,EOPC-018//chromo//198898130,EOPC-018//chromo//198293468
chr2,EOPC-018//chromo//199873273,EOPC-018//chromo//198255510
chr2,EOPC-018//chromo//196205886,EOPC-018//chromo//193357450
chr2,EOPC-018//chromo//198591429,EOPC-018//chromo//195688807
chr1,EOPC-021//chromo//10338062,EOPC-021//chromo//6735716


In [12]:
df <- bind_rows(all, aly)
df

chrom,x,y,type
<chr>,<chr>,<chr>,<chr>
chr12,EOPC-01//chromo//112938845,EOPC-01//chromo//115173997,chromo
chr6,EOPC-011//chromo//145595779,EOPC-011//chromo//155524408,chromo
chr6,EOPC-011//chromo//146079806,EOPC-011//chromo//162656105,chromo
chr6,EOPC-011//chromo//157841816,EOPC-011//chromo//168829759,chromo
chr6,EOPC-011//chromo//154619263,EOPC-011//chromo//163736198,chromo
chr2,EOPC-018//chromo//198293468,EOPC-018//chromo//198898130,chromo
chr2,EOPC-018//chromo//198255510,EOPC-018//chromo//199873273,chromo
chr2,EOPC-018//chromo//193357450,EOPC-018//chromo//196205886,chromo
chr2,EOPC-018//chromo//195688807,EOPC-018//chromo//198591429,chromo
chr1,EOPC-021//chromo//6735716,EOPC-021//chromo//10338062,chromo


Make all possible combinations between 2 columns x & y by chromosome:

In [13]:
allcomb <- df %>% group_by(chrom) %>% complete(x,y)
allcomb

chrom,x,y,type
<chr>,<chr>,<chr>,<chr>
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//100136559,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//101980197,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//105306563,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//105338362,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//106066705,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//106709343,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//116599471,chromo
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//119502745,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//119512733,
chr1,0065_CRUK_PC_0065//chromo//100136559,0065_CRUK_PC_0065//chromo//119512941,


In [14]:
write.csv(allcomb, "allcombR.csv", row.names = FALSE)