In [42]:
library(tidyverse)
library(readr)
library(fs)
library(magrittr)
library(ggplot2)
library(openxlsx)
library(dplyr)


In [2]:
# Work from the linkage-map directory so that the relative file names used for
# the input workbook and the output CSV files resolve there.
# NOTE(review): this absolute path ties the notebook to one machine layout.
setwd("/workspace/hraijc/Blueberry/Blueberry_trio/contigbin/LinkageMap")


In [32]:
# Workbook that holds the phased linkage map, spread over several worksheets.
excel_file <- "LinkageMap_clean_phased_all_info.xlsx"

# Names of every worksheet in the workbook.
sheet_names <- getSheetNames(excel_file)

# Read each worksheet into its own data frame, keyed by sheet name.
# lapply() over a named vector builds the whole list in one step, so nothing
# is grown inside a loop and no loop temporaries are left behind in the
# global environment.
data_frames_list <- lapply(
  setNames(sheet_names, sheet_names),
  function(sheet_name) read.xlsx(excel_file, sheet = sheet_name)
)

# Stack the sheets row-wise. Because the list is named, rbind() builds the
# row names as "<sheet name>.<row number>", which records the sheet each row
# came from.
lm <- do.call(rbind, data_frames_list)


In [33]:
# Split the marker name at the underscore into the reference contig name
# (refContig) and the position on that contig (refPosition). Both new columns
# are character; the original marker column is replaced by them.
lm <- lm %>%
  separate(
    col = marker,
    into = c("refContig", "refPosition"),
    sep = "_"
  )


In [34]:
# Show the first and last six rows of the combined map as a sanity check.
head(lm, n = 6L)
tail(lm, n = 6L)

Unnamed: 0_level_0,refContig,refPosition,position,h1,h2,h3,h4,h5,h6,h7,h8,chr,bp,M7,Nui,V06.A002-186,Ref,Alt,LG
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
chr1.1,seq-0-001,1068804,0.0,0,0,0,1,0,0,1,0,1,1068804,1,1,1,C,T,LG1
chr1.2,seq-0-001,1215280,1.739313,0,1,0,0,0,1,1,0,1,1215280,1,2,1,C,T,LG1
chr1.3,seq-0-001,3688823,3.07242,0,0,0,0,1,0,0,0,1,3688823,0,1,1,T,A,LG1
chr1.4,seq-0-001,2717632,3.286541,0,0,1,0,0,0,1,1,1,2717632,1,2,0,C,G,LG1
chr1.5,seq-0-001,2176934,4.536024,0,1,0,0,0,0,0,0,1,2176934,1,0,0,T,C,LG1
chr1.6,seq-0-001,400216,4.551335,0,1,0,0,0,1,0,0,1,400216,1,1,2,T,A,LG1


Unnamed: 0_level_0,refContig,refPosition,position,h1,h2,h3,h4,h5,h6,h7,h8,chr,bp,M7,Nui,V06.A002-186,Ref,Alt,LG
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
chr12.1043,seq-0-012,50866596,138.3927,0,0,0,0,0,0,0,1,12,50866596,0,1,0,A,T,LG11
chr12.1044,seq-0-012,47892807,139.1278,0,0,0,0,1,0,0,1,12,47892807,0,2,1,G,A,LG11
chr12.1045,seq-0-012,46789320,140.0528,0,0,0,0,0,1,1,0,12,46789320,0,2,1,T,A,LG11
chr12.1046,seq-0-012,50404642,140.2262,0,0,0,0,1,0,0,0,12,50404642,0,1,1,C,T,LG11
chr12.1047,seq-0-012,46789344,140.9332,0,0,0,0,0,1,1,0,12,46789344,0,2,1,C,T,LG11
chr12.1048,seq-0-012,50866557,144.5528,0,0,0,0,0,0,0,1,12,50866557,0,1,0,C,T,LG11


In [43]:
# Quick check that the values in refPosition match bp from the original data.
# refPosition is character (it was split out of the marker name) while bp is
# numeric, so convert before comparing: a direct `!=` would compare the two as
# strings, and a numeric such as 100000 is rendered as "1e+05", which would be
# reported as a mismatch even though the positions agree.
# Rows where either value is missing are listed as mismatches too, instead of
# showing up as all-NA rows. An empty result means every position agrees.
ref_position_num <- as.numeric(lm$refPosition)
lm[is.na(ref_position_num) | is.na(lm$bp) | ref_position_num != lm$bp, ]

refContig,refPosition,position,h1,h2,h3,h4,h5,h6,h7,h8,chr,bp,M7,Nui,V06.A002-186,Ref,Alt,LG
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>


In [36]:
# Print the row(s) whose reference position is 25253729.
print(lm[lm[["refPosition"]] == 25253729, , drop = FALSE])

       refContig refPosition position h1 h2 h3 h4 h5 h6 h7 h8 chr       bp M7
chr3.6 seq-0-002    25253729 4.391946  0  0  0  0  1  0  0  0   2 25253729  0
       Nui V06.A002-186 Ref Alt  LG
chr3.6   1            1   G   A LG3


In [37]:
# Count the SNPs per value of the chr column. Most SNPs fall in the 12 largest
# scaffolds of the Vcae v1.3 assembly; a few sit in smaller contigs.
value_counts <- table(lm[["chr"]])
print(value_counts)


   1    2    3    4    5    6    7    8    9   10   11   12   16   17   23   30 
1186 1477 1334 1198 1259 1558 1082 1030 1460 1330 1614 1053    4    1    1    2 
  57   79   86   97  105  107  124  125  139  150  201  270 
   1    3    1    1    1    1    2    2    1    1    1    4 


In [51]:
# Build one data frame of simplex SNPs (value 1 in the parent's column) for
# each parent, keeping only that parent's haplotype columns:
# the M7 table drops h5-h8 and the Nui table drops h1-h4.
simplex_M7_df <- lm[lm$M7 == 1, ] %>%
  select(-c(h5, h6, h7, h8))

simplex_Nui_df <- lm[lm$Nui == 1, ] %>%
  select(-c(h1, h2, h3, h4))

In [52]:
# Preview the first six Nui simplex rows.
head(simplex_Nui_df, n = 6L)

Unnamed: 0_level_0,refContig,refPosition,position,h5,h6,h7,h8,chr,bp,M7,Nui,V06.A002-186,Ref,Alt,LG
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
chr1.1,seq-0-001,1068804,0.0,0,0,1,0,1,1068804,1,1,1,C,T,LG1
chr1.3,seq-0-001,3688823,3.07242,1,0,0,0,1,3688823,0,1,1,T,A,LG1
chr1.6,seq-0-001,400216,4.551335,0,1,0,0,1,400216,1,1,2,T,A,LG1
chr1.7,seq-0-001,1098337,5.138439,1,0,0,0,1,1098337,0,1,1,G,A,LG1
chr1.9,seq-0-001,968984,6.215677,1,0,0,0,1,968984,0,1,2,A,G,LG1
chr1.10,seq-0-001,3101638,6.431929,1,0,0,0,1,3101638,0,1,1,A,G,LG1


In [53]:
# Add the "Hx" column: the letter "h" followed by the number of every Nui
# haplotype column (h5-h8) that equals 1 for that SNP, e.g. "h7".
simplex_Nui_df <- mutate(
  simplex_Nui_df,
  Hx = paste0(
    "h",
    ifelse(h5 == 1, "5", ""),
    ifelse(h6 == 1, "6", ""),
    ifelse(h7 == 1, "7", ""),
    ifelse(h8 == 1, "8", "")
  )
)

head(simplex_Nui_df, n = 6L)


Unnamed: 0_level_0,refContig,refPosition,position,h5,h6,h7,h8,chr,bp,M7,Nui,V06.A002-186,Ref,Alt,LG,Hx
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
chr1.1,seq-0-001,1068804,0.0,0,0,1,0,1,1068804,1,1,1,C,T,LG1,h7
chr1.3,seq-0-001,3688823,3.07242,1,0,0,0,1,3688823,0,1,1,T,A,LG1,h5
chr1.6,seq-0-001,400216,4.551335,0,1,0,0,1,400216,1,1,2,T,A,LG1,h6
chr1.7,seq-0-001,1098337,5.138439,1,0,0,0,1,1098337,0,1,1,G,A,LG1,h5
chr1.9,seq-0-001,968984,6.215677,1,0,0,0,1,968984,0,1,2,A,G,LG1,h5
chr1.10,seq-0-001,3101638,6.431929,1,0,0,0,1,3101638,0,1,1,A,G,LG1,h5


In [54]:
# Same "Hx" labelling for M7: the letter "h" followed by the number of every
# M7 haplotype column (h1-h4) that equals 1 for that SNP, e.g. "h4".
simplex_M7_df <- mutate(
  simplex_M7_df,
  Hx = paste0(
    "h",
    ifelse(h1 == 1, "1", ""),
    ifelse(h2 == 1, "2", ""),
    ifelse(h3 == 1, "3", ""),
    ifelse(h4 == 1, "4", "")
  )
)

head(simplex_M7_df, n = 6L)

Unnamed: 0_level_0,refContig,refPosition,position,h1,h2,h3,h4,chr,bp,M7,Nui,V06.A002-186,Ref,Alt,LG,Hx
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
chr1.1,seq-0-001,1068804,0.0,0,0,0,1,1,1068804,1,1,1,C,T,LG1,h4
chr1.2,seq-0-001,1215280,1.739313,0,1,0,0,1,1215280,1,2,1,C,T,LG1,h2
chr1.4,seq-0-001,2717632,3.286541,0,0,1,0,1,2717632,1,2,0,C,G,LG1,h3
chr1.5,seq-0-001,2176934,4.536024,0,1,0,0,1,2176934,1,0,0,T,C,LG1,h2
chr1.6,seq-0-001,400216,4.551335,0,1,0,0,1,400216,1,1,2,T,A,LG1,h2
chr1.11,seq-0-001,516531,6.661429,0,1,0,0,1,516531,1,1,1,T,C,LG1,h2


In [55]:
# Save both simplex tables as CSV files in the working directory. Row names
# are written out as the first column.
write.csv(simplex_M7_df, "simplex_M7.csv", row.names = TRUE)
write.csv(simplex_Nui_df, "simplex_Nui.csv", row.names = TRUE)