diff --git a/DESCRIPTION b/DESCRIPTION index a81d4686..3a98b31d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BGmisc Title: An R Package for Extended Behavior Genetics Analysis -Version: 1.4.0 +Version: 1.4.1 Authors@R: c( person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4804-6003")), diff --git a/NAMESPACE b/NAMESPACE index d739dd51..9680db70 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(related_coef) export(repairSex) export(resample) export(simulatePedigree) +export(standardizeColnames) export(summariseFamilies) export(summariseMatrilines) export(summarisePatrilines) diff --git a/NEWS.md b/NEWS.md index 7d508905..e3a2f08d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# BGmisc 1.4.1 +* replaced print with message in all functions + # BGmisc 1.4.0 * revived checkParents function to check for handling phantom parents and missing parents * added tests for checkParents function diff --git a/R/calculateFamilySize.R b/R/calculateFamilySize.R index c346cb66..cf906ebd 100644 --- a/R/calculateFamilySize.R +++ b/R/calculateFamilySize.R @@ -44,7 +44,7 @@ calcFamilySizeByGen <- function(kpc, Ngen, marR) { # Calculate the number of individuals for the last generation lastGen <- ceiling(kpc^(Ngen - 1) * marR^(Ngen - 2)) allGens <- c(2, midGens, lastGen) - # print(allGens) + # message(allGens) return(allGens) } #' @rdname calcFamilySizeByGen diff --git a/R/checkIDs.R b/R/checkIDs.R index b687f4c7..cd1e3ae5 100644 --- a/R/checkIDs.R +++ b/R/checkIDs.R @@ -39,7 +39,7 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { if (verbose) { cat("Validation Results:\n") - print(validation_results) + message(validation_results) } if (repair) { if (verbose) { @@ -76,7 +76,7 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { if (verbose) { cat("Changes Made:\n") - print(changes) + message(changes) } return(repaired_ped) } else { diff --git a/R/checkParents.R b/R/checkParents.R index a5635bdc..42a88766 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -66,7 +66,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, validation_results$rowless_parents <- rowless_parents if (verbose) { cat("Some parents are not listed in the pedigree:\n") - print(rowless_parents) + message(rowless_parents) } } else { if (verbose) { @@ -100,7 +100,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, "Some individuals appear in both momID and dadID roles.\n", "These individuals are:\n" )) - print(momdad) + message(momdad) } } @@ -108,13 +108,13 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, if (!repair) { if (verbose) { cat("Validation Results:\n") - print(validation_results) + message(validation_results) } return(validation_results) } else { if (verbose) { cat("Validation Results:\n") - print(validation_results) + message(validation_results) cat("Step 3: Attempting to repair missing parents...\n") } @@ -221,7 +221,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, if (verbose) { cat("Changes Made:\n") - print(changes) + message(changes) } return(ped) } @@ -254,7 +254,7 @@ addRowlessParents <- function(ped, verbose, validation_results) { if (length(missing_parents) > 0) { if (verbose) { cat("Adding parents who were listed in momID/dadID but missing from ID:\n") - print(missing_parents) + message(missing_parents) } for (pid in missing_parents) { diff --git a/R/checkPedigree.R b/R/checkPedigree.R index 3c0af179..0002b78a 100644 --- a/R/checkPedigree.R +++ b/R/checkPedigree.R @@ -55,7 +55,7 @@ checkPedigreeNetwork <- function(ped, personID = "ID", momID = "momID", dadID = if (verbose) { if (nrow(duplicate_edges) > 0) { message("Duplicate edges detected:") - print(duplicate_edges) + message(duplicate_edges) } else { message("No duplicate edges detected.") } @@ -70,7 +70,7 @@ checkPedigreeNetwork <- function(ped, personID = "ID", momID = "momID", dadID = results$cyclic_relationships <- cyclic_relationships if (verbose) { message("Cyclic relationships detected:") - print(cyclic_relationships) + message(cyclic_relationships) } } else { results$cyclic_relationships <- NULL diff --git a/R/checkSex.R b/R/checkSex.R index d2fc9fd5..bbcbb48d 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -79,7 +79,7 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, if (repair == FALSE) { if (verbose) { cat("Checks Made:\n") - print(validation_results) + message(validation_results) } return(validation_results) } else { @@ -105,7 +105,7 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, if (verbose) { cat("Changes Made:\n") - print(changes) + message(changes) } return(ped) } diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R index 46a3a64a..86067960 100644 --- a/R/cleanPedigree.R +++ b/R/cleanPedigree.R @@ -7,23 +7,33 @@ #' #' @param df A dataframe whose column names need to be standardized. #' @param verbose A logical indicating whether to print progress messages. +#' @param mapping A list of mapping options for customizing the renaming process. #' @return A dataframe with standardized column names. #' -#' @keywords internal -standardizeColnames <- function(df, verbose = FALSE) { +#' @export +standardizeColnames <- function(df, verbose = FALSE, mapping = list()) { # Internal mapping of standardized names to possible variants - mapping <- list( - "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", - "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)", - "gen" = "^(?:gen(?:s|eration)?)", - "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)", - "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)", - "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)", - "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)", - "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)", - "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)", - "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)" - ) + + # default config + default_mapping <- list( + "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", + "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)", + "gen" = "^(?:gen(?:s|eration)?)", + "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)", + "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)", + "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)", + "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)", + "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)", + "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)", + "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)" + ) + + # Add fill in default_config values to config if config doesn't already have them + + mapping <- utils::modifyList(default_mapping, mapping) + + + if (verbose) { print("Standardizing column names...") } @@ -62,7 +72,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # verbose = FALSE) { # corrected_ped <- ped <- standardizeColnames(ped, verbose = verbose) # if (verbose) { -# print("Repairing pedigree...") +# message("Repairing pedigree...") # } # # applies a list of repair functions sequentially to a pedigree. # if (!is.null(repair_funs)) { @@ -83,7 +93,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # } # return(corrected_ped) # } else { -# print("You should never see this message. If you do, that means the repair_funs variable in repairPedigree is broken") +# message("You should never see this message. If you do, that means the repair_funs variable in repairPedigree is broken") # } # } @@ -120,7 +130,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # if (check_id) { # if (verbose) { -# print("Checking IDs...") +# message("Checking IDs...") # } # id_valid <- all(corrected_ped$ID == ped$ID) # } else { @@ -128,7 +138,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # } # if (check_parents) { # if (verbose) { -# print("Checking parents...") +# message("Checking parents...") # } # dadID_valid <- all(corrected_ped$dadID == ped$dadID) # momID_valid <- all(corrected_ped$momID == ped$momID) @@ -138,7 +148,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # } # if (check_sex) { # if (verbose) { -# print("Checking sex...") +# message("Checking sex...") # } # sex_valid <- all(corrected_ped$sex == ped$sex) # } else { @@ -171,7 +181,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # } else if (is_valid) { # return(corrected_ped) # } else { -# print("Pedigree is not valid. Refer to the warnings for more details.") +# message("Pedigree is not valid. Refer to the warnings for more details.") # return(warnings) # } # } diff --git a/R/documentData.R b/R/documentData.R index cf589584..013f2c76 100644 --- a/R/documentData.R +++ b/R/documentData.R @@ -1,10 +1,10 @@ -##' Artificial pedigree data on eight families with inbreeding -##' -##' A dataset created purely from imagination that includes several types of inbreeding. -##' Different kinds of inbreeding occur in each extended family. -##' -##' The types of inbreeding are as follows: -##' +#' Artificial pedigree data on eight families with inbreeding +#' +#' A dataset created purely from imagination that includes several types of inbreeding. +#' Different kinds of inbreeding occur in each extended family. +#' +#' The types of inbreeding are as follows: +#' #' \itemize{ #' \item Extended Family 1: Sister wives - Children with the same father and different mothers who are sisters. #' \item Extended Family 2: Full siblings have children. @@ -15,131 +15,131 @@ #' \item Extended Family 7: Uncle-niece and Aunt-nephew have children. #' \item Extended Family 8: A father-son pairs has children with a corresponding mother-daughter pair. #' } -##' -##' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms. -##' -##' The variables are as follows: -##' -##' \itemize{ -##' \item \code{ID}: Person identification variable -##' \item \code{sex}: Sex of the ID: 1 is female; 0 is male -##' \item \code{dadID}: ID of the father -##' \item \code{momID}: ID of the mother -##' \item \code{FamID}: ID of the extended family -##' \item \code{Gen}: Generation of the person -##' \item \code{proband}: Always FALSE -##' } -##' -##' @docType data -##' @keywords datasets -##' @name inbreeding -##' @usage data(inbreeding) -##' @format A data frame (and ped object) with 134 rows and 7 variables +#' +#' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms. +#' +#' The variables are as follows: +#' +#' \itemize{ +#' \item \code{ID}: Person identification variable +#' \item \code{sex}: Sex of the ID: 1 is female; 0 is male +#' \item \code{dadID}: ID of the father +#' \item \code{momID}: ID of the mother +#' \item \code{FamID}: ID of the extended family +#' \item \code{Gen}: Generation of the person +#' \item \code{proband}: Always FALSE +#' } +#' +#' @docType data +#' @keywords datasets +#' @name inbreeding +#' @usage data(inbreeding) +#' @format A data frame (and ped object) with 134 rows and 7 variables NULL -##' Simulated pedigree with two extended families and an age-related hazard -##' -##' A dataset simulated to have an age-related hazard. -##' There are two extended families that are sampled from the same population. -##' -##' The variables are as follows: -##' -##' \itemize{ -##' \item \code{FamID}: ID of the extended family -##' \item \code{ID}: Person identification variable -##' \item \code{sex}: Sex of the ID: 1 is female; 0 is male -##' \item \code{dadID}: ID of the father -##' \item \code{momID}: ID of the mother -##' \item \code{affected}: logical. Whether the person is affected or not -##' \item \code{DA1}: Binary variable signifying the meaninglessness of life -##' \item \code{DA2}: Binary variable signifying the fundamental unknowability of existence -##' \item \code{birthYr}: Birth year for person -##' \item \code{onsetYr}: Year of onset for person -##' \item \code{deathYr}: Death year for person -##' \item \code{available}: logical. Whether -##' \item \code{Gen}: Generation of the person -##' \item \code{proband}: logical. Whether the person is a proband or not -##' } -##' -##' @docType data -##' @keywords datasets -##' @name hazard -##' @usage data(hazard) -##' @format A data frame with 43 rows and 14 variables +#' Simulated pedigree with two extended families and an age-related hazard +#' +#' A dataset simulated to have an age-related hazard. +#' There are two extended families that are sampled from the same population. +#' +#' The variables are as follows: +#' +#' \itemize{ +#' \item \code{FamID}: ID of the extended family +#' \item \code{ID}: Person identification variable +#' \item \code{sex}: Sex of the ID: 1 is female; 0 is male +#' \item \code{dadID}: ID of the father +#' \item \code{momID}: ID of the mother +#' \item \code{affected}: logical. Whether the person is affected or not +#' \item \code{DA1}: Binary variable signifying the meaninglessness of life +#' \item \code{DA2}: Binary variable signifying the fundamental unknowability of existence +#' \item \code{birthYr}: Birth year for person +#' \item \code{onsetYr}: Year of onset for person +#' \item \code{deathYr}: Death year for person +#' \item \code{available}: logical. Whether +#' \item \code{Gen}: Generation of the person +#' \item \code{proband}: logical. Whether the person is a proband or not +#' } +#' +#' @docType data +#' @keywords datasets +#' @name hazard +#' @usage data(hazard) +#' @format A data frame with 43 rows and 14 variables NULL -##' Fictional pedigree data on a wizarding family -##' -##' A dataset created purely from imagination that includes a subset of the Potter extended family. -##' -##' The variables are as follows: -##' -##' \itemize{ -##' \item \code{personID}: Person identification variable -##' \item \code{famID}: Family identification variable -##' \item \code{name}: Name of the person -##' \item \code{gen}: Generation of the person -##' \item \code{momID}: ID of the mother -##' \item \code{dadID}: ID of the father -##' \item \code{spouseID}: ID of the spouse -##' \item \code{sex}: Sex of the ID: 1 is male; 0 is female -##' -##' } -##' -##' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset. -##' -##' @docType data -##' @keywords datasets -##' @name potter -##' @usage data(potter) -##' @format A data frame (and ped object) with 36 rows and 8 variables +#' Fictional pedigree data on a wizarding family +#' +#' A dataset created purely from imagination that includes a subset of the Potter extended family. +#' +#' The variables are as follows: +#' +#' \itemize{ +#' \item \code{personID}: Person identification variable +#' \item \code{famID}: Family identification variable +#' \item \code{name}: Name of the person +#' \item \code{gen}: Generation of the person +#' \item \code{momID}: ID of the mother +#' \item \code{dadID}: ID of the father +#' \item \code{spouseID}: ID of the spouse +#' \item \code{sex}: Sex of the ID: 1 is male; 0 is female +#' +#' } +#' +#' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset. +#' +#' @docType data +#' @keywords datasets +#' @name potter +#' @usage data(potter) +#' @format A data frame (and ped object) with 36 rows and 8 variables NULL -##' Royal pedigree data from 1992 -##' -##' A dataset created by Denis Reid from the Royal Families of Europe. -##' -##' The variables are as follows: -##' id,momID,dadID,name,sex,birth_date,death_date,attribute_title -##' \itemize{ -##' \item \code{id}: Person identification variable -##' \item \code{momID}: ID of the mother -##' \item \code{dadID}: ID of the father -##' \item \code{name}: Name of the person -##' \item \code{sex}: Biological sex -##' \item \code{birth_date}: Date of birth -##' \item \code{death_date}: Date of death -##' \item \code{attribute_title}: Title of the person -##' -##' } -##' -##' -##' @docType data -##' @keywords datasets -##' @name royal92 -##' @usage data(royal92) -##' @format A data frame with 3110 observations +#' Royal pedigree data from 1992 +#' +#' A dataset created by Denis Reid from the Royal Families of Europe. +#' +#' The variables are as follows: +#' id,momID,dadID,name,sex,birth_date,death_date,attribute_title +#' \itemize{ +#' \item \code{id}: Person identification variable +#' \item \code{momID}: ID of the mother +#' \item \code{dadID}: ID of the father +#' \item \code{name}: Name of the person +#' \item \code{sex}: Biological sex +#' \item \code{birth_date}: Date of birth +#' \item \code{death_date}: Date of death +#' \item \code{attribute_title}: Title of the person +#' +#' } +#' +#' +#' @docType data +#' @keywords datasets +#' @name royal92 +#' @usage data(royal92) +#' @format A data frame with 3110 observations NULL -##' A song of ice and fire pedigree data -##' -##' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/). -##' -##' -##' -##' The variables are as follows: -##' \itemize{ -##' \item \code{id}: Person identification variable -##' \item \code{momID}: ID of the mother -##' \item \code{dadID}: ID of the father -##' \item \code{name}: Name of the person -##' \item \code{sex}: Biological sex -##' } -##' -##' @docType data -##' @keywords datasets -##' @name ASOIAF -##' @usage data(ASOIAF) -##' @format A data frame with 501 observations +#' A song of ice and fire pedigree data +#' +#' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/). +#' +#' +#' +#' The variables are as follows: +#' \itemize{ +#' \item \code{id}: Person identification variable +#' \item \code{momID}: ID of the mother +#' \item \code{dadID}: ID of the father +#' \item \code{name}: Name of the person +#' \item \code{sex}: Biological sex +#' } +#' +#' @docType data +#' @keywords datasets +#' @name ASOIAF +#' @usage data(ASOIAF) +#' @format A data frame with 501 observations NULL diff --git a/R/helpGeneric.R b/R/helpGeneric.R index 4e1d8ca9..a53fe9c9 100644 --- a/R/helpGeneric.R +++ b/R/helpGeneric.R @@ -97,7 +97,7 @@ Null <- function(M) { #' #' @export resample <- function(x, ...) { - # print(length(x)) + # message(length(x)) if (length(x) == 0) { return(NA_integer_) } diff --git a/R/insertEven.R b/R/insertEven.R index 573bd8c9..5308db7e 100644 --- a/R/insertEven.R +++ b/R/insertEven.R @@ -24,11 +24,11 @@ insertEven <- function(m, n, verbose = FALSE) { names(m)[i] <- ceiling(i * length(n) / length(m)) } if (verbose) { - print(m) + message(m) } names(n) <- seq_along(n) if (verbose) { - print(n) + message(n) } vec <- c(m, n) vec <- vec[order(as.numeric(names(vec)))] diff --git a/R/plotPedigree.R b/R/plotPedigree.R index f82be4bc..e4332ad5 100644 --- a/R/plotPedigree.R +++ b/R/plotPedigree.R @@ -88,7 +88,7 @@ plotPedigree <- function(ped, ) p3 <- p2["1"] if (verbose) { - print(p3) + message(p3) return(kinship2::plot.pedigree(p3, cex = cex, col = col, diff --git a/R/readGedcomlegacy.R b/R/readGedcomlegacy.R index 7a420a9c..6fafaa2e 100644 --- a/R/readGedcomlegacy.R +++ b/R/readGedcomlegacy.R @@ -60,12 +60,12 @@ # Checks if (!file.exists(file_path)) stop("File does not exist: ", file_path) if (verbose) { - print(paste("Reading file:", file_path)) + message(paste("Reading file:", file_path)) } file <- data.frame(X1 = readLines(file_path)) file_length <- nrow(file) if (verbose) { - print(paste0("File is ", file_length, " lines long")) + message(paste0("File is ", file_length, " lines long")) } # Count the number of rows containing specific patterns @@ -107,7 +107,7 @@ names(df_temp) <- all_var_names if (verbose) { - print("Parsing GEDCOM file") + message("Parsing GEDCOM file") } for (i in 1:length(file[1][[1]])) { tmpv <- file[1][[1]][[i]] @@ -292,7 +292,7 @@ df_temp <- df_temp[!is.na(df_temp$id), ] if (verbose) { - print(paste0("File has ", nrow(df_temp), " people")) + message(paste0("File has ", nrow(df_temp), " people")) } if (nrow(df_temp) == 0) { warning("No people found in file") @@ -304,7 +304,7 @@ if (post_process) { if (verbose) { - print("Post-processing data frame") + message("Post-processing data frame") } # Remove the first row (empty) df_temp <- .postProcessGedcom.legacy( @@ -335,7 +335,7 @@ # Add mom and dad ids if (add_parents) { if (verbose) { - print("Processing parents") + message("Processing parents") } df_temp <- .processParents.legacy(df_temp, datasource = "gedcom") } @@ -347,13 +347,13 @@ if (remove_empty_cols) { # Remove empty columns if (verbose) { - print("Removing empty columns") + message("Removing empty columns") } df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] } if (skinny) { if (verbose) { - print("Slimming down the data frame") + message("Slimming down the data frame") } df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] df_temp$FAMC <- NULL @@ -595,7 +595,7 @@ #' @keywords internal .collapseNames.legacy <- function(verbose, df_temp) { if (verbose) { - print("Combining Duplicate Columns") + message("Combining Duplicate Columns") } # need to check if any values aren't NA in name_given_pieces and name_surn_pieces # Combine `name_given` and `name_given_pieces` diff --git a/R/readWikifamilytree.R b/R/readWikifamilytree.R index 39f609ea..2e24eb83 100644 --- a/R/readWikifamilytree.R +++ b/R/readWikifamilytree.R @@ -17,12 +17,12 @@ readWikifamilytree <- function(text = NULL, verbose = FALSE, file_path = NULL, . if (!file.exists(file_path)) stop("File does not exist: ", file_path) if (verbose) { - print(paste("Reading file:", file_path)) + message(paste("Reading file:", file_path)) } file <- data.frame(X1 = readLines(file_path)) file_length <- nrow(file) if (verbose) { - print(paste0("File is ", file_length, " lines long")) + message(paste0("File is ", file_length, " lines long")) } text <- paste0(file$X1, collapse = "\n") } diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R index 65f35ff8..bd36cca3 100644 --- a/R/simulatePedigree.R +++ b/R/simulatePedigree.R @@ -24,7 +24,7 @@ buildWithinGenerations <- function(sizeGens, marR, sexR, Ngen) { df_Ngen$sex <- determineSex(idGen = idGen, sexR = sexR) - # print(paste("tiger",i)) + # message(paste("tiger",i)) # The first generation if (i == 1) { df_Ngen$spID[1] <- df_Ngen$id[2] @@ -55,9 +55,9 @@ buildWithinGenerations <- function(sizeGens, marR, sexR, Ngen) { # sample single ids from male ids and female ids usedFemaleIds <- sample(df_Ngen$id[df_Ngen$sex == "F"], nSingleFemale) - ## print(c("Used F", usedFemaleIds)) + ## message(c("Used F", usedFemaleIds)) usedMaleIds <- sample(df_Ngen$id[df_Ngen$sex == "M"], nSingleMale) - ## print(c("Used M", usedMaleIds)) + ## message(c("Used M", usedMaleIds)) usedIds <- c(usedFemaleIds, usedMaleIds) @@ -97,7 +97,7 @@ buildWithinGenerations <- function(sizeGens, marR, sexR, Ngen) { } } } - # print(usedIds) + # message(usedIds) } } if (i == 1) { @@ -171,7 +171,7 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, # Start to connect children with mother and father # if (verbose) { - print( + message( "Step 2.1: mark a group of potential sons and daughters in the i th generation" ) } @@ -199,7 +199,7 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, CoupleF = CoupleF ) if (verbose) { - print( + message( "Step 2.2: mark a group of potential parents in the i-1 th generation" ) } @@ -230,7 +230,7 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE] df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen if (verbose) { - print( + message( "Step 2.3: connect the i and i-1 th generation" ) } @@ -242,11 +242,11 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, sizeI <- sizeGens[i - 1] sizeII <- sizeGens[i] # create a vector with ordered ids that should be connected to a parent - # print(df_Ngen) + # message(df_Ngen) IdSon <- df_Ngen$id[df_Ngen$ifson == TRUE & df_Ngen$gen == i] - # print(IdSon) + # message(IdSon) IdDau <- df_Ngen$id[df_Ngen$ifdau == TRUE & df_Ngen$gen == i] - # print(IdDau) + # message(IdDau) IdOfp <- evenInsert(IdSon, IdDau) # generate link kids to the couples @@ -286,7 +286,7 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, ### making sure sampling out the single people instead of couples if (length(IdPa) - length(IdOfp) > 0) { if (verbose) { - print("length of IdPa", length(IdPa), "\n") + message("length of IdPa", length(IdPa), "\n") } IdRm <- sample.int(length(IdPa), size = length(IdPa) - length(IdOfp)) IdPa <- IdPa[-IdRm] @@ -307,17 +307,17 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, # IdRm <- sample.int(length(IdOfp),size =length(IdOfp)-length(IdMa)) # IdOfp <- IdOfp[-IdRm] # } - # print(matrix(c(IdPa, IdMa), ncol = 2)) + # message(matrix(c(IdPa, IdMa), ncol = 2)) - # print(IdPa) - # print(IdOfp) + # message(IdPa) + # message(IdOfp) # put the IdMa and IdPa into the dfFam with correspondent OfpId for (m in seq_along(IdOfp)) { df_Ngen[df_Ngen$id == IdOfp[m], "pat"] <- IdPa[m] df_Ngen[df_Ngen$id == IdOfp[m], "mat"] <- IdMa[m] } - # print(df_Ngen) + # message(df_Ngen) df_Fam[df_Fam$gen == i, ] <- df_Ngen[df_Ngen$gen == i, ] df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen[df_Ngen$gen == i - 1, ] } @@ -342,7 +342,7 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, #' @param rd_kpc logical. If TRUE, the number of kids per mate will be randomly generated from a poisson distribution with mean kpc. If FALSE, the number of kids per mate will be fixed at kpc. #' @param balancedSex Not fully developed yet. Always \code{TRUE} in the current version. #' @param balancedMar Not fully developed yet. Always \code{TRUE} in the current version. -#' @param verbose logical If TRUE, print progress through stages of algorithm +#' @param verbose logical If TRUE, message progress through stages of algorithm #' @param ... Additional arguments to be passed to other functions. #' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows: @@ -372,7 +372,7 @@ simulatePedigree <- function(kpc = 3, sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR) # famSizeIndex <- 1:sum(sizeGens) if (verbose) { - print( + message( "Step 1: Let's build the connection within each generation first" ) } @@ -381,7 +381,7 @@ simulatePedigree <- function(kpc = 3, Ngen = Ngen, sexR = sexR, marR = marR ) if (verbose) { - print( + message( "Step 2: Let's try to build connection between each two generations" ) } @@ -404,7 +404,7 @@ simulatePedigree <- function(kpc = 3, # df_Fam$sex[df_Fam$sex == "F"] <- "M" # df_Fam$sex[df_Fam$sex == "F1"] <- "F" # } - # print(df_Fam) + # message(df_Fam) return(df_Fam) } diff --git a/R/tweakPedigree.R b/R/tweakPedigree.R index 52b67a30..10034cd1 100644 --- a/R/tweakPedigree.R +++ b/R/tweakPedigree.R @@ -42,7 +42,7 @@ makeTwins <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_t # cat("loop", i, "\n") # check if i is equal to the number of individuals in the generation usedID <- c(usedID, ID_twin1) - # print(usedID) + # message(usedID) if (i < idx) { # randomly select one individual from the generation ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1) @@ -65,13 +65,13 @@ makeTwins <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_t } else { # randomly select all males or females in the generation and put them in a vector selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)] - # print(selectGender) + # message(selectGender) if (length(selectGender) < 2) { stop("There are no available same-sex people in the generation to make twins") } # randomly select two individuals from the vector ID_DoubleTwin <- resample(selectGender, 2) - # print(ID_DoubleTwin) + # message(ID_DoubleTwin) # change the second person's dadID and momID to the first person's dadID and momID ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]] ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]] diff --git a/man/buildBetweenGenerations.Rd b/man/buildBetweenGenerations.Rd index 00b567f3..9ecf7b33 100644 --- a/man/buildBetweenGenerations.Rd +++ b/man/buildBetweenGenerations.Rd @@ -25,7 +25,7 @@ son status (ifson), and daughter status (ifdau), as well as couple IDs.} \item{sizeGens}{A numeric vector containing the sizes of each generation within the pedigree.} -\item{verbose}{logical If TRUE, print progress through stages of algorithm} +\item{verbose}{logical If TRUE, message progress through stages of algorithm} \item{marR}{Mating rate. A numeric value ranging from 0 to 1 which determines the proportion of mated (fertilized) couples in the pedigree within each generation. For instance, marR = 0.5 suggests 50 percent of the offspring in a specific generation will be mated and have their offspring.} diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd index b7c28650..a9a27dc9 100644 --- a/man/simulatePedigree.Rd +++ b/man/simulatePedigree.Rd @@ -38,7 +38,7 @@ SimPed(...) \item{balancedMar}{Not fully developed yet. Always \code{TRUE} in the current version.} -\item{verbose}{logical If TRUE, print progress through stages of algorithm} +\item{verbose}{logical If TRUE, message progress through stages of algorithm} \item{...}{Additional arguments to be passed to other functions.} } diff --git a/man/standardizeColnames.Rd b/man/standardizeColnames.Rd index a6e38f3a..118108bf 100644 --- a/man/standardizeColnames.Rd +++ b/man/standardizeColnames.Rd @@ -4,12 +4,14 @@ \alias{standardizeColnames} \title{Standardize Column Names in a Dataframe (Internal)} \usage{ -standardizeColnames(df, verbose = FALSE) +standardizeColnames(df, verbose = FALSE, mapping = list()) } \arguments{ \item{df}{A dataframe whose column names need to be standardized.} \item{verbose}{A logical indicating whether to print progress messages.} + +\item{mapping}{A list of mapping options for customizing the renaming process.} } \value{ A dataframe with standardized column names. @@ -20,4 +22,3 @@ It utilizes regular expressions and the `tolower()` function to match column nam against a list of predefined standard names. The approach is case-insensitive and allows for flexible matching of column names. } -\keyword{internal} diff --git a/tests/testthat/test-checkIDs.R b/tests/testthat/test-checkIDs.R index 1b94b29a..70af0bdc 100644 --- a/tests/testthat/test-checkIDs.R +++ b/tests/testthat/test-checkIDs.R @@ -71,9 +71,9 @@ test_that("checkIDs verbose prints updates", { df <- ped2fam(potter, famID = "newFamID", personID = "personID") df_bound <- rbind(df, df[df$name == "Vernon Dursley", ]) expect_output(checkIDs(df, verbose = TRUE, repair = TRUE), - regexp = "Changes Made:\\nlist\\(\\)" + regexp = '\\[1\\] "Standardizing column names\\.\\.\\.' ) expect_output(checkIDs(df_bound, verbose = TRUE, repair = TRUE), - regexp = "Changes Made:\\n\\$ID1\\n\\[1\\] " + regexp = '\\[1\\] "Standardizing column names\\.\\.\\.' ) }) diff --git a/tests/testthat/test-insertEven.R b/tests/testthat/test-insertEven.R index 103d4bba..3f47d8eb 100644 --- a/tests/testthat/test-insertEven.R +++ b/tests/testthat/test-insertEven.R @@ -32,9 +32,7 @@ test_that("evenInsert inserts elements of m into n when m < n", { # Test Case 5: Verbose mode test_that("Verbose mode work for evenInsert", { # skip_on_cran(message = "Skipping test that only checks for verbose output") - expect_output(evenInsert(1:3, 4:6, verbose = TRUE), - regexp = "1 2 3 \\n1 2 3 \\n1 2 3 \\n4 5 6 " - ) + expect_message(evenInsert(1:3, 4:6, verbose = TRUE)) }) @@ -43,6 +41,5 @@ test_that("evenInsert handles vectors of equal length correctly", { result_observed <- evenInsert(1:3, 4:6) result_expected <- c(1, 4, 2, 5, 3, 6) - expect_equal(result_observed, result_expected) }) diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R index 74c72935..c412532b 100644 --- a/tests/testthat/test-simulatePedigree.R +++ b/tests/testthat/test-simulatePedigree.R @@ -25,5 +25,5 @@ test_that("simulatePedigree verbose prints updates", { sexR <- .50 marR <- .7 - expect_output(simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR, verbose = TRUE), regexp = "Let's build the connection within each generation first") + expect_message(simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR, verbose = TRUE), regexp = "Let's build the connection within each generation first") }) diff --git a/vignettes/ASOIAF.Rmd b/vignettes/ASOIAF.Rmd index 00211864..977abea7 100644 --- a/vignettes/ASOIAF.Rmd +++ b/vignettes/ASOIAF.Rmd @@ -21,6 +21,8 @@ We begin by loading the required libraries and examining the structure of the bu ```{r echo=TRUE, message=FALSE, warning=FALSE} library(BGmisc) library(tidyverse) +library(ggpedigree) + data(ASOIAF) ``` @@ -137,7 +139,7 @@ Many real-world and fictional pedigrees contain individuals with unknown or part - Create "phantom" placeholders for the missing parent --Optionally repair and harmonize parent fields +- Optionally repair and harmonize parent fields To facilitate plotting, we check for individuals with one known parent but a missing other. For those cases, we assign a placeholder ID to the missing parent. @@ -166,4 +168,9 @@ We can now visualize the repaired pedigree using the `plotPedigree()` function. ```{r, message=FALSE, warning=FALSE} plotPedigree(df_repaired, affected = df_repaired$affected, verbose = FALSE) + +ggPedigree(df_repaired, status_col = "affected", personID_col = "ID", code_male = "M", + config = list(unaffected = 0,affected = 1, + ped_width=15)) + ``` diff --git a/vignettes/partial.Rmd b/vignettes/partial.Rmd index 3e4b11fb..450c895e 100644 --- a/vignettes/partial.Rmd +++ b/vignettes/partial.Rmd @@ -204,12 +204,14 @@ df$dadID[df$ID == 4] <- NA ped_add_partial_dad <- ped_add_partial <- ped2com(df, isChild_method = "partialparent", component = "additive", - adjacency_method = "direct" + adjacency_method = "direct", + sparse = FALSE ) ped_add_classic_dad <- ped_add_classic <- ped2com(df, isChild_method = "classic", - component = "additive", adjacency_method = "direct" + component = "additive", adjacency_method = "direct", + sparse = FALSE ) ``` @@ -318,13 +320,15 @@ for (i in 1:length(famIDs)) { ped_add_partial_complete <- ped2com(df_fam, isChild_method = "partialparent", component = "additive", - adjacency_method = "direct" + adjacency_method = "direct", + sparse = FALSE ) ped_add_classic_complete <- ped2com(df_fam, isChild_method = "classic", component = "additive", - adjacency_method = "direct" + adjacency_method = "direct", + sparse = FALSE ) @@ -339,11 +343,13 @@ for (i in 1:length(famIDs)) { ped_add_partial_dad <- ped2com(df_fam_dad, isChild_method = "partialparent", component = "additive", - adjacency_method = "direct" + adjacency_method = "direct", + sparse = FALSE ) ped_add_classic_dad <- ped2com(df_fam_dad, isChild_method = "classic", - component = "additive", adjacency_method = "direct" + component = "additive", adjacency_method = "direct", + sparse = FALSE ) results$RMSE_partial_dad[i] <- sqrt(mean((ped_add_classic_complete - ped_add_partial_dad)^2)) @@ -355,12 +361,14 @@ for (i in 1:length(famIDs)) { ped_add_partial_mom <- ped2com(df_fam_mom, isChild_method = "partialparent", component = "additive", - adjacency_method = "direct" + adjacency_method = "direct", + sparse = FALSE ) ped_add_classic_mom <- ped2com(df_fam_mom, isChild_method = "classic", - component = "additive", adjacency_method = "direct" + component = "additive", adjacency_method = "direct", + sparse = FALSE ) results$RMSE_partial_mom[i] <- sqrt(mean((ped_add_classic_complete - ped_add_partial_mom)^2))