R-Computing-Lab · smasongarrison · May 7, 2025 · May 18, 2025 · May 18, 2025
diff --git a/NAMESPACE b/NAMESPACE
@@ -42,6 +42,7 @@ export(related_coef)
 export(repairSex)
 export(resample)
 export(simulatePedigree)
+export(standardizeColnames)
 export(summariseFamilies)
 export(summariseMatrilines)
 export(summarisePatrilines)

diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R
@@ -7,23 +7,33 @@
 #'
 #' @param df A dataframe whose column names need to be standardized.
 #' @param verbose A logical indicating whether to print progress messages.
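+#' @param mapping A named list of regular expressions keyed by standardized column name; entries replace the built-in default pattern of the same name, and entries with new names are added.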
 #' @return A dataframe with standardized column names.
 #'
-#' @keywords internal
-standardizeColnames <- function(df, verbose = FALSE) {
+#' @export
+standardizeColnames <- function(df, verbose = FALSE, mapping = list()) {
   # Internal mapping of standardized names to possible variants
-  mapping <- list(
-    "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
-    "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
-    "gen" = "^(?:gen(?:s|eration)?)",
-    "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
-    "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)",
-    "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)",
-    "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)",
-    "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)",
-    "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)",
-    "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)"
-  )
+
+    # Default mapping: standardized column name -> regex matching its known variants
+    default_mapping <- list(
+      "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
+      "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
+      "gen" = "^(?:gen(?:s|eration)?)",
+      "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
+      "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)",
+      "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)",
+      "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)",
+      "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)",
+      "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)",
+      "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)"
+    )
+
+    # Merge the user-supplied mapping over the defaults: names present in `mapping` replace the default pattern, all other defaults are kept
+
+  mapping <- utils::modifyList(default_mapping, mapping)
+
+
+
   if (verbose) {
     print("Standardizing column names...")
   }

diff --git a/R/documentData.R b/R/documentData.R
@@ -1,10 +1,10 @@
-##' Artificial pedigree data on eight families with inbreeding
-##'
-##' A dataset created purely from imagination that includes several types of inbreeding.
-##' Different kinds of inbreeding occur in each extended family.
-##'
-##' The types of inbreeding are as follows:
-##'
+#' Artificial pedigree data on eight families with inbreeding
+#'
+#' A dataset created purely from imagination that includes several types of inbreeding.
+#' Different kinds of inbreeding occur in each extended family.
+#'
+#' The types of inbreeding are as follows:
+#'
 #' \itemize{
 #'     \item Extended Family 1: Sister wives - Children with the same father and different mothers who are sisters.
 #'     \item Extended Family 2: Full siblings have children.
@@ -15,131 +15,131 @@
 #'     \item Extended Family 7: Uncle-niece and Aunt-nephew have children.
 #'     \item Extended Family 8: A father-son pairs has children with a corresponding mother-daughter pair.
 #' }
-##'
-##' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms.
-##'
-##' The variables are as follows:
-##'
-##' \itemize{
-##'   \item \code{ID}:  Person identification variable
-##'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{FamID}:  ID of the extended family
-##'   \item \code{Gen}:  Generation of the person
-##'   \item \code{proband}:  Always FALSE
-##' }
-##'
-##' @docType data
-##' @keywords datasets
-##' @name inbreeding
-##' @usage data(inbreeding)
-##' @format A data frame (and ped object) with 134 rows and 7 variables
+#'
+#' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms.
+#'
+#' The variables are as follows:
+#'
+#' \itemize{
+#'   \item \code{ID}:  Person identification variable
+#'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{FamID}:  ID of the extended family
+#'   \item \code{Gen}:  Generation of the person
+#'   \item \code{proband}:  Always FALSE
+#' }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name inbreeding
+#' @usage data(inbreeding)
+#' @format A data frame (and ped object) with 134 rows and 7 variables
 NULL
 
-##' Simulated pedigree with two extended families and an age-related hazard
-##'
-##' A dataset simulated to have an age-related hazard.
-##' There are two extended families that are sampled from the same population.
-##'
-##' The variables are as follows:
-##'
-##' \itemize{
-##'   \item \code{FamID}:  ID of the extended family
-##'   \item \code{ID}:  Person identification variable
-##'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{affected}:  logical.  Whether the person is affected or not
-##'   \item \code{DA1}:  Binary variable signifying the meaninglessness of life
-##'   \item \code{DA2}:  Binary variable signifying the fundamental unknowability of existence
-##'   \item \code{birthYr}:  Birth year for person
-##'   \item \code{onsetYr}:  Year of onset for person
-##'   \item \code{deathYr}:  Death year for person
-##'   \item \code{available}:  logical.  Whether
-##'   \item \code{Gen}:  Generation of the person
-##'   \item \code{proband}:  logical.  Whether the person is a proband or not
-##' }
-##'
-##' @docType data
-##' @keywords datasets
-##' @name hazard
-##' @usage data(hazard)
-##' @format A data frame with 43 rows and 14 variables
+#' Simulated pedigree with two extended families and an age-related hazard
+#'
+#' A dataset simulated to have an age-related hazard.
+#' There are two extended families that are sampled from the same population.
+#'
+#' The variables are as follows:
+#'
+#' \itemize{
+#'   \item \code{FamID}:  ID of the extended family
+#'   \item \code{ID}:  Person identification variable
+#'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{affected}:  logical.  Whether the person is affected or not
+#'   \item \code{DA1}:  Binary variable signifying the meaninglessness of life
+#'   \item \code{DA2}:  Binary variable signifying the fundamental unknowability of existence
+#'   \item \code{birthYr}:  Birth year for person
+#'   \item \code{onsetYr}:  Year of onset for person
+#'   \item \code{deathYr}:  Death year for person
+#'   \item \code{available}:  logical.  Whether
+#'   \item \code{Gen}:  Generation of the person
+#'   \item \code{proband}:  logical.  Whether the person is a proband or not
+#' }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name hazard
+#' @usage data(hazard)
+#' @format A data frame with 43 rows and 14 variables
 NULL
 
-##' Fictional pedigree data on a wizarding family
-##'
-##' A dataset created purely from imagination that includes a subset of the Potter extended family.
-##'
-##' The variables are as follows:
-##'
-##' \itemize{
-##'   \item \code{personID}:  Person identification variable
-##'   \item \code{famID}: Family identification variable
-##'   \item \code{name}:  Name of the person
-##'   \item \code{gen}: Generation of the person
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{spouseID}: ID of the spouse
-##'   \item \code{sex}:  Sex of the ID: 1 is male; 0 is female
-##'
-##' }
-##'
-##' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset.
-##'
-##' @docType data
-##' @keywords datasets
-##' @name potter
-##' @usage data(potter)
-##' @format A data frame (and ped object) with 36 rows and 8 variables
+#' Fictional pedigree data on a wizarding family
+#'
+#' A dataset created purely from imagination that includes a subset of the Potter extended family.
+#'
+#' The variables are as follows:
+#'
+#' \itemize{
+#'   \item \code{personID}:  Person identification variable
+#'   \item \code{famID}: Family identification variable
+#'   \item \code{name}:  Name of the person
+#'   \item \code{gen}: Generation of the person
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{spouseID}: ID of the spouse
+#'   \item \code{sex}:  Sex of the ID: 1 is male; 0 is female
+#'
+#' }
+#'
+#' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset.
+#'
+#' @docType data
+#' @keywords datasets
+#' @name potter
+#' @usage data(potter)
+#' @format A data frame (and ped object) with 36 rows and 8 variables
 NULL
 
 
-##' Royal pedigree data from 1992
-##'
-##' A dataset created by Denis Reid from the Royal Families of Europe.
-##'
-##' The variables are as follows:
-##' id,momID,dadID,name,sex,birth_date,death_date,attribute_title
-##' \itemize{
-##'   \item \code{id}:  Person identification variable
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{name}:  Name of the person
-##'   \item \code{sex}: Biological sex
-##'   \item \code{birth_date}:  Date of birth
-##'   \item \code{death_date}:  Date of death
-##'   \item \code{attribute_title}:  Title of the person
-##'
-##' }
-##'
-##'
-##' @docType data
-##' @keywords datasets
-##' @name royal92
-##' @usage data(royal92)
-##' @format A data frame with 3110 observations
+#' Royal pedigree data from 1992
+#'
+#' A dataset created by Denis Reid from the Royal Families of Europe.
+#'
+#' The variables are as follows:
+#' id,momID,dadID,name,sex,birth_date,death_date,attribute_title
+#' \itemize{
+#'   \item \code{id}:  Person identification variable
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{name}:  Name of the person
+#'   \item \code{sex}: Biological sex
+#'   \item \code{birth_date}:  Date of birth
+#'   \item \code{death_date}:  Date of death
+#'   \item \code{attribute_title}:  Title of the person
+#'
+#' }
+#'
+#'
+#' @docType data
+#' @keywords datasets
+#' @name royal92
+#' @usage data(royal92)
+#' @format A data frame with 3110 observations
 NULL
 
-##' A song of ice and fire pedigree data
-##'
-##' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/).
-##'
-##'
-##'
-##' The variables are as follows:
-##' \itemize{
-##'   \item \code{id}:  Person identification variable
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{name}:  Name of the person
-##'   \item \code{sex}: Biological sex
-##'   }
-##'
-##' @docType data
-##' @keywords datasets
-##' @name ASOIAF
-##' @usage data(ASOIAF)
-##' @format A data frame with 501 observations
+#' A song of ice and fire pedigree data
+#'
+#' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/).
+#'
+#'
+#'
+#' The variables are as follows:
+#' \itemize{
+#'   \item \code{id}:  Person identification variable
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{name}:  Name of the person
+#'   \item \code{sex}: Biological sex
+#'   }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name ASOIAF
+#' @usage data(ASOIAF)
+#' @format A data frame with 501 observations
 NULL
diff --git a/man/standardizeColnames.Rd b/man/standardizeColnames.Rd
diff --git a/vignettes/ASOIAF.Rmd b/vignettes/ASOIAF.Rmd
@@ -21,6 +21,8 @@ We begin by loading the required libraries and examining the structure of the bu
 ```{r echo=TRUE, message=FALSE, warning=FALSE}
 library(BGmisc)
 library(tidyverse)
+library(ggpedigree)
+
 data(ASOIAF)
 ```
 
@@ -137,7 +139,7 @@ Many real-world and fictional pedigrees contain individuals with unknown or part
 
 - Create "phantom" placeholders for the missing parent
 
--Optionally repair and harmonize parent fields
+- Optionally repair and harmonize parent fields
 
 To facilitate plotting, we check for individuals with one known parent but a missing other. For those cases, we assign a placeholder ID to the missing parent.
 
@@ -166,4 +168,9 @@ We can now visualize the repaired pedigree using the `plotPedigree()` function.
 
 ```{r, message=FALSE, warning=FALSE}
 plotPedigree(df_repaired, affected = df_repaired$affected, verbose = FALSE)
+
+ggPedigree(df_repaired, status_col = "affected", personID_col = "ID",
+           code_male = "M",
+           config = list(unaffected = 0, affected = 1, ped_width = 15))
+
 ```