# R notebook to create the field name list
Author: Dat Tran

Last updated: 4/Feb/2025

The `field_list_column` was extracted from the column names of our downloaded UK Biobank file.

### Set up
Install necessary packages:

In [1]:
# Packages this notebook relies on.
pkg <- c("data.table", "dplyr", "stringr")
# Work out which of them are missing. The comparison is made against the
# installed package *names* only (the row names of installed.packages()).
# Matching against the whole matrix would also match other cells, such as an
# "Imports" entry that consists of a single package name, and could wrongly
# report a missing package as installed.
new.pkg <- setdiff(pkg, rownames(installed.packages()))
# Install only the packages that are not available yet.
if (length(new.pkg) > 0) {
  install.packages(new.pkg, repos = "http://cran.rstudio.com")
}
# Download the field list codes from the project's code/ folder.
download_status <- system("dx download 'code/field_number.csv'")
# Fail early with a clear message when the file is not available locally.
# The check is on the file rather than on the exit status so that re-running
# the cell with an already downloaded copy keeps working.
if (!file.exists("field_number.csv")) {
  stop(
    "field_number.csv is not available (dx download exit status ",
    download_status, ").",
    call. = FALSE
  )
}
# The first column of the CSV holds the selected field IDs.
field_selected <- read.csv("field_number.csv", header = TRUE)[[1]]

Function to get our dataset id:

In [13]:
# Get dataset ID
#
# Queries `dx find data --type Dataset --delimiter ','` once and reads the
# 4th and 5th comma-separated fields of every output line.
#
# Returns a list with two elements:
#   id     - "<DX_PROJECT_CONTEXT_ID>:<field 5>"
#   prefix - field 4 with its first character removed
#            (presumably a leading "/" of the dataset path - confirm)
#
# Stops when the query returns no Dataset at all. When several Datasets are
# returned, a warning is raised and `id` / `prefix` hold one entry per line.
get_dataset_id <- function() {
  project <- Sys.getenv("DX_PROJECT_CONTEXT_ID")

  # Run the query a single time so that id and prefix always come from the
  # same listing.
  listing <- system("dx find data --type Dataset --delimiter ','", intern = TRUE)
  listing <- listing[nzchar(listing)]
  if (length(listing) == 0) {
    stop("No Dataset record found in project '", project, "'.", call. = FALSE)
  }
  if (length(listing) > 1) {
    warning(
      "Found ", length(listing), " Dataset records; ",
      "'id' and 'prefix' contain one entry per record.",
      call. = FALSE
    )
  }

  # Mirror `awk -F ',' '{print $n}'`: a missing field yields an empty string.
  fields <- strsplit(listing, ",", fixed = TRUE)
  nth_field <- function(parts, n) {
    if (length(parts) >= n) parts[[n]] else ""
  }
  record <- vapply(fields, nth_field, character(1), n = 5)
  path <- vapply(fields, nth_field, character(1), n = 4)

  list(
    id = paste0(project, ":", record),
    prefix = stringr::str_sub(path, 2)
  )
}
dataset <- get_dataset_id()

# Get the data dictionary. It is extracted with `dx extract_dataset -ddd`
# only when the CSV is not already present in the working directory.
dict_file <- paste0(dataset$prefix, ".data_dictionary.csv")
if (!file.exists(dict_file)) {
  extract_log <- system(
    paste0("dx extract_dataset ", dataset$id, " -ddd"),
    intern = TRUE
  )
  # Surface a failed extraction here instead of a later "file not found"
  # error from fread().
  if (!file.exists(dict_file)) {
    stop(
      "dx extract_dataset did not produce ", dict_file, ":\n",
      paste(extract_log, collapse = "\n"),
      call. = FALSE
    )
  }
}
datadict <- data.table::fread(dict_file)

# Load the Showcase metadata tables (categories and fields) from the
# mounted project.
category_schema <- data.table::fread("/mnt/project/Showcase metadata/category.tsv")
field_schema <- data.table::fread("/mnt/project/Showcase metadata/field.tsv")

### Check availability of the selected fields in our dataset

In [15]:
# Check availability: every selected field ID must be listed in the
# `field_id` column of the Showcase field table. All unknown IDs are reported
# together rather than stopping at the first one.
invalid_fields <- unique(
  field_selected[!field_selected %in% field_schema$field_id]
)
if (length(invalid_fields) > 0) {
  stop(
    "Not valid UK Biobank field ID(s): ",
    paste(invalid_fields, collapse = ", "),
    call. = FALSE
  )
}

In [26]:
# Extract the UKB-RAP column names that belong to one field ID.
#
# field: field ID; it is pasted into the pattern "^p<field>(?![0-9])". The
#        negative lookahead rejects names where further digits follow, so
#        field 3 does not pick up the columns of field 31.
# Returns the `name` values of the rows of the global `datadict` whose name
# matches that pattern.
fields_for_id <- function(field) {
  pattern <- paste0("^p", field, "(?![0-9])")
  matched_rows <- dplyr::filter(datadict, stringr::str_detect(name, pattern))
  dplyr::pull(matched_rows, name)
}
# Build the column list: "eid" first, then the matching columns of every
# selected field. unique() keeps the first occurrence of each name, so a field
# ID listed twice in the input does not produce duplicate columns.
all_columns <- unique(c("eid", unlist(lapply(field_selected, fields_for_id))))
# One column name per line, with no header, row names or quoting.
write.table(
  all_columns,
  "table_exporter_fields.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)

### Upload back to DNAnexus

In [28]:
# Upload the generated field list into the project's /code folder and stop
# if the dx CLI reports a failure, so that a failed upload is not overlooked.
upload_status <- system("dx upload table_exporter_fields.txt --path /code/table_exporter_fields.txt")
if (upload_status != 0) {
  stop(
    "dx upload of table_exporter_fields.txt failed (exit status ",
    upload_status, ").",
    call. = FALSE
  )
}