In [1]:
library(dplyr)
library(readxl)

# make sure to change the output directory to your own
output_dir <- "../dwc/marino89"
dir.create(output_dir)

# limit number of rows in notebook output
options(repr.matrix.max.rows = 10, repr.matrix.max.cols = 20)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
list.files("../dataset", full.names = "TRUE")

In [3]:
seqtab <- read.table("../dataset/seqtab.txt", sep = "\t", header = TRUE)
seqtab

asv,EE0493,EE0495
<chr>,<int>,<int>
asv.1,0,0
asv.2,14,2447
asv.3,0,0
asv.4,0,0
asv.5,40587,1857
⋮,⋮,⋮
asv.16981,0,0
asv.16982,0,0
asv.16983,0,0
asv.16984,0,0


In [4]:
taxonomy <- read.table("../dataset/taxonomy.txt", sep = "\t", header = TRUE)
taxonomy

asv,taxonomy
<chr>,<chr>
asv.1,Eukaryota
asv.2,Clausocalanus_furcatus
asv.3,Eurotatoria
asv.4,Arthropoda
asv.5,Eukaryota
⋮,⋮
asv.16981,Metazoa
asv.16982,Metazoa
asv.16983,Metazoa
asv.16984,Eukaryota


In [5]:
samples <- read_excel("../dataset/samples.xlsx")
samples

name,size,event_begin,area_name,area_longitude,area_latitude,area_uncertainty,parent_area_name,dna,depth,temperature
<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
EE0493,1450,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,7.23,10,26.3
EE0495,1500,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,15.83,12,25.1


In [6]:
event <- samples %>%
    select(
        eventID = name,
        materialSampleID = name,
        eventDate = event_begin,
        locality = area_name,
        decimalLongitude = area_longitude,
        decimalLatitude = area_latitude,
        coordinateUncertaintyInMeters = area_uncertainty,
        higherGeography = parent_area_name,
        minimumDepthInMeters = depth,
        maximumDepthInMeters = depth,
        sampleSizeValue = size,
        dna,
        temperature
    ) %>%
    mutate(sampleSizeUnit = "ml")
event

eventID,materialSampleID,eventDate,locality,decimalLongitude,decimalLatitude,coordinateUncertaintyInMeters,higherGeography,minimumDepthInMeters,maximumDepthInMeters,sampleSizeValue,dna,temperature,sampleSizeUnit
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,1450,7.23,26.3,ml
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,1500,15.83,25.1,ml


In [7]:
library(tidyr)

occurrence <- seqtab %>%
    gather(eventID, organismQuantity, 2:3) %>%
    filter(organismQuantity > 0) %>%
    mutate(
        occurrenceID = paste0(eventID, "_", asv),
        organismQuantityType = "sequence reads"
    )
occurrence

asv,eventID,organismQuantity,occurrenceID,organismQuantityType
<chr>,<chr>,<int>,<chr>,<chr>
asv.2,EE0493,14,EE0493_asv.2,sequence reads
asv.5,EE0493,40587,EE0493_asv.5,sequence reads
asv.6,EE0493,7,EE0493_asv.6,sequence reads
asv.7,EE0493,29367,EE0493_asv.7,sequence reads
asv.8,EE0493,72378,EE0493_asv.8,sequence reads
⋮,⋮,⋮,⋮,⋮
asv.16949,EE0495,1,EE0495_asv.16949,sequence reads
asv.16958,EE0495,1,EE0495_asv.16958,sequence reads
asv.16961,EE0495,1,EE0495_asv.16961,sequence reads
asv.16962,EE0495,1,EE0495_asv.16962,sequence reads


In [8]:
taxonomy <- taxonomy %>%
    select(asv, verbatimIdentification = taxonomy)
     

In [9]:
occurrence <- occurrence %>%
    left_join(taxonomy, by = "asv")
occurrence

asv,eventID,organismQuantity,occurrenceID,organismQuantityType,verbatimIdentification
<chr>,<chr>,<int>,<chr>,<chr>,<chr>
asv.2,EE0493,14,EE0493_asv.2,sequence reads,Clausocalanus_furcatus
asv.5,EE0493,40587,EE0493_asv.5,sequence reads,Eukaryota
asv.6,EE0493,7,EE0493_asv.6,sequence reads,Farranula_gibbula
asv.7,EE0493,29367,EE0493_asv.7,sequence reads,Eukaryota
asv.8,EE0493,72378,EE0493_asv.8,sequence reads,Metazoa
⋮,⋮,⋮,⋮,⋮,⋮
asv.16949,EE0495,1,EE0495_asv.16949,sequence reads,Metazoa
asv.16958,EE0495,1,EE0495_asv.16958,sequence reads,Eukaryota
asv.16961,EE0495,1,EE0495_asv.16961,sequence reads,Eukaryota
asv.16962,EE0495,1,EE0495_asv.16962,sequence reads,Mantoniella_squamata


In [10]:
occurrence <- event %>%
    left_join(occurrence, by = "eventID")
occurrence

eventID,materialSampleID,eventDate,locality,decimalLongitude,decimalLatitude,coordinateUncertaintyInMeters,higherGeography,minimumDepthInMeters,maximumDepthInMeters,sampleSizeValue,dna,temperature,sampleSizeUnit,asv,organismQuantity,occurrenceID,organismQuantityType,verbatimIdentification
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,1450,7.23,26.3,ml,asv.2,14,EE0493_asv.2,sequence reads,Clausocalanus_furcatus
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,1450,7.23,26.3,ml,asv.5,40587,EE0493_asv.5,sequence reads,Eukaryota
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,1450,7.23,26.3,ml,asv.6,7,EE0493_asv.6,sequence reads,Farranula_gibbula
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,1450,7.23,26.3,ml,asv.7,29367,EE0493_asv.7,sequence reads,Eukaryota
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,1450,7.23,26.3,ml,asv.8,72378,EE0493_asv.8,sequence reads,Metazoa
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,1500,15.83,25.1,ml,asv.16949,1,EE0495_asv.16949,sequence reads,Metazoa
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,1500,15.83,25.1,ml,asv.16958,1,EE0495_asv.16958,sequence reads,Eukaryota
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,1500,15.83,25.1,ml,asv.16961,1,EE0495_asv.16961,sequence reads,Eukaryota
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,1500,15.83,25.1,ml,asv.16962,1,EE0495_asv.16962,sequence reads,Mantoniella_squamata


In [11]:
occurrence$samplingProtocol <- "https://github.com/BeBOP-OBON/UNESCO_protocol_collection"

In [12]:
taxon_names <- stringr::str_replace(occurrence$verbatimIdentification, "_", " ")

In [13]:
matched <- obistools::match_taxa(taxon_names, ask = FALSE) %>%
    select(scientificName, scientificNameID)

matched

433 names, 0 without matches, 10 with multiple matches



Unnamed: 0_level_0,scientificName,scientificNameID
Unnamed: 0_level_1,<chr>,<chr>
106,Clausocalanus furcatus,urn:lsid:marinespecies.org:taxname:104503
163,,
168,Farranula gibbula,urn:lsid:marinespecies.org:taxname:346477
163.1,,
265,Metazoa,urn:lsid:marinespecies.org:taxname:1486573
⋮,⋮,⋮
265.1833,Metazoa,urn:lsid:marinespecies.org:taxname:1486573
163.8662,,
163.8663,,
262.3,Mantoniella squamata,urn:lsid:marinespecies.org:taxname:134563


In [14]:
occurrence <- bind_cols(occurrence, matched)
occurrence

Unnamed: 0_level_0,eventID,materialSampleID,eventDate,locality,decimalLongitude,decimalLatitude,coordinateUncertaintyInMeters,higherGeography,minimumDepthInMeters,maximumDepthInMeters,⋯,temperature,sampleSizeUnit,asv,organismQuantity,occurrenceID,organismQuantityType,verbatimIdentification,samplingProtocol,scientificName,scientificNameID
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,⋯,<dbl>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
106,EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.2,14,EE0493_asv.2,sequence reads,Clausocalanus_furcatus,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Clausocalanus furcatus,urn:lsid:marinespecies.org:taxname:104503
163,EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.5,40587,EE0493_asv.5,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,,
168,EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.6,7,EE0493_asv.6,sequence reads,Farranula_gibbula,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Farranula gibbula,urn:lsid:marinespecies.org:taxname:346477
163.1,EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.7,29367,EE0493_asv.7,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,,
265,EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.8,72378,EE0493_asv.8,sequence reads,Metazoa,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Metazoa,urn:lsid:marinespecies.org:taxname:1486573
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋱,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
265.1833,EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16949,1,EE0495_asv.16949,sequence reads,Metazoa,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Metazoa,urn:lsid:marinespecies.org:taxname:1486573
163.8662,EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16958,1,EE0495_asv.16958,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,,
163.8663,EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16961,1,EE0495_asv.16961,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,,
262.3,EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16962,1,EE0495_asv.16962,sequence reads,Mantoniella_squamata,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Mantoniella squamata,urn:lsid:marinespecies.org:taxname:134563


In [15]:
non_matches <- occurrence %>%
    filter(is.na(scientificNameID)) %>%
    group_by(verbatimIdentification) %>%
    summarize(n = n()) %>%
    arrange(desc(n))

write.table(non_matches, file = file.path(output_dir, "nonmatches.txt"), sep = "\t", row.names = FALSE, na = "", quote = FALSE)

non_matches

verbatimIdentification,n
<chr>,<int>
Eukaryota,8664
undef_Eukaryota,447
,283
undef_Oomycota,30
Navicula,9
⋮,⋮
Lobophora_brown_algae,1
Nitzschia_diatoms,1
Synchaetomella_acerina,1
undef_Gastropoda,1


In [16]:
occurrence <- occurrence %>%
    mutate(
        scientificName = case_when(verbatimIdentification %in% c("Eukaryota", "undef_Eukaryota", "") ~ "Incertae sedis", .default = scientificName),
        scientificNameID = case_when(verbatimIdentification %in% c("Eukaryota", "undef_Eukaryota", "") ~ "urn:lsid:marinespecies.org:taxname:12", .default = scientificNameID)
    )

In [17]:
occurrence %>%
    filter(is.na(scientificNameID)) %>%
    group_by(verbatimIdentification) %>%
    summarize(n = n()) %>%
    arrange(desc(n))

verbatimIdentification,n
<chr>,<int>
undef_Oomycota,30
Navicula,9
undef_Bacteria_bacteria,7
Clathria_genus,5
Nitzschia_sp._BOLD:AAO7110,4
⋮,⋮
Lobophora_brown_algae,1
Nitzschia_diatoms,1
Synchaetomella_acerina,1
undef_Gastropoda,1


In [18]:

occurrence

eventID,materialSampleID,eventDate,locality,decimalLongitude,decimalLatitude,coordinateUncertaintyInMeters,higherGeography,minimumDepthInMeters,maximumDepthInMeters,⋯,temperature,sampleSizeUnit,asv,organismQuantity,occurrenceID,organismQuantityType,verbatimIdentification,samplingProtocol,scientificName,scientificNameID
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,⋯,<dbl>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.2,14,EE0493_asv.2,sequence reads,Clausocalanus_furcatus,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Clausocalanus furcatus,urn:lsid:marinespecies.org:taxname:104503
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.5,40587,EE0493_asv.5,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Incertae sedis,urn:lsid:marinespecies.org:taxname:12
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.6,7,EE0493_asv.6,sequence reads,Farranula_gibbula,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Farranula gibbula,urn:lsid:marinespecies.org:taxname:346477
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.7,29367,EE0493_asv.7,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Incertae sedis,urn:lsid:marinespecies.org:taxname:12
EE0493,EE0493,24/04/2023,Ile esprit,46.22536,9.42518,20,Aldabra Atoll,10,10,⋯,26.3,ml,asv.8,72378,EE0493_asv.8,sequence reads,Metazoa,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Metazoa,urn:lsid:marinespecies.org:taxname:1486573
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋱,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16949,1,EE0495_asv.16949,sequence reads,Metazoa,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Metazoa,urn:lsid:marinespecies.org:taxname:1486573
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16958,1,EE0495_asv.16958,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Incertae sedis,urn:lsid:marinespecies.org:taxname:12
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16961,1,EE0495_asv.16961,sequence reads,Eukaryota,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Incertae sedis,urn:lsid:marinespecies.org:taxname:12
EE0495,EE0495,02/04/2023,Settlement beach,46.20605,9.400901,98,Aldabra Atoll,12,12,⋯,25.1,ml,asv.16962,1,EE0495_asv.16962,sequence reads,Mantoniella_squamata,https://github.com/BeBOP-OBON/UNESCO_protocol_collection,Mantoniella squamata,urn:lsid:marinespecies.org:taxname:134563


In [19]:
library(leaflet)

stations <- occurrence %>%
    distinct(locality, decimalLongitude, decimalLatitude)

stations

leaflet() %>%
    addTiles() %>%
    addMarkers(lng = decimalLongitude,lat = decimalLatitude, popup = stations$locality)

locality,decimalLongitude,decimalLatitude
<chr>,<dbl>,<dbl>
Ile esprit,46.22536,9.42518
Settlement beach,46.20605,9.400901


ERROR: Error in resolveFormula(lng, data): object 'decimalLongitude' not found


In [None]:
occurrence <- occurrence %>%
    mutate(decimalLatitude = -decimalLatitude)

stations <- occurrence %>%
    distinct(locality, decimalLongitude, decimalLatitude)
stations

leaflet() %>%
    addTiles() %>%
    addMarkers(lng = decimalLongitude,lat = decimalLatitude, popup = stations$locality)
     

In [None]:
obistools::check_eventdate(occurrence)

In [None]:
library(lubridate)

occurrence <- occurrence %>%
    mutate(eventDate = format_ISO8601(parse_date_time(eventDate, "%d/%m/%Y"), precision = "ymd", usetz = FALSE))

unique(occurrence$eventDate)
     

In [None]:
head(occurrence)

In [None]:
obistools::check_fields(occurrence)

In [None]:
occurrence <- occurrence %>%
    mutate(
        occurrenceStatus = "present",
        basisOfRecord = "MaterialSample"
    )

In [None]:
mof_reads <- occurrence %>%
    select(occurrenceID, measurementValue = organismQuantity) %>%
    mutate(
        measurementType = "sequence reads"
    )

mof_samplesize <- occurrence %>%
    select(occurrenceID, measurementValue = sampleSizeValue, measurementUnit = sampleSizeUnit) %>%
    mutate(
        measurementType = "sample size",
        measurementTypeID = "http://vocab.nerc.ac.uk/collection/P01/current/VOLWBSMP/",
        measurementUnit = "ml",
        measurementUnitID = "http://vocab.nerc.ac.uk/collection/P06/current/VVML/"
    )

mof_dna <- occurrence %>%
    select(occurrenceID, measurementValue = dna) %>%
    mutate(
        measurementType = "DNA concentration",
        measurementTypeID = "http://vocab.nerc.ac.uk/collection/P01/current/A260DNAX/",
        measurementUnit = "ng/μl",
        measurementUnitID = "http://vocab.nerc.ac.uk/collection/P06/current/UNUL/"
    )

mof_temperature <- occurrence %>%
    select(occurrenceID, measurementValue = temperature) %>%
    mutate(
        measurementType = "seawater temperature",
        measurementTypeID = "http://vocab.nerc.ac.uk/collection/P01/current/TEMPPR01/",
        measurementUnit = "degrees Celsius",
        measurementUnitID = "http://vocab.nerc.ac.uk/collection/P06/current/UPAA/"
    )

mof <- bind_rows(mof_reads, mof_samplesize, mof_dna)
mof
    

In [None]:
library(Biostrings)

fasta_file <- readDNAStringSet("../dataset/sequences.fasta")
fasta <- data.frame(asv = names(fasta_file), DNA_sequence = paste(fasta_file))
fasta

In [None]:
dna <- occurrence %>%
    select(occurrenceID, asv, concentration = dna) %>%
    left_join(fasta, by = "asv")

dna

In [None]:
cat(paste0(readLines("../dataset/metadata.txt"), collapse = "\n"))

In [None]:
dna <- dna %>%
    mutate(
        concentrationUnit = "ng/μl",
        lib_layout = "paired",
        target_gene = "COI",
        pcr_primers = "FWD:GGWACWGGWTGAACWGTWTAYCCYCC;REV:TANACYTCNGGRTGNCCRAARAAYCA",
        seq_meth = "Illumina NovaSeq6000",
        ref_db = "https://github.com/iobis/edna-reference-databases",
        pcr_primer_forward = "GGWACWGGWTGAACWGTWTAYCCYCC",
        pcr_primer_reverse = "TANACYTCNGGRTGNCCRAARAAYCA",
        pcr_primer_name_forward = "mlCOIintF",
        pcr_primer_name_reverse = "dgHCO2198",
        pcr_primer_reference = "doi:10.1186/1742-9994-10-34"
    ) %>%
    select(-asv)

dna

In [None]:
occurrence <- occurrence %>%
    select(-asv, -dna, -temperature)

write.table(occurrence, file = file.path(output_dir, "occurrence.txt"), sep = "\t", row.names = FALSE, na = "", quote = FALSE)
write.table(mof, file = file.path(output_dir, "measurementorfact.txt"), sep = "\t", row.names = FALSE, na = "", quote = FALSE)
write.table(dna, file = file.path(output_dir, "dnaderiveddata.txt"), sep = "\t", row.names = FALSE, na = "", quote = FALSE)
     

In [None]:
library(dwcawriter)

archive <- list(
    eml = '
        
        Dummy Dataset
        
    ',
    core = list(
        name = "occurrence",
        type = "https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml",
        index = which(names(occurrence) == "occurrenceID"),
        data = occurrence
    ),
    extensions = list(
        list(
            name = "measurementorfact",
            type = "https://rs.gbif.org/extension/obis/extended_measurement_or_fact_2023-08-28.xml",
            index = which(names(mof) == "occurrenceID"),
            data = mof
        ),
        list(
            name = "dnaderiveddata",
            type = "https://rs.gbif.org/extension/gbif/1.0/dna_derived_data_2022-02-23.xml",
            index = which(names(dna) == "occurrenceID"),
            data = dna
        )
    )
)

write_dwca(archive, file.path(output_dir, "archive.zip"))