In [1]:
# Install prerequisites
# From CRAN
install.packages(c("remotes", "RstoxData", "data.table", "worms"))

# From GitHub: Rstox, taken from the "develop" branch of Sea2Data/Rstox
remotes::install_github("Sea2Data/Rstox", ref = "develop")

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

Skipping install of 'Rstox' from a github remote, the SHA1 (e2246bf3) has not changed since last install.
  Use `force = TRUE` to force installation



In [2]:
# Load libraries
library(Rstox)
library(RstoxData)
library(data.table)

Rstox_1.11.1 
**********
If problems with Java Memory such as java.lang.OutOfMemoryError occurs, see ?setJavaMemory.
**********


Attaching package: ‘RstoxData’


The following object is masked from ‘package:Rstox’:

    readErsFile




In [3]:
#
# (1) Function for adding missing scientific names
#
#' Populate `scientificname` in the catch samples from their Aphia IDs
#'
#' Looks up every distinct, non-missing `aphia` value of `data$catchsample`
#' with `makeTaxaTable()` and replaces the `scientificname` column with the
#' names returned by that lookup.
#'
#' @param data A list holding a `catchsample` table with an `aphia` column.
#'   The table is expected to be a data.table (data.table column selection is
#'   used once a lookup is actually needed).
#' @return `data`, with `data$catchsample` replaced by the merged table. Rows
#'   whose `aphia` has no match in the taxa table get `NA` as
#'   `scientificname`. Because the result comes from `merge()`, row and column
#'   order may differ from the input. When there is no Aphia ID to look up,
#'   `data` is returned unchanged.
getScientificNames <- function(data) {
    catch <- data$catchsample
    if (is.null(catch) || !("aphia" %in% names(catch))) {
        stop("`data$catchsample` must exist and contain an `aphia` column",
             call. = FALSE)
    }

    # Distinct, non-missing Aphia IDs that need a name
    aphias <- unique(catch[["aphia"]])
    aphias <- aphias[!is.na(aphias)]

    # Nothing to look up: skip the remote script and the lookup request
    if (length(aphias) == 0) {
        return(data)
    }

    # Source Edvin's script (presumably what defines makeTaxaTable()).
    # NOTE(review): this downloads and executes unpinned remote code on every
    # call and evaluates it in the global environment; consider pinning a
    # commit or vendoring the script.
    source("https://github.com/Sea2Data/cruisetools/raw/master/taxaAnnotation/annotateTaxa.R")

    # Build the taxa table and keep a single row per Aphia ID, so the merge
    # below can never multiply catch-sample rows
    taxaTable <- makeTaxaTable(aphias)
    taxa <- taxaTable[, c("AphiaID", "scientificname")]
    taxa <- taxa[!duplicated(taxa$AphiaID), ]

    # Drop the old (incomplete) name column if present, then attach the names
    # from the taxa table; all.x = TRUE keeps catch samples without a match
    keep <- setdiff(names(catch), "scientificname")
    data$catchsample <- merge(catch[, keep, with = FALSE], taxa,
                              by.x = "aphia", by.y = "AphiaID", all.x = TRUE)

    # Now scientificname is populated wherever the lookup succeeded
    data
}

In [4]:
#
# (2) Function for downloading all cruises of the selected cruise series
#

#' Download a cruise series and read all of its biotic files
#'
#' Downloads the cruise series with `getNMDdata()` as a "ReadBioticXML"-only
#' project, lists the files under each project's `input/biotic` directory and
#' reads them with `ReadBiotic()`.
#'
#' @param surveyName Name of the cruise series, passed to `getNMDdata()` as
#'   `cruise`.
#' @param combine_year If `TRUE`, stack the `fishstation`, `catchsample`,
#'   `individual` and `agedetermination` tables of all files into one table
#'   each.
#' @param combine_table If `TRUE` (and `combine_year` is `TRUE`), additionally
#'   merge the four stacked tables into a single table. Ignored, with a
#'   warning, when `combine_year` is `FALSE`.
#' @return With the defaults, the object returned by `ReadBiotic()`. With
#'   `combine_year = TRUE`, a list with elements `fishstation`, `catchsample`,
#'   `individual` and `agedetermination`. With both flags `TRUE`, one merged
#'   table.
getSurveySeriesData <- function(surveyName, combine_year = FALSE, combine_table = FALSE) {
    # Download cruise series as ReadBiotic only project into StoX directory
    modelBio <- list("ReadBioticXML")
    projects <- getNMDdata(cruise = surveyName, group = "all", model = modelBio,
                           abbrev = FALSE, subdir = TRUE, ow = TRUE)

    # Load all biotic files (projects should contain the full project paths)
    files <- list.files(file.path(projects, "input", "biotic"), full.names = TRUE)
    biotic <- ReadBiotic(files) # This can take a while for a large cruise series

    if (!combine_year) {
        if (combine_table) {
            warning("`combine_table = TRUE` is ignored unless `combine_year = TRUE`",
                    call. = FALSE)
        }
        return(biotic)
    }

    # Stack one named table across all files, exactly once per table
    stackTable <- function(tableName) {
        rbindlist(lapply(biotic, "[[", tableName))
    }
    station.dt <- stackTable("fishstation")
    catch.dt <- stackTable("catchsample")
    individual.dt <- stackTable("individual")
    age.dt <- stackTable("agedetermination")

    if (!combine_table) {
        return(list(
            fishstation = station.dt,
            catchsample = catch.dt,
            individual = individual.dt,
            agedetermination = age.dt
        ))
    }

    # Merge all data (all.x means include empty stations too)
    all.dt <- merge(station.dt, catch.dt, all.x = TRUE)
    all.dt <- merge(all.dt, individual.dt,
                    by = intersect(names(all.dt), names(individual.dt)),
                    all.x = TRUE)

    # Age is a bit tricky to merge: besides the shared columns, the
    # individual's preferredagereading is matched against agedeterminationid
    shared <- intersect(names(all.dt), names(age.dt))
    all.dt <- merge(all.dt, age.dt,
                    by.x = c(shared, "preferredagereading"),
                    by.y = c(shared, "agedeterminationid"),
                    all.x = TRUE)
    all.dt
}

In [5]:
# Start downloading data

# List all cruise series ("cs") and keep the result in CS;
# as.data.frame() is only used to display it as a table in the notebook
CS <- getNMDinfo("cs", recursive=FALSE)
as.data.frame(CS)

CS
<chr>
Atlantic Ocean West of British Isles INT blue whiting spawning survey in spring
Barents Sea NOR demersal fish cruise in August-September
Barents Sea NOR demersel fish cruise in October-November
Barents Sea NOR-RUS 0-group cruise in autumn
Barents Sea NOR-RUS ecosystem cruise in autumn
North Sea International ecosystem cruise in Q2_Q3
North Sea International IBTS cruise in Q1
North Sea International IBTS cruise in Q2_Q3
North Sea International IBTS cruise in Q4
North Sea NOR mackerel cruise in summer


In [6]:
# Pick "Barents Sea NOR-RUS ecosystem cruise in autumn" cruise series (no. 5)
# NOTE(review): selecting by position relies on the order of CS staying the
# same between runs; confirm, or select the series by name instead.
myCS <- CS[5]
# Show the information available for the selected cruise series
getNMDinfo(c("cs", myCS))

Unnamed: 0_level_0,code,Cruise,ShipName,Year
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
3,1,0087_2003_UFVZ_TSIVI,Tsivilsk,2003
6,2,2003110,G.O.Sars,2003
1,3,2003209,Johan Hjort,2003
4,4,2003703,Jan Mayen,2003
2,5,2003705,Jan Mayen,2003
5,6,0115_2003_UFFJ_SMOLE,Smolensk,2003
9,1,2004210,Johan Hjort,2004
7,2,2004702,Jan Mayen,2004
10,3,2004703,Jan Mayen,2004
8,4,0118_2004_UFFJ_SMOLE,Smolensk,2004


In [7]:
# Get all data from the survey, combine all cruises into hierarchical tables
# (combine_year = TRUE gives one fishstation, catchsample, individual and
# agedetermination table each, stacked across all biotic files)
surveyData <- getSurveySeriesData(myCS, combine_year = TRUE)

Downloading cruise series 'Barents Sea NOR-RUS ecosystem cruise in autumn' ...

Searching for files (164 runs):





Overwriting: /home/jovyan/workspace/stox/project/Barents Sea NOR-RUS ecosystem cruise in autumn_Alldata 


Downloading... (1 runs):





In [8]:
# Examine sample count of the survey: number of rows in each combined table
sampleCount <- lapply(surveyData, nrow)
# Transpose so that each table is shown as one row
t(as.data.frame(sampleCount))

0,1
fishstation,13917
catchsample,243012
individual,3606935
agedetermination,261539


In [9]:
# Check if we have missing scientific names (answer: yes) by listing the
# distinct values of the scientificname column in the catch samples
unique(surveyData$catchsample$scientificname)

In [10]:
# Let's populate them: look up the names from the Aphia IDs of the catch
# samples and store the updated data set under a new name
newSurveyData <- getScientificNames(surveyData)

Loading required package: httr

Loading required package: plyr



REQUESTING 817 ITEMS BY ID from World Register of Marine Species  (www.marinespecies.org), 13/01/2021 12:09:14 PM (CC-BY)
,126436,126441,126437,126444,126461,126439,151324,127137,127144,105865
,107649,233889,127254,274100,126758,126759,19494,126735,127255,126757
,127191,1135,126417,126433,127212,127193,127110,154675,127115,127073
,11723,126435,126505,127214,1128,127206,127235,127104,127205,106669
,127218,105863,127199,125589,234519,125566,126580,127103,127072,11760
,127113,125909,126752,135220,127136,127070,138481,106903,126088,127111
,127112,106782,123082,125575,126102,140692,125517,125912,127203,101800
,127143,127190,127139,126154,127207,126160,254529,126150,139178,110708
,123080,123586,100653,101,105,127217,105410,107315,110690,123084
,126104,127211,51,1806,127198,127215,127098,1248,140223,1302
,107013,123083,558,123081,883,125732,1066,159523,110711,138281
,138036,153097,105894,127119,126352,126555,126450,126627,107012,107566
,124612,100694,118283,119036,107567,107563,126715,123258,

“Some provided Aphia IDs encode non-accepted scientific names.”


In [11]:
# Re-check the scientific names: distinct values after the lookup, shown as a
# data frame for readable notebook output
as.data.frame(unique(newSurveyData$catchsample$scientificname))

unique(newSurveyData$catchsample$scientificname)
<chr>
""
Actiniidae
Actinostolidae
Sagartiidae
Epizoanthidae
Actinia
Urticina
Hormathia
Cerianthus
Epizoanthus
