-
Notifications
You must be signed in to change notification settings - Fork 12
/
Search_species.R
99 lines (90 loc) · 4.85 KB
/
Search_species.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#' Search species
#'
#' Match taxonomic inputs to a given row of \code{ParentChild_gz} or its closest ancestor
#'
#' This function attempts to do a smart match to elements of \code{ParentChild_gz}. It sweeps from Order to Species
#' and ignores any taxonomic input listed as \code{"predictive"} until it finds something else. It then appends
#' \code{"predictive"} to any lower taxonomic level that is missing, and checks whether this specification yields a single,
#' unique taxon. If it does, it then returns the row number and potentially any ancestors (higher taxonomic levels)
#'
#' @param Class Character input for taxonomic class
#' @param Order Character input for taxonomic class
#' @param Family Character input for taxonomic class
#' @param Genus Character input for taxonomic class
#' @param Species Character input for taxonomic class
#' @param add_ancestors Boolean whether to add ancestors for matching species or not
#' @param ParentChild_gz vector providing index of parent-taxon for every child-taxa
#'
#' @return integer of row numbers of \code{ParentChild_gz} matching \code{genus_species}
#' @export
Search_species <-
function( Class = "predictive",
Order = "predictive",
Family = "predictive",
Genus = "predictive",
Species = "predictive",
add_ancestors = TRUE,
Database = FishLife::FishBase_and_RAM,
ParentChild_gz = Database$ParentChild_gz ){
# Match full taxonomy from fishbase
Match = 1:nrow(rfishbase::fishbase)
if( Class!="predictive" ) Match = Match[ which(tolower(rfishbase::fishbase$Class[Match])==tolower(Class)) ]
if( Order!="predictive" ) Match = Match[ which(tolower(rfishbase::fishbase$Order[Match])==tolower(Order)) ]
if( Family!="predictive" ) Match = Match[ which(tolower(rfishbase::fishbase$Family[Match])==tolower(Family)) ]
if( Genus!="predictive" ) Match = Match[ which(tolower(rfishbase::fishbase$Genus[Match])==tolower(Genus)) ]
if( Species!="predictive" ) Match = Match[ which(tolower(rfishbase::fishbase$Species[Match])==tolower(Species)) ]
if( length(Match)==0 ) stop( paste("Inputs not found in FishBase, please check spelling of",tolower(Class),tolower(Order),tolower(Family),tolower(Genus),tolower(Species)) )
# add missing taxonomic levels from FishBase if uniquely defined (and throw error if not)
full_taxonomy = c(Class, Order, Family, Genus, Species)
if( !all(c(Species)=="predictive") ){
if( length(unique(rfishbase::fishbase[Match,'Species']))!=1) stop("inputs are not unique")
if( length(unique(rfishbase::fishbase[Match,'Species']))==1) full_taxonomy[5] = unique(rfishbase::fishbase[Match,'Species'])[1]
}
if( !all(c(Species,Genus)=="predictive") ){
if( length(unique(rfishbase::fishbase[Match,'Genus']))!=1) stop("inputs are not unique")
if( length(unique(rfishbase::fishbase[Match,'Genus']))==1) full_taxonomy[4] = unique(rfishbase::fishbase[Match,'Genus'])[1]
}
if( !all(c(Species,Genus,Family)=="predictive") ){
if( length(unique(rfishbase::fishbase[Match,'Family']))!=1) stop("inputs are not unique")
if( length(unique(rfishbase::fishbase[Match,'Family']))==1) full_taxonomy[3] = unique(rfishbase::fishbase[Match,'Family'])[1]
}
if( !all(c(Species,Genus,Family,Order)=="predictive") ){
if( length(unique(rfishbase::fishbase[Match,'Order']))!=1) stop("inputs are not unique")
if( length(unique(rfishbase::fishbase[Match,'Order']))==1) full_taxonomy[2] = unique(rfishbase::fishbase[Match,'Order'])[1]
}
if( !all(c(Species,Genus,Family,Order,Class)=="predictive") ){
if( length(unique(rfishbase::fishbase[Match,'Class']))!=1) stop("inputs are not unique")
if( length(unique(rfishbase::fishbase[Match,'Class']))==1) full_taxonomy[1] = unique(rfishbase::fishbase[Match,'Class'])[1]
}
match_taxonomy = full_taxonomy
# Match in database
Count = 1
Group = NA
while( is.na(Group) ){
Group = match( paste(tolower(match_taxonomy),collapse="_"), tolower(ParentChild_gz[,'ChildName']) )
if( is.na(Group) ){
match_taxonomy[length(match_taxonomy)-Count+1] = "predictive"
Count = Count+1
}
}
message( "Closest match: ", as.character(ParentChild_gz[Group,'ChildName']) )
# Pick out ancestors
if( add_ancestors==TRUE ){
Group = Find_ancestors(child_num=Group, ParentChild_gz=ParentChild_gz)
}
# Function to add predictive to taxon name
Add_predictive = function( char_vec ){
return_vec = char_vec
for(i in 1:length(return_vec)){
vec = strsplit(as.character(return_vec[i]),"_")[[1]]
return_vec[i] = paste( c(vec,rep("predictive",5-length(vec))), collapse="_")
}
return(return_vec)
}
match_taxonomy = unique(as.character(Add_predictive(ParentChild_gz[Group,'ChildName'])))
# Find new matches
GroupNum = match(match_taxonomy,ParentChild_gz[,'ChildName'])
# Return match
Return = list( "GroupNum"=GroupNum, "match_taxonomy"=match_taxonomy )
return( Return )
}