# Preparing morphological measurements for comparative analyses

## Cleaning and loading data

### Loading packages

In [1]:
library("geomorph")
library("stringr")

Le chargement a nécessité le package : RRPP

Le chargement a nécessité le package : rgl

This build of rgl does not include OpenGL functions.  Use
 rglwidget() to display results, e.g. via options(rgl.printRglwidget = TRUE).

Le chargement a nécessité le package : Matrix



### Loading data

#### Landmarks file

In [2]:
# Read the landmark coordinates from the TPS file. Specimen names are taken
# from the image identifier of each record; negative coordinates are kept as
# values (not converted to NA) and curve information is ignored.
landmarks <- readland.tps(
  file = "data/HYDRO_LM_all_withNAs.tps",
  specID = "imageID",
  negNA = FALSE,
  readcurves = FALSE,
  warnmsg = TRUE
)


No curves detected; all points appear to be fixed landmarks.


“Not all specimens have scale adjustment (perhaps because they are already scaled); 
no rescaling will be performed in these cases
”


#### Vertebrae count file

In [3]:
# Vertebral count table; its Transition_Vertebra and Longest_Vertebra columns
# are attached to the measurements further down.
Vert_Count <- read.csv("data/Vertebral_Count.csv")
# Short code = first 7 characters of the full specimen code. substr() is
# vectorised and, unlike pasting x[1:7] character by character, never appends
# literal "NA" text when a code is shorter than 7 characters. as.character()
# keeps this working if Full_Code was read as a factor.
Vert_Count$Short_Code <- substr(as.character(Vert_Count$Full_Code), 1, 7)

#### Ecological data file

In [4]:
# Categorical (ecological) variables, one row per nomenclature code.
# NOTE(review): read.table() splits on whitespace by default although the file
# has a .csv extension - confirm the file really is whitespace-delimited.
data_eco <- read.table(file = "data/Cat_Var.csv", header = TRUE)

#### Loading vertebral count

In [5]:
# Vertebral counts: headerless CSV whose first column supplies the row names.
vertnumb <- read.csv(
  file = "data/Caudal_Count_All.csv",
  header = FALSE,
  row.names = 1,
  sep = ","
)
rnames <- row.names(vertnumb)
names(vertnumb) <- "Vert_numb"

### Cleaning data

#### Remove misidentified specimen (Protochromys fellowsi)

In [6]:
# Drop the 74th specimen (third array dimension) from the landmark array.
# The index is positional: it is only valid for the specimen order of the TPS
# file as read above, and every hard-coded row index used later refers to the
# array AFTER this removal.
landmarks <- landmarks[,,-74]

#### Subsetting datasets

In [7]:
# Specimen names are fixed-width codes; each grouping variable is decoded from
# its character positions within the name:
#   1-3 genus | 1-7 species | 8-10 locomotion | 11-13 climbing |
#   14-15, 16, 17-18 prehensility codings | 19-21 tribe | 22-25 collection |
#   26-27 sex
genus <- factor(substring(dimnames(landmarks)[[3]], 1, 3))
species <- factor(substring(dimnames(landmarks)[[3]], 1, 7))
locomotion <- factor(substring(dimnames(landmarks)[[3]], 8, 10))
climbing <- factor(substring(dimnames(landmarks)[[3]], 11, 13))
prehensility_TER <- factor(substring(dimnames(landmarks)[[3]], 14, 15))
prehensility_BIN <- factor(substring(dimnames(landmarks)[[3]], 16, 16))
prehensility_POST <- factor(substring(dimnames(landmarks)[[3]], 17, 18))
tribe <- factor(substring(dimnames(landmarks)[[3]], 19, 21))
coll <- factor(substring(dimnames(landmarks)[[3]], 22, 25))
sex <- factor(substring(dimnames(landmarks)[[3]], 26, 27))

#### Attributing species names to landmark table

In [8]:
# Shorten every specimen name to its 7-character species code and keep that
# vector for labelling later tables (several specimens may share a code).
namesp <- substr(dimnames(landmarks)[[3]], 1, 7)
dimnames(landmarks)[[3]] <- namesp

#### Linear extraction from landmarks

In [9]:
# Measurement labels: the femur measure first, then four measures (TPW, MDW,
# CTL, CTH) for each of the 46 vertebrae, numbered with two digits (V01..V46).
vec <- c(
  "FMRMID",
  paste0(
    rep(sprintf("V%02d", 1:46), each = 4),
    c("TPW", "MDW", "CTL", "CTH")
  )
)

# Each measurement is the distance between two consecutive landmarks:
# row k of `lmks` pairs landmark 2k-1 (start) with landmark 2k (end).
lmks <- matrix(
  seq_len(370),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(vec, c("start", "end"))
)



#### Inter-landmark distance computing

In [10]:
# Distance between the start/end landmark pair of every measurement, for every
# specimen; the raw table is saved before any cleaning.
lineardists <- interlmkdist(A = landmarks, lmks = lmks)
write.csv(x = lineardists, file = "data/Linear_Measurements_all_with_NAs.csv")

#### Removing outliers

In [11]:
# Work on a copy so the raw distances stay untouched.
lineardists_out <- lineardists
# Rows are specimens (order after the specimen removal above). Given the order
# of `lmks`, column 1 is FMRMID and vertebra k occupies columns 4k-2 to 4k+1.
lineardists_out[8, seq(130, 137)] <- NA  # CHIglir BOR014: V33-V34 (2 last vertebrae)
lineardists_out[9, seq(126, 129)] <- NA  # CHImajo: V32 (last vertebra)
lineardists_out[40, seq(170, 173)] <- NA # MALroth: V43 (last vertebra)
lineardists_out[48, seq(130, 133)] <- NA # MICspce: V33 (last vertebra)

#### Preparing subsets per vertebrae type

In [12]:
# Convert the distance matrix to a data frame so that columns can be selected
# by name and extra columns (TV, LV) can be appended with `$` later on.
lineardists_out <- as.data.frame(lineardists_out)

#### Removing and isolating femur measure (size proxy)

In [13]:
# Femur measure as a one-column matrix labelled by species code.
sizef <- matrix(
  lineardists_out[, "FMRMID"],
  ncol = 1,
  dimnames = list(namesp, "Proxy_size_raw")
)
TBLsizef <- sizef
# Drop the femur column (column 1) so only vertebral measurements remain.
lineardists_out <- lineardists_out[, -1]

#### Computing other size proxy (geometric mean of other measurements)

In [14]:
# Geometric mean of all remaining (non-femur) measurements, per specimen.
# It is computed on the log scale: multiplying up to 184 values directly can
# overflow to Inf (or underflow to 0) before the root is taken, whereas the
# mean of logs stays in range. Missing measurements are ignored.
size_prox <- apply(lineardists_out, 1, function(x) {
  x <- x[!is.na(x)]
  # A specimen without any measurement has no size proxy (the product form
  # returned 1 in that case, because an empty product is 1).
  if (length(x) == 0L) NA_real_ else exp(mean(log(x)))
})
size_prox <- as.matrix(size_prox)
rownames(size_prox) <- c(namesp)
colnames(size_prox) <- "Proxy_size_GM"

#### Extracting specific measurements

In [15]:
# The 184 measurement columns cycle through TPW, MDW, CTL, CTH for each
# vertebra, so each measure is every 4th column from its own offset.
tableTPW <- lineardists_out[, seq(1, 184, by = 4)]
tableMDW <- lineardists_out[, seq(1, 184, by = 4) + 1]
tableCTL <- lineardists_out[, seq(1, 184, by = 4) + 2]
tableCTH <- lineardists_out[, seq(1, 184, by = 4) + 3]

#### Tail length proxy

In [16]:
# Tail length proxy: sum of the centrum lengths (CTL) of all vertebrae of a
# specimen. Missing values are skipped, so a specimen with no CTL value at all
# gets 0 rather than NA.
TailLength <- rowSums(tableCTL, na.rm = TRUE)
# One-column matrix version of the same values.
TBLtaillength <- as.matrix(TailLength)

#### Identifying the longest vertebra (by number) for each specimen

In [17]:
# For each specimen, name of the CTL column holding the largest centrum length
# (which.max ignores NA values).
LongestVertebra <- apply(tableCTL, 1, function(row) colnames(tableCTL)[which.max(row)])
# Strip the "V" prefix and "CTL" suffix to keep only the vertebra number.
LongestVertebra <- sub("CTL", "", sub("V", "", LongestVertebra))
# Numeric one-column matrix (column "V1") labelled by species code.
tableLV <- matrix(
  as.numeric(LongestVertebra),
  ncol = 1,
  dimnames = list(namesp, "V1")
)
write.csv(tableLV, "data/Longest_Vertebra_All.csv")

## Computing metrics

### Robusticity index (RI)

#### Robusticity index for all specimens

In [18]:
# Robusticity index (%) of every vertebra: mid-distal width relative to
# centrum length.
RI <- (tableMDW / tableCTL) * 100

#### Mean robusticity index per specimen (averaged across vertebrae)

In [19]:
# Row-wise mean over all vertebra columns: one value per specimen (row).
# Missing vertebrae are ignored; a row with no value at all yields NaN.
RImean <- rowMeans(RI, na.rm = TRUE)

### Relative expansion of the transverse process index (TPEI)

#### Transverse process index for all specimens

In [20]:
# Transverse process expansion index (%) of every vertebra: transverse process
# width relative to centrum length.
TPEI <- (tableTPW / tableCTL) * 100

#### Mean transverse process index per specimen (averaged across vertebrae)

In [21]:
# Row-wise mean over all vertebra columns: one value per specimen (row).
# Missing vertebrae are ignored; a row with no value at all yields NaN.
TPEImean <- rowMeans(TPEI, na.rm = TRUE)

## Per vertebral region species analyses

In [22]:
# Attach the transition vertebra (TV) and longest vertebra (LV) numbers to the
# measurement table. The match is purely positional, so the two tables must
# have the same number of rows; without this check a shorter column whose
# length divides the row count would be silently recycled.
# NOTE(review): row ORDER is assumed to be identical in both tables - confirm.
stopifnot(nrow(Vert_Count) == nrow(lineardists_out))
lineardists_out$TV <- Vert_Count$Transition_Vertebra
lineardists_out$LV <- Vert_Count$Longest_Vertebra

### Subsetting per region

#### Proximal

In [23]:
# Proximal region: for each specimen, every measurement from the first
# vertebra up to and including the transition vertebra (TV).
proximal <- apply(lineardists_out, 1, function(spec_row) {
  # Vertebra number of each measurement column, read from characters 2-3 of
  # its name ("Vnn..."); the two trailing TV/LV columns are left out.
  measure_names <- names(spec_row)[seq_len(length(spec_row) - 2)]
  vert_no <- as.numeric(substr(measure_names, 2, 3))
  # Last column that belongs to the transition vertebra.
  tv_cols <- which(vert_no == as.numeric(spec_row["TV"]))
  last_col <- tv_cols[length(tv_cols)]
  unname(spec_row[1:last_col])
})

#### Transitional

In [24]:
# Transitional region: for each specimen, the measurements of the vertebrae
# after the transition vertebra (TV) up to and including the longest
# vertebra (LV).
transitional <- apply(lineardists_out, 1, function(x){
  # Vertebra number of each measurement column, read from characters 2-3 of
  # its name ("Vnn..."); the two trailing TV/LV columns are left out.
  vert_index <- as.numeric(substr(names(x)[seq_len(length(x) - 2)], 2, 3))
  # Last column of the transition vertebra.
  limit_prox <- which(vert_index == as.numeric(x["TV"]))
  limit_prox <- limit_prox[length(limit_prox)]
  # Last column of the longest vertebra.
  limit_trans <- which(vert_index == as.numeric(x["LV"]))
  limit_trans <- limit_trans[length(limit_trans)]
  # `a:b` counts downwards when a > b, so LV <= TV would silently return
  # columns in reverse order that belong to other regions. Such a specimen has
  # an empty transitional region instead.
  keep <- if (limit_trans > limit_prox) (limit_prox + 1):limit_trans else integer(0)
  transitional <- x[keep]
  names(transitional) <- NULL
  return(transitional)
})

#### Distal

In [25]:
# Distal region: for each specimen, the measurements of every vertebra after
# the longest vertebra (LV).
distal <- apply(lineardists_out, 1, function(x){
  # Number of measurement columns (the two trailing TV/LV columns excluded).
  n_meas <- length(x) - 2
  # Vertebra number of each measurement column, read from characters 2-3 of
  # its name ("Vnn...").
  vert_index <- as.numeric(substr(names(x)[seq_len(n_meas)], 2, 3))
  # Last column of the longest vertebra.
  limit_trans <- which(vert_index == as.numeric(x["LV"]))
  limit_trans <- limit_trans[length(limit_trans)]
  # `a:b` counts downwards when a > b: if LV is the very last vertebra the
  # original range would return the TV column and the last measurement.
  # Such a specimen has an empty distal region instead.
  keep <- if (limit_trans < n_meas) (limit_trans + 1):n_meas else integer(0)
  distal <- x[keep]
  names(distal) <- NULL
  return(distal)
})

### Assemble table in a list and fill gaps with NA

In [26]:
# Collect the three regions, then turn each into a specimen x measurement
# matrix by right-padding the shorter specimens with NA.
regions <- list(proximal = proximal,
                transitional = transitional,
                distal = distal)
regions_mat <- lapply(regions, function(x){
  # apply() returns a list when specimens differ in length, but simplifies to
  # a matrix (one column per specimen) when they all have the same length.
  # Normalise that case to a list so the padding below treats whole specimens,
  # not single cells, as elements.
  if (is.matrix(x)) {
    x <- setNames(lapply(seq_len(ncol(x)), function(j) x[, j]), colnames(x))
  }
  max_length <- max(lengths(x))
  x <- lapply(x, function(y) c(y, rep(NA, max_length - length(y))))
  # One row per specimen; list names become the row names.
  do.call(rbind, x)
})

### Create a table with last 25% from the distal region per specimen ("Last Quarter")

#### Isolating distal data of interest

In [27]:
# Number of non-missing distal measurements per specimen.
distL <- rowSums(!is.na(regions_mat$distal))
# Four measurements per vertebra -> number of distal vertebrae.
distLvrt <- distL / 4
# 25% of the distal vertebrae, rounded up to a whole vertebra.
distLQ <- ceiling((25 * distLvrt) / 100)
# Back to a number of measurement values (4 per vertebra), as a bare vector.
distLQm <- 4 * distLQ
distLQv <- as.vector(distLQm)
distalDF <- as.data.frame(regions_mat$distal)

#### Creating a function extracting the last 25% from the distal region

In [28]:
#' Keep the last `vec[i]` non-missing values of every row of `df`.
#'
#' @param df Data frame or matrix with one row per specimen.
#' @param vec Number of trailing values to keep for each row of `df`.
#' @param species Row names for the result, one per row of `df`.
#' @return A data frame with `nrow(df)` rows and `max(vec)` columns. Row `i`
#'   holds its kept values in the first `vec[i]` columns (left-padded with NA
#'   if the row has fewer non-missing values than requested); the remaining
#'   columns are NA.
tail_func <- function(df, vec, species) {
  values <- as.matrix(df)
  max_tail <- max(vec)
  # Preallocate the result instead of growing it with cbind() in the loop.
  data_tail <- matrix(NA_real_, nrow = nrow(values), ncol = max_tail)
  # seq_len() keeps the loop empty for a zero-row input (1:0 would run twice).
  for (i in seq_len(nrow(values))) {
    row_values <- values[i, ]
    # NOTE(review): ALL missing values are dropped, not only trailing ones, so
    # a missing value inside the row shifts the values after it by one
    # position - confirm this is intended when columns are later read in
    # fixed groups.
    row_values <- unname(row_values[!is.na(row_values)])
    n_keep <- min(vec[i], length(row_values))
    # Nothing requested or nothing available: the row stays NA.
    if (n_keep < 1) next
    n_values <- length(row_values)
    kept <- row_values[(n_values - n_keep + 1):n_values]
    # Right-align inside the first vec[i] columns.
    data_tail[i, (vec[i] - n_keep) + seq_len(n_keep)] <- kept
  }
  rownames(data_tail) <- species
  as.data.frame(data_tail)
}

#### Apply the function to the dataset

In [29]:
# Last quarter of the distal region: for every specimen keep its final
# distLQv[i] distal measurement values, with rows labelled by species.
LastQ <- tail_func(df = distalDF, vec = distLQv, species = species)

### Per region measure extraction

##### Transverse process width

In [30]:
# Transverse process width: columns 1, 5, 9, ... of each region matrix.
tableTPWreg <- lapply(regions_mat, function(region) {
  first_of_each_vertebra <- seq(from = 1, to = ncol(region), by = 4)
  region[, first_of_each_vertebra]
})

##### Mid-distal width

In [31]:
# Mid-distal width: second column of each group of four (2, 6, 10, ...).
tableMDWreg <- lapply(regions_mat, function(region) {
  first_of_each_vertebra <- seq(from = 1, to = ncol(region), by = 4)
  region[, first_of_each_vertebra + 1]
})

##### Centrum length

In [32]:
# Centrum length: third column of each group of four (3, 7, 11, ...).
tableCTLreg <- lapply(regions_mat, function(region) {
  first_of_each_vertebra <- seq(from = 1, to = ncol(region), by = 4)
  region[, first_of_each_vertebra + 2]
})

##### Centrum height

In [33]:
# Centrum height: fourth column of each group of four (4, 8, 12, ...).
tableCTHreg <- lapply(regions_mat, function(region) {
  first_of_each_vertebra <- seq(from = 1, to = ncol(region), by = 4)
  region[, first_of_each_vertebra + 3]
})

### Last Quarter measure extraction

##### Transverse process width

In [34]:
# Transverse process width: first column of each group of four. The column
# count is read from LastQ itself (as the per-region tables do) instead of a
# hard-coded 40, which errors when LastQ is narrower and silently truncates
# when it is wider. drop = FALSE keeps a data frame even with one vertebra.
tableTPWq <- LastQ[, seq(1, ncol(LastQ), by = 4), drop = FALSE]

##### Mid-distal width

In [35]:
# Mid-distal width: second column of each group of four. The column count is
# read from LastQ itself (as the per-region tables do) instead of a hard-coded
# 40, which errors when LastQ is narrower and silently truncates when it is
# wider. drop = FALSE keeps a data frame even with one vertebra.
tableMDWq <- LastQ[, seq(1, ncol(LastQ), by = 4) + 1, drop = FALSE]

##### Centrum length

In [36]:
# Centrum length: third column of each group of four. The column count is
# read from LastQ itself (as the per-region tables do) instead of a hard-coded
# 40, which errors when LastQ is narrower and silently truncates when it is
# wider. drop = FALSE keeps a data frame even with one vertebra.
tableCTLq <- LastQ[, seq(1, ncol(LastQ), by = 4) + 2, drop = FALSE]

##### Centrum height

In [37]:
# Centrum height: fourth column of each group of four. The column count is
# read from LastQ itself (as the per-region tables do) instead of a hard-coded
# 40, which errors when LastQ is narrower and silently truncates when it is
# wider. drop = FALSE keeps a data frame even with one vertebra.
tableCTHq <- LastQ[, seq(1, ncol(LastQ), by = 4) + 3, drop = FALSE]

### Per vertebrae region RI

#### Proximal (All)

In [38]:
# Robusticity index (%) of each proximal vertebra: mid-distal width relative
# to centrum length.
RIprox <- (tableMDWreg$proximal / tableCTLreg$proximal) * 100

#### Proximal (Species)

In [39]:
# Mean proximal RI per row (one row per specimen); NA padding is ignored and a
# row without any value yields NaN.
RIproxmean <- rowMeans(RIprox, na.rm = TRUE)

#### Transitional (All)

In [40]:
# Robusticity index (%) of each transitional vertebra: mid-distal width
# relative to centrum length.
RItran <- (tableMDWreg$transitional / tableCTLreg$transitional) * 100

#### Transitional (Species)

In [41]:
# Mean transitional RI per row (one row per specimen); NA padding is ignored
# and a row without any value yields NaN.
RItranmean <- rowMeans(RItran, na.rm = TRUE)

#### Distal (All)

In [42]:
# Robusticity index (%) of each distal vertebra: mid-distal width relative to
# centrum length.
RIdist <- (tableMDWreg$distal / tableCTLreg$distal) * 100

#### Distal (Species)

In [43]:
# Mean distal RI per row (one row per specimen); NA padding is ignored and a
# row without any value yields NaN.
RIdistmean <- rowMeans(RIdist, na.rm = TRUE)

#### Last Quarter (All)

In [44]:
# Robusticity index (%) of each vertebra in the last quarter of the distal
# region: mid-distal width relative to centrum length.
RIlq <- (tableMDWq / tableCTLq) * 100

#### Last Quarter (Species)

In [45]:
# Mean last-quarter RI per row (one row per specimen); NA padding is ignored
# and a row without any value yields NaN.
RIlqmean <- rowMeans(RIlq, na.rm = TRUE)

### Per vertebrae region TPEI

#### Proximal (All)

In [46]:
# Transverse process expansion index (%) of each proximal vertebra:
# transverse process width relative to centrum length.
TPEIprox <- (tableTPWreg$proximal / tableCTLreg$proximal) * 100

#### Proximal (Species)

In [47]:
# Mean proximal TPEI per row (one row per specimen); NA padding is ignored and
# a row without any value yields NaN.
TPEIproxmean <- rowMeans(TPEIprox, na.rm = TRUE)

#### Transitional (All)

In [48]:
# Transverse process expansion index (%) of each transitional vertebra:
# transverse process width relative to centrum length.
TPEItran <- (tableTPWreg$transitional / tableCTLreg$transitional) * 100

#### Transitional (Species)

In [49]:
# Mean transitional TPEI per row (one row per specimen); NA padding is ignored
# and a row without any value yields NaN.
TPEItranmean <- rowMeans(TPEItran, na.rm = TRUE)

#### Distal (All)

In [50]:
# Transverse process expansion index (%) of each distal vertebra: transverse
# process width relative to centrum length.
TPEIdist <- (tableTPWreg$distal / tableCTLreg$distal) * 100

#### Distal (Species)

In [51]:
# Mean distal TPEI per row (one row per specimen); NA padding is ignored and a
# row without any value yields NaN.
TPEIdistmean <- rowMeans(TPEIdist, na.rm = TRUE)

#### Last Quarter (All)

In [52]:
# Transverse process expansion index (%) of each vertebra in the last quarter
# of the distal region: transverse process width relative to centrum length.
TPEIlq <- (tableTPWq / tableCTLq) * 100

#### Last Quarter (Species)

In [53]:
# Mean last-quarter TPEI per row (one row per specimen); NA padding is ignored
# and a row without any value yields NaN.
TPEIlqmean <- rowMeans(TPEIlq, na.rm = TRUE)

## Preparing synthetic output table

### Merging all measurements

In [54]:
# Bind all per-specimen metrics side by side. Row names come from `vertnumb`;
# the plain vectors are named after their variable, and the two size matrices
# keep their own column names (Proxy_size_raw, Proxy_size_GM).
# NOTE(review): cbind() matches rows by position only - this assumes that
# `vertnumb` lists exactly the same specimens, in the same order, as the
# measurement tables. Confirm against the input files.
table_synth <- cbind(vertnumb, TailLength, sizef, size_prox, RImean, RIproxmean, RItranmean, RIdistmean, RIlqmean, TPEImean, TPEIproxmean, TPEItranmean, TPEIdistmean, TPEIlqmean)

### Removing specimens not included in the phylogeny (Chiropodomys pusillus & Haeromys pusillus)

In [55]:
# Drop the rows named exactly "CHIpusi" or "HAEpusi".
# NOTE(review): only exact row names are removed; a row with a suffixed name
# (e.g. "CHIpusi.1") would be kept - confirm each species has a single row.
table_synth <- table_synth[!(rownames(table_synth) %in% c("CHIpusi", "HAEpusi")), ]

### Isolating Nomenclature code

In [56]:
# Nomenclature code of every row: the part of the row name before the first
# "." (row names of repeated specimens carry a ".n" suffix).
data_names <- data.frame(
  Nomenclature = str_split_fixed(rownames(table_synth), "[.]", n = 2)[, 1]
)

### Merging with ecological data

In [57]:
# Species code of each specimen row (row name up to the first ".").
Nomenclature <- str_split_fixed(string = rownames(table_synth), pattern = "[.]", 2)[, 1]

# Average every metric over the specimens sharing a code. mean() is called
# without na.rm, so a missing value in any specimen makes the species value
# missing as well.
table_synth <- aggregate(
  table_synth,
  by = list(Nomenclature = Nomenclature),
  FUN = mean
)

In [58]:
# Keep the ecological rows whose code occurs in the measurement table, then
# collapse the duplicates created by species with several specimens.
data_eco <- unique(merge(data_names, data_eco, by = "Nomenclature"))

In [59]:
# Join ecological variables and species-level metrics on the nomenclature
# code, then reset the row names to the default numbering.
table_synth <- merge(x = data_eco, y = table_synth, by = "Nomenclature")
row.names(table_synth) <- NULL

## Saving synthetic data

In [60]:
# Save the final species-level table as a tab-separated file.
write.table(x = table_synth, file = "data/Complete_Table.tsv", sep = "\t")