# Phylogenetic signal of cell size data

In [1]:
library(ape)
library(phytools)
library(caper)
library(geiger)

Loading required package: maps

Loading required package: MASS

Loading required package: mvtnorm



### Tree and data

Read phylogenetic tree.

In [2]:
tree <- read.tree("../phylogeny/place/fine_all.nwk")
tree


Phylogenetic tree with 5380 tips and 1961 internal nodes.

Tip labels:
  taxid71518, taxid83984, taxid2193, taxid83985, taxid71152, taxid2203, ...
Node labels:
  N1, N5, N18, N51, N79, N119, ...

Rooted; includes branch lengths.

Read data table.

In [3]:
data <- read.table("../phylogeny/place/fine_all.tsv", header=TRUE, sep="\t", quote="")
head(data, 3)

Unnamed: 0_level_0,taxid,length,width,volume,surface,shape,species,genus,family,order,⋯,rank,node,genome,gc,proteins,coding,rrnas,MILC,ENCprime,hash
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,taxid11,2.371708,1.0606602,1.783187,7.902917,rod-shaped,Cellulomonas gilvus,Cellulomonas,Cellulomonadaceae,Micrococcales,⋯,species,G000218545,3526441,73.81,3206,91.77278,2,-0.262005,0.10083562,1.15
2,taxid14,10.0,0.4898979,1.8541744,15.390598,rod-shaped,Dictyoglomus thermophilum,Dictyoglomus,Dictyoglomaceae,Dictyoglomales,⋯,species,G000020965,1959987,33.74,1890,93.77725,2,-0.0644294,0.03020484,1.13
3,taxid23,1.5,0.7,0.4874705,3.298672,rod-shaped,Shewanella colwelliana,Shewanella,Shewanellaceae,Alteromonadales,⋯,species,G000518705,4575622,45.39,4094,87.38314,0,-0.6533632,0.24898652,1.16


In [4]:
nrow(data)

Log-transform some metrics.

In [5]:
# Volume-to-surface ratio of each cell (volume divided by surface area)
data[["svratio"]] <- with(data, volume / surface)

In [6]:
cols = c("length", "width", "volume", "surface", "genome", "proteins", "coding", "rrnas", "svratio")

In [7]:
# Add a log10-transformed copy of every metric in `cols`, named "log_<metric>".
# NOTE(review): log10(0) is -Inf, so metrics that contain zeros (e.g. rRNA
# counts) produce non-finite values in the derived column - confirm that
# downstream model fits are meant to receive them.
for (col in cols) {
    data[[paste0("log_", col)]] <- log10(data[[col]])
}

In [8]:
head(data, 3)

Unnamed: 0_level_0,taxid,length,width,volume,surface,shape,species,genus,family,order,⋯,svratio,log_length,log_width,log_volume,log_surface,log_genome,log_proteins,log_coding,log_rrnas,log_svratio
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,taxid11,2.371708,1.0606602,1.783187,7.902917,rod-shaped,Cellulomonas gilvus,Cellulomonas,Cellulomonadaceae,Micrococcales,⋯,0.2256366,0.3750613,0.02557626,0.2511969,0.8977874,6.547337,3.505964,1.962714,0.30103,-0.6465905
2,taxid14,10.0,0.4898979,1.8541744,15.390598,rod-shaped,Dictyoglomus thermophilum,Dictyoglomus,Dictyoglomaceae,Dictyoglomales,⋯,0.1204745,1.0,-0.30989438,0.2681506,1.1872555,6.292253,3.276462,1.972098,0.30103,-0.9191049
3,taxid23,1.5,0.7,0.4874705,3.298672,rod-shaped,Shewanella colwelliana,Shewanella,Shewanellaceae,Alteromonadales,⋯,0.1477778,0.1760913,-0.15490196,-0.3120517,0.5183392,6.66045,3.612148,1.941428,-inf,-0.8303909


### Pagel's $\lambda, \sigma^2$ - geiger 

In [9]:
# Binarize tree - required for geiger package
tree2 <- multi2di(tree)

In [18]:
# Empty results table: one row per trait will be appended later
df_geiger <- setNames(
    data.frame(matrix(ncol = 6, nrow = 0)),
    c("lambda", "pval0", "pval1", "sigsq", "sigsq_unb", "lnL")
)
df_geiger

lambda,pval0,pval1,sigsq,sigsq_unb,lnL
<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>


In [10]:
lambda_sigsq <- function(tree, x){
    # Fit Pagel's lambda model to a continuous trait and test it against
    # the two limiting cases lambda = 0 and lambda = 1.
    #
    # Args:
    #   tree: binarized phylogeny (class "phylo") with branch lengths.
    #   x:    trait values; names are expected to be tip labels of `tree`.
    #
    # Returns a list with:
    #   lambda    - estimate of lambda under the lambda model
    #   pval0     - likelihood ratio test p-value against lambda = 0
    #   pval1     - likelihood ratio test p-value against lambda = 1
    #   sigsq     - sigma^2 estimate under the lambda model
    #   sigsq_unb - sigsq multiplied by n / (n - 1)
    #   lnL       - log-likelihood of the lambda model

    # Number of observations behind the estimate. When `x` is named and
    # covers only part of the tree, count the shared tips rather than every
    # tip in `tree`; with fully matching inputs this equals the tip count.
    n_obs <- length(tree$tip.label)
    if (!is.null(names(x))) {
        n_shared <- length(intersect(tree$tip.label, names(x)))
        if (n_shared > 1) {
            n_obs <- n_shared
        }
    }

    # Lambda model
    fit_lambda <- fitContinuous(tree, x, model = "lambda")
    lnL <- fit_lambda$opt$lnL

    # Likelihood ratio test of a nested null fit against the lambda model;
    # the p-value comes from a chi-square distribution with 1 df
    lrt_pvalue <- function(fit_null) {
        llr <- -2 * (fit_null$opt$lnL - lnL)
        pchisq(llr, df = 1, lower.tail = FALSE)
    }

    # lambda = 0: rescale the tree to a star phylogeny and fit BM on it.
    # Namespace-qualified so the call does not depend on attach order.
    star_tree <- geiger::rescale(tree, "lambda", 0)
    pval0 <- lrt_pvalue(fitContinuous(star_tree, x, model = "BM"))

    # lambda = 1: plain BM on the unmodified tree
    pval1 <- lrt_pvalue(fitContinuous(tree, x, model = "BM"))

    sigsq <- fit_lambda$opt$sigsq

    list(lambda = fit_lambda$opt$lambda, pval0 = pval0,
         pval1 = pval1, sigsq = sigsq,
         sigsq_unb = sigsq * n_obs / (n_obs - 1),
         lnL = lnL)
}

In [20]:
datum <- setNames(data$log_volume, data$taxid)

In [21]:
l_sq <- lambda_sigsq(tree2, datum)
l_sq

In [22]:
# Append the fit as a new row and label it with the trait name
df_geiger[nrow(df_geiger) + 1, ] <- with(
    l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
)
rownames(df_geiger)[nrow(df_geiger)] <- "log_volume"
df_geiger

Unnamed: 0_level_0,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
log_volume,0.8329569,0,5.925951000000001e-128,0.6851921,0.6853195,-3791.529


In [23]:
# Evaluate the fitted model's likelihood function at 50 paired
# (lambda, sigsq) points for plotting. The i-th lambda is paired with the
# i-th sigsq, so this traces a line through parameter space, not a 2-D grid.
lambda2 <- fitContinuous(tree2, datum, model = 'lambda')
# For lambda
xpos1 <- seq(0, 1, length.out = 50)
# For sigmasq
xpos2 <- seq(0, 3, length.out = 50)
# Preallocate instead of growing the vector inside the loop
ypos <- numeric(length(xpos1))
for (i in seq_along(xpos1)) {
    ypos[i] <- lambda2$lik(c(xpos1[i], xpos2[i]))
}

In [24]:
write.table(data.frame(x1 = xpos1, x2 = xpos2, y = ypos), "physig/log_volume.lambda-sigsq.plot", 
            sep="\t", col.names=FALSE, row.names=FALSE)

Calculate Pagel's _λ_ of log cell surface area.

In [31]:
# Lambda and sigsq with geiger
datum <- setNames(data$log_surface, data$taxid)
l_sq <- lambda_sigsq(tree2, datum)

In [32]:
# Append the fit as a new row and label it with the trait name
df_geiger[nrow(df_geiger) + 1, ] <- with(
    l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
)
rownames(df_geiger)[nrow(df_geiger)] <- "log_surface"
df_geiger

Unnamed: 0_level_0,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
log_volume,0.8329569,0,5.925951000000001e-128,0.6851921,0.6853195,-3791.529
log_surface,0.8484278,0,3.590505e-118,0.3417221,0.3417857,-1815.575


In [33]:
# Evaluate the fitted model's likelihood function at 50 paired
# (lambda, sigsq) points for plotting. The i-th lambda is paired with the
# i-th sigsq, so this traces a line through parameter space, not a 2-D grid.
lambda2 <- fitContinuous(tree2, datum, model = 'lambda')
# For lambda
xpos1 <- seq(0, 1, length.out = 50)
# For sigmasq
xpos2 <- seq(0, 2, length.out = 50)
# Preallocate instead of growing the vector inside the loop
ypos <- numeric(length(xpos1))
for (i in seq_along(xpos1)) {
    ypos[i] <- lambda2$lik(c(xpos1[i], xpos2[i]))
}

In [34]:
write.table(data.frame(x1 = xpos1, x2 = xpos2, y = ypos), "physig/log_surface.lambda-sigsq.plot", 
            sep="\t", col.names=FALSE, row.names=FALSE)

Calculate Pagel's λ of volume-to-surface ratio.

In [41]:
# Lambda and sigsq with geiger
datum <- setNames(data$svratio, data$taxid)
l_sq <- lambda_sigsq(tree2, datum)

“
Parameter estimates appear at bounds:
	lambda”


In [42]:
# Append the fit as a new row and label it with the trait name
df_geiger[nrow(df_geiger) + 1, ] <- with(
    l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
)
rownames(df_geiger)[nrow(df_geiger)] <- "svratio"
df_geiger

Unnamed: 0_level_0,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
log_volume,0.8329569,0,5.925951000000001e-128,0.6851921,0.6853195,-3791.529
log_surface,0.8484278,0,3.590505e-118,0.3417221,0.3417857,-1815.575
svratio,1.0,0,1.0,0.8230996,0.8232526,-2669.255


In [43]:
# Evaluate the fitted model's likelihood function at 50 paired
# (lambda, sigsq) points for plotting. The i-th lambda is paired with the
# i-th sigsq, so this traces a line through parameter space, not a 2-D grid.
lambda2 <- fitContinuous(tree2, datum, model = 'lambda')
# For lambda
xpos1 <- seq(0, 1, length.out = 50)
# For sigmasq
xpos2 <- seq(0, 2, length.out = 50)
# Preallocate instead of growing the vector inside the loop
ypos <- numeric(length(xpos1))
for (i in seq_along(xpos1)) {
    ypos[i] <- lambda2$lik(c(xpos1[i], xpos2[i]))
}

“
Parameter estimates appear at bounds:
	lambda”


In [44]:
write.table(data.frame(x1 = xpos1, x2 = xpos2, y = ypos), "physig/svratio.lambda-sigsq.plot", 
            sep="\t", col.names=FALSE, row.names=FALSE)

Calculate Pagel's _λ_ of all metrics.

In [45]:
cols = c("volume", "surface", "genome", "gc", "proteins", "coding", "rrnas", "log_svratio",
         "log_genome", "log_proteins", "MILC", "ENCprime", "log_coding", "log_rrnas", "hash")

In [46]:
# Fit the lambda model to every remaining metric and collect one row each
for (col in cols) {
    datum <- data[[col]]
    names(datum) <- data$taxid
    # phytools alternative, kept disabled for reference:
    #lambda <- phylosig(tree, datum, method="lambda", test=TRUE)
    #df[nrow(df) + 1,] <- c(lambda$lambda, lambda$logL, lambda$logL0, lambda$P)
    #rownames(df)[nrow(df)] <- col
    # Lambda and sigma^2 via geiger
    l_sq <- lambda_sigsq(tree2, datum)
    df_geiger[nrow(df_geiger) + 1, ] <- with(
        l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
    )
    rownames(df_geiger)[nrow(df_geiger)] <- col
}

“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


In [49]:
df_geiger

Unnamed: 0_level_0,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
log_volume,0.8329569,0.0,5.925951000000001e-128,0.6851921,0.6853195,-3791.529
log_surface,0.8484278,0.0,3.590505e-118,0.3417221,0.3417857,-1815.575
svratio,1.0,0.0,1.0,0.8230996,0.8232526,-2669.255
volume,1.0,0.0,1.0,27531890000.0,27537010000.0,-67856.77
surface,1.0,0.0,1.0,587220400.0,587329600.0,-57506.46
genome,0.9999398,0.0,0.9278988,2529165000000.0,2529635000000.0,-80017.51
gc,1.0,0.0,1.0,65.32254,65.33468,-14435.36
proteins,0.9991343,0.0,0.5912825,2004941.0,2005314.0,-42242.43
coding,0.9579968,0.0,4.405728e-65,10.57644,10.5784,-10098.35
rrnas,0.7578361,0.0,0.0,4.209784,4.210567,-9120.741


In [51]:
write.table(df_geiger, "physig/lambda-sigsq.tsv", sep="\t", quote=FALSE)

**Calculate Pagel's $\lambda$ for data points placed at the tips of the WoL reference phylogeny**

Load tree

In [9]:
tree_wol <- read.tree("../phylogeny/place/fine_all_wol.nwk")
tree_wol


Phylogenetic tree with 1363 tips and 1332 internal nodes.

Tip labels:
  taxid2201, taxid2200, taxid263906, taxid882104, taxid2205, taxid54120, ...
Node labels:
  N1, N5, N18, N51, N79, N119, ...

Rooted; includes branch lengths.

Load data

In [10]:
data_wol <- read.table("../phylogeny/place/fine_all_wol.tsv", header=TRUE, sep="\t", quote="")
dim(data_wol)

In [11]:
# Volume-to-surface ratio of each cell (volume divided by surface area)
data_wol[["svratio"]] <- with(data_wol, volume / surface)

Binarize tree

In [14]:
tree2_wol <- multi2di(tree_wol)

In [15]:
# NOTE(review): this rebinds the global `tree2`, which earlier in the notebook
# held multi2di(tree) (the binarized full tree), to the binarized WoL tree -
# the same call that defines `tree2_wol` in the preceding cell. Any code that
# runs after this cell and reads `tree2` sees the WoL tree.
tree2 <- multi2di(tree_wol)

In [60]:
# Empty results table: one row per trait will be appended later
df_geiger <- setNames(
    data.frame(matrix(ncol = 6, nrow = 0)),
    c("lambda", "pval0", "pval1", "sigsq", "sigsq_unb", "lnL")
)
df_geiger

lambda,pval0,pval1,sigsq,sigsq_unb,lnL
<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>


In [61]:
datum <- setNames(data_wol$log_volume, data_wol$taxid)

In [62]:
# Fit on the binarized WoL tree through its dedicated name, so the result
# does not depend on what the shared `tree2` variable currently holds
l_sq <- lambda_sigsq(tree2_wol, datum)

In [63]:
# Append the fit as a new row and label it with the trait name
df_geiger[nrow(df_geiger) + 1, ] <- with(
    l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
)
rownames(df_geiger)[nrow(df_geiger)] <- "log_volume"

In [64]:
# Log surface area on the WoL tips; fit on the binarized WoL tree through its
# dedicated name rather than the shared `tree2` variable
datum <- setNames(data_wol$log_surface, data_wol$taxid)
l_sq <- lambda_sigsq(tree2_wol, datum)

In [65]:
# Append the fit as a new row and label it with the trait name
df_geiger[nrow(df_geiger) + 1, ] <- with(
    l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
)
rownames(df_geiger)[nrow(df_geiger)] <- "log_surface"

In [66]:
# Volume-to-surface ratio on the WoL tips; fit on the binarized WoL tree
# through its dedicated name rather than the shared `tree2` variable
datum <- setNames(data_wol$svratio, data_wol$taxid)
l_sq <- lambda_sigsq(tree2_wol, datum)

In [None]:
# Append the fit as a new row and label it with the trait name
df_geiger[nrow(df_geiger) + 1, ] <- with(
    l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
)
rownames(df_geiger)[nrow(df_geiger)] <- "svratio"

In [68]:
cols = c("volume", "surface", "genome", "gc", "proteins", "coding", "rrnas", "log_svratio",
         "log_genome", "log_proteins", "ENCprime", "arc_coding", "arc_gc", "hash")

In [69]:
# Fit the lambda model to every remaining WoL metric, one result row each.
# The binarized WoL tree is referenced by its dedicated name so the loop does
# not depend on what the shared `tree2` variable currently holds.
for (col in cols) {
    datum <- setNames(data_wol[[col]], data_wol$taxid)
    l_sq <- lambda_sigsq(tree2_wol, datum)
    df_geiger[nrow(df_geiger) + 1,] <- c(l_sq$lambda, l_sq$pval0, l_sq$pval1, l_sq$sigsq,
                                    l_sq$sigsq_unb, l_sq$lnL)
    rownames(df_geiger)[nrow(df_geiger)] <- col
}

“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


In [70]:
df_geiger

Unnamed: 0_level_0,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
log_volume,0.725766,5.344874e-121,7.723809000000001e-75,0.5926582,0.5930934,-1057.5678
log_surface,0.7489596,4.733365e-112,1.461248e-71,0.298663,0.2988823,-561.9867
svratio,0.4639481,8.522829999999999e-89,1.113416e-10,0.05105509,0.05109258,372.2584
volume,0.02815038,0.01212248,1.603914e-91,195032.2,195175.4,-10182.1117
surface,1.0752350000000001e-163,1.0,1.493138e-45,8704.827,8711.218,-8068.9799
genome,0.9983199,1.167223e-286,0.3788883,4753077000000.0,4756567000000.0,-20734.2607
gc,1.0,0.0,1.0,128.0842,128.1782,-4139.889
proteins,0.9964982,7.374988e-263,0.4948841,3702380.0,3705099.0,-11157.754
coding,0.9420309,1.1540389999999998e-254,3.195706e-23,21.46298,21.47873,-3129.0243
rrnas,0.2139133,9.989505e-12,1.414415e-155,4.085458,4.088458,-2764.0221


In [71]:
write.table(df_geiger, "physig/lambda-sigsq_wol.tsv", sep="\t", quote=FALSE)

Blomberg's K for data placed at the tips of WoL

In [72]:
# Empty table for Blomberg's K and its permutation p-value, one row per trait
df_wol_k <- setNames(data.frame(matrix(ncol = 2, nrow = 0)), c("K", "P"))

In [73]:
cols = c('log_volume', 'log_surface', 'svratio',
         "volume", "surface", "genome", "gc", "proteins", "coding", "rrnas", "log_svratio",
         "log_genome", "log_proteins", "ENCprime", "arc_coding", "arc_gc", "hash")

In [74]:
# Blomberg's K for every WoL metric. The seed is reset per trait so each
# permutation test is reproducible on its own. The binarized WoL tree is
# referenced by its dedicated name rather than the shared `tree2` variable.
for (col in cols) {
    set.seed(42)
    datum <- setNames(data_wol[[col]], data_wol$taxid)
    K <- phylosig(tree2_wol, datum, method = "K", test = TRUE)
    df_wol_k[nrow(df_wol_k) + 1,] <- c(K$K, K$P)
    rownames(df_wol_k)[nrow(df_wol_k)] <- col
}

In [75]:
df_wol_k

Unnamed: 0_level_0,K,P
Unnamed: 0_level_1,<dbl>,<dbl>
log_volume,0.2627441,0.001
log_surface,0.2601296,0.001
svratio,0.2708281,0.003
volume,0.1448924,0.624
surface,0.1705959,0.48
genome,0.8849974,0.001
gc,1.6388989,0.001
proteins,0.8139452,0.001
coding,0.5619261,0.001
rrnas,0.1347479,0.369


In [76]:
df_wol_k

Unnamed: 0_level_0,K,P
Unnamed: 0_level_1,<dbl>,<dbl>
log_volume,0.2627441,0.001
log_surface,0.2601296,0.001
svratio,0.2708281,0.003
volume,0.1448924,0.624
surface,0.1705959,0.48
genome,0.8849974,0.001
gc,1.6388989,0.001
proteins,0.8139452,0.001
coding,0.5619261,0.001
rrnas,0.1347479,0.369


### Blomberg's _K_

In [77]:
# Empty table for Blomberg's K and its permutation p-value, one row per trait
df <- setNames(data.frame(matrix(ncol = 2, nrow = 0)), c("K", "P"))

Log cell volume.

In [78]:
datum <- setNames(data$log_volume, data$taxid)

In [79]:
set.seed(42)
K <- phylosig(tree, datum, method="K", test=TRUE)
K


Phylogenetic signal K : 0.357735 
P-value (based on 1000 randomizations) : 0.001 


In [80]:
writeLines(capture.output(K), "physig/log_volume.K")

In [81]:
write.table(K$sim.K, "physig/log_volume.K.plot", col.names=FALSE, row.names=FALSE)

In [82]:
df[nrow(df) + 1,] <- c(K$K, K$P)
rownames(df)[nrow(df)] <- "log_volume"

Log cell surface area.

In [83]:
datum <- setNames(data$log_surface, data$taxid)

In [84]:
set.seed(42)
K <- phylosig(tree, datum, method="K", test=TRUE)
K


Phylogenetic signal K : 0.360687 
P-value (based on 1000 randomizations) : 0.001 


In [85]:
writeLines(capture.output(K), "physig/log_surface.K")

In [86]:
write.table(K$sim.K, "physig/log_surface.K.plot", col.names=FALSE, row.names=FALSE)

In [87]:
df[nrow(df) + 1,] <- c(K$K, K$P)
rownames(df)[nrow(df)] <- "log_surface"

Volume-to-surface ratio (`svratio` = volume / surface).

In [88]:
datum <- setNames(data$svratio, data$taxid)

In [89]:
set.seed(42)
K <- phylosig(tree, datum, method="K", test=TRUE)
K


Phylogenetic signal K : 0.340295 
P-value (based on 1000 randomizations) : 0.021 


In [90]:
writeLines(capture.output(K), "physig/svratio.K")

In [91]:
write.table(K$sim.K, "physig/svratio.K.plot", col.names=FALSE, row.names=FALSE)

In [92]:
df[nrow(df) + 1,] <- c(K$K, K$P)
rownames(df)[nrow(df)] <- "svratio"

Other metrics.

In [93]:
cols = c("volume", "surface", "genome", "gc", "proteins", "coding", "rrnas", "log_svratio",
         "log_genome", "log_proteins", "MILC", "ENCprime", "hash")

In [94]:
# Blomberg's K for the remaining metrics; the seed is reset per trait so each
# permutation test is reproducible on its own
for (col in cols) {
    set.seed(42)
    datum <- data[[col]]
    names(datum) <- data$taxid
    K <- phylosig(tree, datum, method = "K", test = TRUE)
    df[nrow(df) + 1, ] <- c(K[["K"]], K[["P"]])
    rownames(df)[nrow(df)] <- col
}

In [95]:
df

Unnamed: 0_level_0,K,P
Unnamed: 0_level_1,<dbl>,<dbl>
log_volume,0.3577353,0.001
log_surface,0.3606872,0.001
svratio,0.3402954,0.021
volume,0.4224734,0.04
surface,0.3071818,0.202
genome,2.0974696,0.001
gc,3.7109566,0.001
proteins,1.9170825,0.001
coding,1.5756372,0.001
rrnas,0.2214721,0.001


In [96]:
write.table(df, "physig/K.tsv", sep="\t", quote=FALSE)

### Phylogenetic signal across sample sizes

In [11]:
sizes <- c(100, 500, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5380)
# sizes <- c(100, 500, 1500)

In [12]:
# Empty results table: one row per sample size will be appended
df_sample_size <- setNames(
    data.frame(matrix(ncol = 6, nrow = 0)),
    c("lambda", "pval0", "pval1", "sigsq", "sigsq_unb", "lnL")
)

In [13]:
# Lambda of log_svratio on random subsamples of the full data set.
# The subsamples are drawn from `data` (the full table), so the tree is
# binarized here from the full `tree` instead of relying on the shared
# `tree2` variable, which the WoL section rebinds to the WoL tree.
tree2_full <- multi2di(tree)
for (size in sizes) {
    # Same seed for every size so each draw is reproducible on its own
    set.seed(42)
    data_sampled <- data[sample(nrow(data), size, replace = FALSE), ]
    # Keep only the tips that were sampled
    tree2_sampled <- drop.tip(tree2_full, setdiff(tree2_full$tip.label, data_sampled$taxid))
    datum_sampled <- setNames(data_sampled[['log_svratio']], data_sampled$taxid)
    l_sq <- lambda_sigsq(tree2_sampled, datum_sampled)
    # Add to data frame
    df_sample_size[nrow(df_sample_size) + 1,] <- c(l_sq$lambda, l_sq$pval0, l_sq$pval1, l_sq$sigsq,
                                    l_sq$sigsq_unb, l_sq$lnL)
    rownames(df_sample_size)[nrow(df_sample_size)] <- size
}

In [14]:
df_sample_size

Unnamed: 0_level_0,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
100,0.7948376,0.005730776,0.05081482,0.06306451,0.06370153,26.64912
500,0.7641655,4.639293e-45,4.878156e-12,0.07587149,0.07602354,109.34821
1500,0.7926591,6.457841999999999e-166,9.038552e-31,0.06911336,0.06915947,509.18188
2000,0.8028735,5.537476e-250,1.711738e-36,0.06896744,0.06900194,731.56596
2500,0.7978041,1.5e-323,2.215865e-67,0.07057436,0.0706026,900.8309
3000,0.8066584,0.0,3.719949e-77,0.07374708,0.07377167,1065.00995
3500,0.8239937,0.0,5.187518e-85,0.07711937,0.07714142,1263.66121
4000,0.8376513,0.0,5.169025e-85,0.08154921,0.0815696,1419.65614
4500,0.8325312,0.0,8.518685e-99,0.08015271,0.08017053,1624.73442
5000,0.8351676,0.0,3.747686e-113,0.08211403,0.08213045,1779.61059


In [15]:
write.table(df_sample_size, "physig/lambda_random_sample_size.tsv", sep="\t", quote=FALSE)

### Same but only sampling $n$ species per genus.

The most abundant genus is Flavobacterium with 119 species. 

In [16]:
genera <- unique(data$genus)

In [17]:
# sizes <- c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22)
sizes <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 119)

In [18]:
# Empty results table: one row per per-genus cap, including the realized
# number of sampled species
df_sample_size <- setNames(
    data.frame(matrix(ncol = 7, nrow = 0)),
    c("sample_size", "lambda", "pval0", "pval1", "sigsq", "sigsq_unb", "lnL")
)

In [19]:
n <- ncol(data)

In [20]:
# Lambda of log_svratio when at most `size` species are kept per genus.
# The subsamples are drawn from `data` (the full table), so the tree is
# binarized here from the full `tree` instead of relying on the shared
# `tree2` variable, which the WoL section rebinds to the WoL tree.
# NOTE(review): rows whose genus is an empty string are treated as one
# genus and capped together - confirm this is intended.
tree2_full <- multi2di(tree)
for (size in sizes) {
    # Seed per cap so the draw is reproducible, matching the random
    # subsampling analysis
    set.seed(42)
    # One data frame per genus, bound once at the end (avoids growing a
    # data frame with rbind inside the loop)
    per_genus <- lapply(genera, function(genus) {
        sps <- data[data[['genus']] == genus,]
        if (nrow(sps) < size) {
            # Fewer species than the cap: keep them all
            sps
        } else {
            # At least `size` species: draw `size` without replacement
            sps[sample(nrow(sps), size, replace = FALSE), ]
        }
    })
    data_sampled <- do.call(rbind, per_genus)
    # Keep only the tips that were sampled
    tree2_sampled <- drop.tip(tree2_full, setdiff(tree2_full$tip.label, data_sampled$taxid))
    datum_sampled <- setNames(data_sampled[['log_svratio']], data_sampled$taxid)
    l_sq <- lambda_sigsq(tree2_sampled, datum_sampled)
    sample_size <- length(datum_sampled)
    # Add results
    df_sample_size[nrow(df_sample_size) + 1,] <- c(sample_size, l_sq$lambda, l_sq$pval0, l_sq$pval1, l_sq$sigsq,
                                    l_sq$sigsq_unb, l_sq$lnL)
    rownames(df_sample_size)[nrow(df_sample_size)] <- size
}

In [21]:
df_sample_size

Unnamed: 0_level_0,sample_size,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
10,4175,0.8386075,0,7.158549000000001e-105,0.08804618,0.08806728,1363.726
20,4720,0.8417036,0,9.786542e-111,0.08624691,0.08626518,1628.451
30,4984,0.8386029,0,3.648842e-116,0.08431075,0.08432767,1758.185
40,5102,0.8384277,0,2.3309740000000003e-118,0.08398422,0.08400068,1804.733
50,5181,0.8328847,0,4.782669e-123,0.08212209,0.08213794,1850.884
60,5237,0.8336151,0,8.241913e-124,0.08200427,0.08201993,1876.78
70,5277,0.831184,0,7.382404e-126,0.08132441,0.08133982,1893.834
80,5304,0.8315466,0,9.593816e-126,0.08146574,0.08148111,1899.684
90,5324,0.8301097,0,5.318876e-127,0.08089757,0.08091277,1916.719
100,5344,0.8304573,0,5.304012e-128,0.08127539,0.0812906,1914.175


In [22]:
write.table(df_sample_size, "physig/lambda_random_sample_genus.tsv", sep="\t", quote=FALSE)

### Rod-shaped only

In [127]:
data_ <- data[data$shape=="rod-shaped",]

In [128]:
tree_ <- drop.tip(tree, setdiff(tree$tip.label, data_$taxid))
tree_


Phylogenetic tree with 3873 tips and 1380 internal nodes.

Tip labels:
  taxid668570, taxid1277351, taxid263906, taxid547055, taxid882104, taxid487686, ...
Node labels:
  N1, N5, N18, N51, N119, N265, ...

Rooted; includes branch lengths.

In [129]:
# Binarized tree for geiger
tree2_ <- multi2di(tree_)
tree2_


Phylogenetic tree with 3873 tips and 3872 internal nodes.

Tip labels:
  taxid668570, taxid1277351, taxid263906, taxid547055, taxid882104, taxid487686, ...
Node labels:
  N1, N5, N18, N51, N119, N265, ...

Rooted; includes branch lengths.

In [130]:
# Empty table for the phytools lambda results, one row per trait
df <- setNames(
    data.frame(matrix(ncol = 4, nrow = 0)),
    c("lambda", "logL", "logL0", "P")
)

In [131]:
# Empty table for the geiger lambda / sigma^2 results, one row per trait
df_geiger <- setNames(
    data.frame(matrix(ncol = 6, nrow = 0)),
    c("lambda", "pval0", "pval1", "sigsq", "sigsq_unb", "lnL")
)

In [132]:
# Rod-shaped cells only: estimate lambda with phytools (original tree) and
# lambda plus sigma^2 with geiger (binarized tree) for each size metric
for (col in c("log_volume", "log_surface", "svratio")) {
    datum <- data_[[col]]
    names(datum) <- data_$taxid
    # phytools
    lambda <- phylosig(tree_, datum, method = "lambda", test = TRUE)
    df[nrow(df) + 1, ] <- c(lambda[["lambda"]], lambda[["logL"]],
                            lambda[["logL0"]], lambda[["P"]])
    rownames(df)[nrow(df)] <- col
    # geiger
    l_sq <- lambda_sigsq(tree2_, datum)
    df_geiger[nrow(df_geiger) + 1, ] <- with(
        l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
    )
    rownames(df_geiger)[nrow(df_geiger)] <- col
}

In [133]:
df

Unnamed: 0_level_0,lambda,logL,logL0,P
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
log_volume,0.6619315,-2331.6914,-2759.663,3.7101520000000004e-188
log_surface,0.6922851,-931.8798,-1403.819,2.842225e-207
svratio,0.6223762,5873.5366,5523.045,1.828198e-154


In [134]:
write.table(df, "physig/rod.lambda.tsv", sep="\t", quote=FALSE)

In [135]:
df_geiger

Unnamed: 0_level_0,lambda,pval0,pval1,sigsq,sigsq_unb,lnL
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
log_volume,0.6619316,3.7101520000000004e-188,4.507086e-115,0.407793474,0.407898793,-2331.6914
log_surface,0.6922921,2.842225e-207,4.648813e-108,0.208363058,0.208416871,-931.8798
svratio,0.6223621,1.828198e-154,4.102306e-168,0.005534952,0.005536381,5873.5366


In [136]:
write.table(df_geiger, "physig/rod.lambda-sigsq.tsv", sep="\t", quote=FALSE)

### Taxonomic ranks

In [137]:
ranks = c("species", "genus", "family", "order", "class", "phylum")

In [138]:
# For every taxonomic rank: load the tree and trait table collapsed to that
# rank, derive the log-transformed size metrics, estimate lambda with both
# phytools and geiger, and write one result file per method.
for (rank in ranks) {
    tree_ <- read.tree(paste0("place/", rank, "_all", ".nwk"))
    data_ <- read.table(paste0("place/", rank, "_all", ".tsv"),
                        header = TRUE, sep = "\t", quote = "")
    # Binarized copy for geiger
    tree2_ <- multi2di(tree_)
    print(paste(rank, nrow(data_), sep = ": "))

    # Derived metrics: volume / surface and log10 of each size metric
    data_[["svratio"]] <- data_[["volume"]] / data_[["surface"]]
    for (col in c("volume", "surface", "svratio")) {
        data_[[paste0("log_", col)]] <- log10(data_[[col]])
    }

    # Fresh result tables for this rank
    df <- setNames(
        data.frame(matrix(ncol = 4, nrow = 0)),
        c("lambda", "logL", "logL0", "P")
    )
    df_geiger <- setNames(
        data.frame(matrix(ncol = 6, nrow = 0)),
        c("lambda", "pval0", "pval1", "sigsq", "sigsq_unb", "lnL")
    )

    for (col in c("log_volume", "log_surface", "log_svratio")) {
        datum <- setNames(data_[[col]], data_$taxid)
        # phytools
        lambda <- phylosig(tree_, datum, method = "lambda", test = TRUE)
        df[nrow(df) + 1, ] <- c(lambda[["lambda"]], lambda[["logL"]],
                                lambda[["logL0"]], lambda[["P"]])
        rownames(df)[nrow(df)] <- col

        # geiger
        l_sq <- lambda_sigsq(tree2_, datum)
        df_geiger[nrow(df_geiger) + 1, ] <- with(
            l_sq, c(lambda, pval0, pval1, sigsq, sigsq_unb, lnL)
        )
        rownames(df_geiger)[nrow(df_geiger)] <- col
    }
    write.table(df, paste0("physig/", rank, ".lambda.tsv"),
                sep = "\t", quote = FALSE)
    write.table(df_geiger, paste0("physig/", rank, ".lambda-sigsq.tsv"),
                sep = "\t", quote = FALSE)
}

[1] "species: 1491"
[1] "genus: 1143"
[1] "family: 342"
[1] "order: 141"
[1] "class: 60"
[1] "phylum: 27"


“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


### Taxonomic groups

Taxonomic groups that have 50 or more samples will be analyzed.

In [139]:
# phytools results: group identity plus lambda, both log-likelihoods and the
# p-value for volume (V), surface (S) and volume/surface ratio (SV)
df <- setNames(
    data.frame(matrix(ncol = 15, nrow = 0)),
    c("rank", "taxon", "count",
      "V.lambda", "V.logL", "V.logL0", "V.P",
      "S.lambda", "S.logL", "S.logL0", "S.P",
      "SV.lambda", "SV.logL", "SV.logL0", "SV.P")
)
# geiger results: group identity plus the six lambda_sigsq outputs per metric
df_geiger <- setNames(
    data.frame(matrix(ncol = 21, nrow = 0)),
    c("rank", "taxon", "count",
      "V.lambda", "V.pval0", "V.pval1", "V.sigsq", "V.sigsq_unb", "V.lnL",
      "S.lambda", "S.pval0", "S.pval1", "S.sigsq", "S.sigsq_unb", "S.lnL",
      "SV.lambda", "SV.pval0", "SV.pval1", "SV.sigsq", "SV.sigsq_unb", "SV.lnL")
)

In [140]:
th = 50

In [141]:
# Binarized full tree restricted to the taxa present in `data`. Built from
# the full `tree` directly because the shared `tree2` variable is rebound to
# the WoL tree in the WoL section.
# NOTE(review): `tree3` is not read by the cells visible below - confirm it
# is still needed.
tree3 <- drop.tip(multi2di(tree), setdiff(tree$tip.label, data$taxid))

In [142]:
# For every rank above species, estimate lambda within each taxon that has at
# least `th` entries, using phytools (original tree) and geiger (binarized
# tree). One row per taxon is appended to each results table.
for (rank in ranks[-1]) {
    print(rank)
    # Entries per taxon at this rank, ignoring unassigned (empty) labels
    counts <- table(data[[rank]][data[[rank]] != ""])
    for (taxon in names(counts)) {
        n <- counts[[taxon]]
        # Skip taxa below the size threshold
        if (n < th) next

        data_ <- data[data[[rank]] == taxon, ]
        # Prune the tree down to the tips that belong to this taxon
        tree_ <- drop.tip(tree, setdiff(tree$tip.label, data_$taxid))
        tree2_ <- multi2di(tree_)

        # Both result rows start with the group identity and its size
        res <- list(rank, taxon, n)
        res_ <- list(rank, taxon, n)
        for (col in c("log_volume", "log_surface", "log_svratio")) {
            datum <- setNames(data_[[col]], data_$taxid)
            lambda <- phylosig(tree_, datum, method = "lambda", test = TRUE)
            l_sq <- lambda_sigsq(tree2_, datum)
            res <- c(res, lambda[["lambda"]], lambda[["logL"]],
                     lambda[["logL0"]], lambda[["P"]])
            res_ <- c(res_, l_sq[["lambda"]], l_sq[["pval0"]], l_sq[["pval1"]],
                      l_sq[["sigsq"]], l_sq[["sigsq_unb"]], l_sq[["lnL"]])
        }
        df[nrow(df) + 1, ] <- res
        df_geiger[nrow(df_geiger) + 1, ] <- res_
    }
}

[1] "genus"


“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


[1] "family"


“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


[1] "order"


“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


[1] "class"


“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


[1] "phylum"


“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”
“
Parameter estimates appear at bounds:
	lambda”


In [143]:
df

Unnamed: 0_level_0,rank,taxon,count,V.lambda,V.logL,V.logL0,V.P,S.lambda,S.logL,S.logL0,S.P,SV.lambda,SV.logL,SV.logL0,SV.P
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,genus,Flavobacterium,119,3.740304e-01,-88.09643,-88.54687,3.425428e-01,2.062704e-01,-45.1430636,-45.2212366,6.925434e-01,5.506170e-01,41.90976,40.532502,9.698069e-02
2,genus,Halomonas,53,7.672824e-01,-34.71664,-35.14984,3.519539e-01,8.673720e-01,-13.2767523,-13.8607737,2.798038e-01,5.020125e-01,21.61666,21.491163,6.163716e-01
3,genus,Microbacterium,55,3.036224e-01,-45.09428,-45.25689,5.684879e-01,2.699518e-01,-24.2642084,-24.3283357,7.202487e-01,3.380626e-01,16.74787,16.329782,3.604904e-01
4,genus,Mycoplasma,73,9.999339e-01,28.93427,28.87850,7.384020e-01,9.999339e-01,66.8160720,66.7612937,7.406495e-01,9.999339e-01,94.08451,94.027264,7.350970e-01
5,genus,Nocardioides,74,9.999339e-01,-34.60831,-35.74271,1.320008e-01,9.999339e-01,-8.7909772,-9.8774995,1.404479e-01,9.999339e-01,45.61734,44.697746,1.750461e-01
6,genus,Paenibacillus,117,4.885485e-05,-53.87433,-53.87420,1.000000e+00,4.627546e-05,-7.0736710,-7.0735902,1.000000e+00,4.273025e-01,63.02314,63.115946,1.000000e+00
7,genus,Sphingomonas,58,6.610696e-05,-20.86786,-20.86747,1.000000e+00,6.610696e-05,1.5110944,1.5114746,1.000000e+00,6.610696e-05,39.71881,39.719217,1.000000e+00
8,family,Acetobacteraceae,57,6.610696e-05,-27.73803,-27.73724,1.000000e+00,6.610696e-05,-0.9321196,-0.9315129,1.000000e+00,6.610696e-05,23.14402,23.144442,1.000000e+00
9,family,Bacillaceae,239,4.490515e-01,-125.97282,-140.10044,1.063261e-07,4.391927e-01,-34.6236018,-46.6708188,9.172504e-07,5.248549e-01,112.90574,93.472406,4.537637e-10
10,family,Burkholderiaceae,67,7.672388e-01,-30.13335,-36.55199,3.397804e-04,7.963086e-01,-3.8499504,-10.8874082,1.756711e-04,6.783059e-01,42.38910,37.678229,2.144275e-03


In [144]:
write.table(df, "physig/groups.tsv", sep="\t", quote=FALSE)

In [145]:
df_geiger

Unnamed: 0_level_0,rank,taxon,count,V.lambda,V.pval0,V.pval1,V.sigsq,V.sigsq_unb,V.lnL,S.lambda,⋯,S.pval1,S.sigsq,S.sigsq_unb,S.lnL,SV.lambda,SV.pval0,SV.pval1,SV.sigsq,SV.sigsq_unb,SV.lnL
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,genus,Flavobacterium,119,3.740349e-01,3.425428e-01,1.887271e-03,1.36540889,1.37698016,-88.09643,2.062669e-01,⋯,5.822430e-04,0.65836704,0.66394642,-45.1430636,5.506144e-01,9.698069e-02,1.649344e-02,0.155319821,0.156636090,41.90976
2,genus,Halomonas,53,7.672872e-01,3.519539e-01,5.401727e-01,1.36020204,1.38635977,-34.71664,8.673693e-01,⋯,7.271666e-01,0.60875236,0.62045913,-13.2767523,5.020315e-01,6.163716e-01,2.487983e-01,0.160766207,0.163857865,21.61666
3,genus,Microbacterium,55,3.036205e-01,5.684879e-01,7.919656e-02,1.30599228,1.33017732,-45.09428,2.699590e-01,⋯,1.068832e-01,0.61098034,0.62229479,-24.2642084,3.380621e-01,3.604904e-01,4.517328e-02,0.138149409,0.140707732,16.74787
4,genus,Mycoplasma,73,1.000000e+00,7.383855e-01,1.000000e+00,0.04092482,0.04149322,28.93428,1.000000e+00,⋯,1.000000e+00,0.01449609,0.01469743,66.8160792,1.000000e+00,7.350806e-01,1.000000e+00,0.006867491,0.006962873,94.08451
5,genus,Nocardioides,74,1.000000e+00,1.319907e-01,1.000000e+00,0.78137437,0.79207813,-34.60825,1.000000e+00,⋯,1.000000e+00,0.38888695,0.39421417,-8.7909212,1.000000e+00,1.750348e-01,1.000000e+00,0.089370797,0.090595055,45.61739
6,genus,Paenibacillus,117,1.177756e-204,9.999999e-01,2.224049e-04,0.49026590,0.49449233,-53.87420,2.166414e-17,⋯,6.366530e-05,0.22028837,0.22218741,-7.0735902,1.868696e-29,9.999996e-01,1.669734e-02,0.066361452,0.066933533,63.11595
7,genus,Sphingomonas,58,1.356359e-82,9.999999e-01,8.961419e-02,0.49860957,0.50735711,-20.86747,5.652433e-125,⋯,1.090941e-01,0.23047343,0.23451682,1.5114746,3.586340e-63,9.999998e-01,5.899299e-02,0.061721185,0.062804012,39.71922
8,family,Acetobacteraceae,57,3.099522e-158,9.999999e-01,5.556346e-04,0.48644683,0.49513338,-27.73724,1.270268e-203,⋯,1.276489e-02,0.18991486,0.19330620,-0.9315129,8.048860e-202,9.999999e-01,1.243671e-05,0.081598063,0.083055171,23.14444
9,family,Bacillaceae,239,4.490535e-01,1.063261e-07,3.811975e-30,0.53203840,0.53427386,-125.97282,4.391935e-01,⋯,9.727263e-29,0.24558611,0.24661798,-34.6236018,5.248490e-01,4.537637e-10,2.221993e-27,0.077332499,0.077657425,112.90574
10,family,Burkholderiaceae,67,7.672347e-01,3.397804e-04,1.823912e-02,0.75556313,0.76701106,-30.13335,7.963239e-01,⋯,2.545560e-02,0.35934265,0.36478724,-3.8499504,6.783171e-01,2.144275e-03,8.242922e-03,0.077417356,0.078590346,42.38910


In [146]:
# Save the lambda / sigma-squared table as tab-separated text without quoting
# strings. Row names are written as well (write.table's default).
write.table(
    x = df_geiger,
    file = "physig/groups-lambda-sigsq.tsv",
    sep = "\t",
    quote = FALSE
)

### PGLS for data placed at tips of WoL

In [56]:
# Combine the WoL tree and its trait table into a caper comparative-data
# object. Rows are matched to tips through the `taxid` column; the
# variance-covariance matrix is stored (vcv), rows with missing values are
# kept (na.omit = FALSE), and dropped tips/rows are reported as warnings.
comp <- comparative.data(
    phy = tree_wol,
    data = data_wol,
    names.col = taxid,
    vcv = TRUE,
    na.omit = FALSE,
    warn.dropped = TRUE
)

In [57]:
# Empty results table for the PGLS fits: nine named columns and no rows yet.
# One row is appended per (response, predictor) regression later on.
df <- setNames(
    data.frame(matrix(nrow = 0, ncol = 9)),
    c("y", "x", "slope", "std.slope", "P.slope",
      "intercept", "std.intercept", "P.intercept", "adj.r.squared")
)

In [58]:
# Fit one PGLS regression per (response, predictor) pair on `comp`, append a
# summary row to `df`, and print the label and full model summary of each fit.
responses <- c("log_genome", "log_proteins", "arc_gc", "arc_coding", "rrnas", "ENCprime")
predictors <- c("log_svratio", "log_volume")

for (ycol in responses) {
    for (xcol in predictors) {
        # The formula resolves the column names through get(), so the loop
        # variables must keep the names `ycol` / `xcol` (they also show up in
        # the printed call and coefficient labels).
        reg <- pgls(get(ycol) ~ get(xcol), data = comp)
        sreg <- summary(reg)

        # Coefficient table: row 1 = intercept, row 2 = slope;
        # column 2 = standard error, column 4 = p-value.
        coef_table <- sreg$coefficients
        slope <- unname(reg$model$coef[2])
        intercept <- unname(reg$model$coef[1])

        df[nrow(df) + 1, ] <- list(
            ycol,
            xcol,
            slope,
            coef_table[2, 2],
            coef_table[2, 4],
            intercept,
            coef_table[1, 2],
            coef_table[1, 4],
            sreg$adj.r.squared[1]
        )

        print(paste(ycol, "~", xcol))
        print(sreg)
    }
}

[1] "log_genome ~ log_svratio"

Call:
pgls(formula = get(ycol) ~ get(xcol), data = comp)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6943 -0.1418  0.0052  0.1588  0.7533 

Branch length transformations:

kappa  [Fix]  : 1.000
lambda [Fix]  : 1.000
delta  [Fix]  : 1.000

Coefficients:
            Estimate Std. Error t value  Pr(>|t|)    
(Intercept) 6.461901   0.069068 93.5584 < 2.2e-16 ***
get(xcol)   0.066545   0.013207  5.0386  5.32e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2236 on 1361 degrees of freedom
Multiple R-squared: 0.01831,	Adjusted R-squared: 0.01759 
F-statistic: 25.39 on 1 and 1361 DF,  p-value: 5.32e-07 
[1] "log_genome ~ log_volume"

Call:
pgls(formula = get(ycol) ~ get(xcol), data = comp)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.67105 -0.14242  0.00488  0.15641  0.75527 

Branch length transformations:

kappa  [Fix]  : 1.000
lambda [Fix]  : 1.000
delta  [Fix]  : 1.000

Coeffic

In [59]:
# Display the PGLS summary table: one row per fitted (y, x) regression with
# slope, intercept, their standard errors and p-values, and adjusted R-squared.
df

Unnamed: 0_level_0,y,x,slope,std.slope,P.slope,intercept,std.intercept,P.intercept,adj.r.squared
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,log_genome,log_svratio,0.0665449662,0.013207137,5.320494e-07,6.4619006,0.0690681,0.0,0.017590395
2,log_genome,log_volume,0.0250333968,0.004533356,4.008481e-08,6.40822808,0.06791761,0.0,0.0211952048
3,log_proteins,log_svratio,0.0655957684,0.013118913,6.476357e-07,3.45047583,0.06860672,0.0,0.0173166677
4,log_proteins,log_volume,0.0241867046,0.004505151,9.315368e-08,3.39744457,0.06749504,0.0,0.0200188879
5,arc_gc,log_svratio,-0.0035431917,0.006828726,0.6039388,0.74846911,0.03571154,0.0,-0.0005368361
6,arc_gc,log_volume,-0.0010846521,0.002348324,0.6442387,0.75138995,0.03518201,0.0,-0.0005779136
7,arc_coding,log_svratio,-0.0077528309,0.004935932,0.1164866,1.2295182,0.02581297,0.0,0.0010759881
8,arc_coding,log_volume,-0.0036339373,0.001696058,0.03232391,1.23558915,0.02540992,0.0,0.0026293697
9,rrnas,log_svratio,-0.218121387,0.315814354,0.4898949,1.61026335,1.65158413,0.329742878,-0.00038413
10,rrnas,log_volume,-0.0481067994,0.10861407,0.6578976,1.79481166,1.62722887,0.27022797,-0.0005905294


In [60]:
# Save the PGLS summary table as tab-separated text, without string quoting
# and without the row-name column.
write.table(
    x = df,
    file = "physig/PGLS_wol.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
)