# TARA MicV Temperature
## Figure 2
#### Phylogeny and Thermal groups 
**David Demory & Hisashi Endo -- 17 Oct. 2024**

### Set up environments and load datasets

In [12]:
## Workspace
rm(list = ls())
workdir = "../" #set your path to the folder "Demory_Endo_Temperature_MicV_biogeography"
setwd(workdir)
getwd()

In [24]:
## libraries
library(ggplot2)
library(viridis)
library(vegan)
library(ape)
library(dplyr)
library(reshape2)
library(ggExtra)
library(magrittr)
library(RColorBrewer)
library(pryr)
library(readr)

### Load Frequency tables

In [25]:
## Major Clades 
MicV.A <- read.csv("./data/df_MicV_A_merge_stdz.txt", sep="")
com.A <- MicV.A[17:ncol(MicV.A)]
totcom.A <- rowSums(com.A)

MicV.C <- read.csv("./data/df_MicV_C_merge_stdz.txt", sep="")
com.C <- MicV.C[17:ncol(MicV.C)]
totcom.C <- rowSums(com.C)

MicV.B <- read.csv("./data/df_MicV_B_merge_stdz.txt", sep="")
com.B <- MicV.B[17:ncol(MicV.B)]
totcom.B <- rowSums(com.B)

MicV.Pol <- read.csv("./data/df_MicV_Pol_merge_stdz.txt", sep="")
com.Pol <- MicV.Pol[17:ncol(MicV.Pol)]
totcom.Pol <- rowSums(com.Pol)

## Minor Clades
MicV.A1 <- read.csv("./data/df_MicV_A1_merge_stdz.txt", sep="")
com.A1 <- MicV.A1[17:ncol(MicV.A1)]
totcom.A1 <- rowSums(com.A1)

MicV.A2 <- read.csv("./data/df_MicV_A2_merge_stdz.txt", sep="")
com.A2 <- MicV.A2[17:ncol(MicV.A2)]
totcom.A2 <- rowSums(com.A2)

MicV.Pol1 <- read.csv("./data/df_MicV_Pol1_merge_stdz.txt", sep="")
com.Pol1 <- MicV.Pol1[17:ncol(MicV.Pol1)]
totcom.Pol1 <- rowSums(com.Pol1)

MicV.Pol2 <- read.csv("./data/df_MicV_Pol2_merge_stdz.txt", sep="")
com.Pol2 <- MicV.Pol2[17:ncol(MicV.Pol2)]
totcom.Pol2 <- rowSums(com.Pol2)

MicV.B1 <- read.csv("./data/df_MicV_B1_merge_stdz.txt", sep="")
com.B1 <- MicV.B1[17:ncol(MicV.B1)]
totcom.B1 <- rowSums(com.B1)

MicV.B2 <- read.csv("./data/df_MicV_B2_merge_stdz.txt", sep="")
com.B2 <- MicV.B2[17:ncol(MicV.B2)]
totcom.B2 <- rowSums(com.B2)

In [27]:
## Create community matrix (Presence/Abscence)
x = 0.0

T <- MicV.Pol1$Temperature[totcom.Pol1>x]
L <- MicV.Pol1$ChlorophyllA[totcom.Pol1>x]
df.Pol1 <- data.frame("totfreq"=totcom.Pol1[totcom.Pol1>x],"Temperature"=T,"ChlorophyllA"=L,'Clade'="Pol1")

T <- MicV.Pol2$Temperature[totcom.Pol2>x]
L <- MicV.Pol2$ChlorophyllA[totcom.Pol2>x]
df.Pol2 <- data.frame("totfreq"=totcom.Pol2[totcom.Pol2>x],"Temperature"=T,"ChlorophyllA"=L,'Clade'="Pol2")

T <- MicV.A1$Temperature[totcom.A1>x]
L <- MicV.A1$ChlorophyllA[totcom.A1>x]
df.A1 <- data.frame("totfreq"=totcom.A1[totcom.A1>x],"Temperature"=T,"ChlorophyllA"=L,'Clade'="A1")

T <- MicV.A2$Temperature[totcom.A2>x]
L <- MicV.A2$ChlorophyllA[totcom.A2>x]
df.A2 <- data.frame("totfreq"=totcom.A2[totcom.A2>x],"Temperature"=T,"ChlorophyllA"=L,'Clade'="A2")

T <- MicV.B1$Temperature[totcom.B1>x]
L <- MicV.B1$ChlorophyllA[totcom.B1>x]
df.B1 <- data.frame("totfreq"=totcom.B1[totcom.B1>x],"Temperature"=T,"ChlorophyllA"=L,'Clade'="B1")

T <- MicV.B2$Temperature[totcom.B2>x]
L <- MicV.B2$ChlorophyllA[totcom.B2>x]
df.B2 <- data.frame("totfreq"=totcom.B2[totcom.B2>x],"Temperature"=T,"ChlorophyllA"=L,'Clade'="B2")

T <- MicV.C$Temperature[totcom.C>x]
L <- MicV.C$ChlorophyllA[totcom.C>x]
df.C <- data.frame("totfreq"=totcom.C[totcom.C>x],"Temperature"=T,"ChlorophyllA"=L,'Clade'="C")

# Major Clade df
df.Pol <- rbind(df.Pol1, df.Pol2)
df.A   <- rbind(df.A1, df.A2)
df.B   <- rbind(df.B1, df.B2)

# Virus df
df.MicV <- rbind(df.Pol,df.A,df.B,df.C)

# Com matrix
com <- data.frame("A1"=totcom.A1,"A2"=totcom.A2,"B1"=totcom.B1,
                  "B2"=totcom.B2,"C"=totcom.C,"Pol1"=totcom.Pol1,
                  "Pol2"=totcom.Pol2)

head(com)

Unnamed: 0_level_0,A1,A2,B1,B2,C,Pol1,Pol2
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.0,1.0,0,0,0,0.0,0
2,0.0,1.0,0,0,0,0.0,0
3,0.4736392,0.5163483,0,0,0,0.01001255,0
4,0.2280675,0.7719325,0,0,0,0.0,0
5,0.4691672,0.5308328,0,0,0,0.0,0
6,0.0,1.0,0,0,0,0.0,0


## Fig2b -- Relative Abundances

In [28]:
mdf <- data.frame("Clade"=df.MicV$Clade,"freq"=df.MicV$totfreq,"Temperature"=df.MicV$Temperature)
mdf <- mdf[is.na(mdf$Temperature)==0,]

mdf = mdf %>%
  mutate(binCounts = cut(Temperature, breaks = seq(-2, 31, by = 2))) %>%
  group_by(binCounts) %>%
  mutate(sumVal = sum(freq)) %>%
  ungroup() %>%
  group_by(binCounts, Clade) %>%
  summarise(prct = sum(freq)/mean(sumVal))

pdf(file="./figures/figure2b_raw_17Oct2024.pdf",
    width = 5, height = 2, # Width and height in inches
    bg = "white",            # Background color
    colormodel = "rgb",      # Color model (cmyk is required for most publications)
    paper = "USr")           # Paper size

fig2b = ggplot(mdf) +
    geom_bar(aes(x=binCounts, y=prct, fill=Clade), stat="identity") +
    theme(axis.text.x=element_text(angle = 90, hjust=1))+
    xlab("Temperature") +
    ylab("Relative frequency") +
    guides(col=guide_legend("Thermal groups"),
           fill=guide_legend("Thermal groups"))+
    scale_x_discrete(labels=c("<0","0","2","4",
                             "6","8","10","12",
                             "14","16","18","20",
                             "22","24","26",">28"))+
    scale_y_continuous(expand = c(0, 0)) +
theme_classic() + theme(
        axis.text.x = element_text(size = 10, angle=25),
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        axis.text.y = element_text(size = 10),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 12))
        #legend.position = c(1.2, 0.8))
fig2b
dev.off()

[1m[22m`summarise()` has grouped output by 'binCounts'. You can override using the `.groups` argument.


## Fig2c -- Temperatire boxplots

In [29]:
# Load MicV thermotype
thermotype_list2 <-  read_tsv("./data/thermotype_list.txt", col_names = TRUE, show_col_types = FALSE)
colnames(thermotype_list2)[1] <- c("ids")

# Load id-clade table
MicV_total_ids_clades <-  read_tsv("./data/MicV_total_ids_clades.txt", col_names = TRUE, show_col_types = FALSE)

# Merge dfs
clade_thermo_df <- left_join(thermotype_list2, MicV_total_ids_clades, by = "ids")

# Colors of A1, A2, B1, B2, C, Pol1, Pol2 for paper
col_subclade  <- c('#F8766D', '#C49A00', '#53B400', '#00C094', '#00B6EB', '#A58AFF', '#FB61D7')


# Box plot for each subclade
boxp1 <- ggplot(clade_thermo_df, aes(x=subclade, y=opt_temp, fill=subclade)) +
  geom_boxplot(coef=20, alpha=0.4) + # Change whiskers length for min to max.
  theme_bw() + 
  #scale_color_manual(breaks = c("Intra-genus", "Inter-genus"), values = col_tax) + 
  scale_fill_manual(values = col_subclade) +
  scale_y_continuous(limits = c(-5, 30)) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_jitter(aes(color = subclade), width=0.15, shape=19, size =0.8, alpha=1) +
  scale_color_manual(values = col_subclade) +
  stat_summary(fun = "mean", geom = "point", shape = 21, size = 3, fill = "white") +
  ylab("Optimal temperature") + xlab("MicV Subclade")
ggsave(filename = "./figures/figure2c_05Dec2024.pdf", plot= boxp1, width=120, height=90, units="mm", dpi = 300)

## Fig2d-- Plot Chla vs. T 

In [30]:
pdf(file="./figures/figure2d_raw_17Oct2024.pdf",
    width = 5, height = 5, # Width and height in inches
    bg = "white",            # Background color
    colormodel = "rgb",      # Color model (cmyk is required for most publications)
    paper = "USr")           # Paper size


#test = df.MicV[df.MicV$Clade=="C",]

fig2d <- ggplot(df.MicV, aes(x=Temperature, y=ChlorophyllA, color=Clade,fill=Clade)) + theme_classic()+
    stat_ellipse(type="t", level = 0.95,geom="polygon",alpha=0.1,lwd=1)+
    stat_ellipse(type="t", level = 0.99,geom="polygon",alpha=0.05,lwd=1,linetype = "dotted")+
    geom_point(size=2,alpha=0.3)+    
    theme_classic() + theme(
        axis.text.x = element_text(face = "bold", size = 16),
        axis.title.x = element_text(face = "bold", size = 16),
        axis.title.y = element_text(face = "bold", size = 16),
        axis.text.y = element_text(face = "bold", size = 16),
        legend.title = element_text(face = "bold", size = 16),
        legend.text = element_text(face = "bold", size = 16)) +
    xlim(-20, 50) + scale_x_continuous(breaks = seq(-20, 50, by = 10)) +
    ylim(-1, 3)   #+ scale_y_continuous(breaks = seq(-1, 3, by = 2))

ggMarginal(fig2d, type="density",groupFill = TRUE, groupColour = TRUE,alpha=0.1,lwd=1)
dev.off()

[1m[22mScale for [32mx[39m is already present.
Adding another scale for [32mx[39m, which will replace the existing scale.
“[1m[22mRemoved 4 rows containing non-finite outside the scale range (`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing non-finite outside the scale range (`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing non-finite outside the scale range (`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing non-finite outside the scale range (`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing missing values or values outside the scale range (`geom_point()`).”
