# TARA MpV Temperature
## Figure 2
#### Phylogeny and Thermal groups 
**David Demory & Hisashi Endo -- 17 Oct. 2024**

### Set up environments and load datasets

In [1]:
## Workspace
# Empty the global environment, then change into the project folder so that
# the relative "./data" and "./figures" paths used further down resolve.
rm(list = ls())

# Set your path to the folder "Demory_Endo_Temperature_MpV_biogeography".
workdir <- "../"
setwd(workdir)
#getwd()

In [3]:
## libraries
library(ggplot2)
library(viridis)
library(vegan)
library(ape)
library(dplyr)
library(reshape2)
library(ggExtra)

### Load Frequency tables

In [6]:
## Frequency tables
# One whitespace-delimited table per clade: ./data/df_MicV_<clade>_merge_stdz.txt
# For every clade three objects are created:
#   MpV.<clade>    : the full table as read from disk
#   com.<clade>    : columns 17 to the last column of that table
#   totcom.<clade> : row sums of com.<clade> (one value per table row)
# NOTE(review): columns 1-16 are presumably per-sample metadata (Temperature and
# ChlorophyllA are read from MpV.* further down) -- confirm against the files.

# Read the standardized frequency table of one clade.
#   clade : clade tag used in the file name, e.g. "A", "Pol1"
read_clade_table <- function(clade) {
  read.csv(paste0("./data/df_MicV_", clade, "_merge_stdz.txt"), sep = "")
}

# Keep the columns from `first_col` to the last column of a clade table.
#   tbl       : data frame returned by read_clade_table()
#   first_col : index of the first column to keep
clade_community <- function(tbl, first_col = 17) {
  tbl[first_col:ncol(tbl)]
}

## Major Clades
MpV.A <- read_clade_table("A")
com.A <- clade_community(MpV.A)
totcom.A <- rowSums(com.A)

MpV.C <- read_clade_table("C")
com.C <- clade_community(MpV.C)
totcom.C <- rowSums(com.C)

MpV.B <- read_clade_table("B")
com.B <- clade_community(MpV.B)
totcom.B <- rowSums(com.B)

MpV.Pol <- read_clade_table("Pol")
com.Pol <- clade_community(MpV.Pol)
totcom.Pol <- rowSums(com.Pol)

## Minor Clades
MpV.A1 <- read_clade_table("A1")
com.A1 <- clade_community(MpV.A1)
totcom.A1 <- rowSums(com.A1)

MpV.A2 <- read_clade_table("A2")
com.A2 <- clade_community(MpV.A2)
totcom.A2 <- rowSums(com.A2)

MpV.Pol1 <- read_clade_table("Pol1")
com.Pol1 <- clade_community(MpV.Pol1)
totcom.Pol1 <- rowSums(com.Pol1)

MpV.Pol2 <- read_clade_table("Pol2")
com.Pol2 <- clade_community(MpV.Pol2)
totcom.Pol2 <- rowSums(com.Pol2)

MpV.B1 <- read_clade_table("B1")
com.B1 <- clade_community(MpV.B1)
totcom.B1 <- rowSums(com.B1)

MpV.B2 <- read_clade_table("B2")
com.B2 <- clade_community(MpV.B2)
totcom.B2 <- rowSums(com.B2)

In [8]:
## Create community matrix (Presence/Absence)
# A sample counts as "present" for a clade when the clade's total frequency in
# that sample is strictly greater than x.
x <- 0.0

# Build the presence table of one clade.
#   tbl       : clade table providing the Temperature and ChlorophyllA columns
#   tot       : total frequency of the clade per row of `tbl`
#   clade     : label written to the Clade column
#   threshold : rows with tot > threshold are kept
# Returns a data frame with columns totfreq, Temperature, ChlorophyllA, Clade.
# Temporaries stay local to the function, so no global named `T` (R's shorthand
# for TRUE) is created.
clade_presence <- function(tbl, tot, clade, threshold = 0) {
  keep <- tot > threshold
  kept_tot <- tot[keep]
  data.frame("totfreq" = kept_tot,
             "Temperature" = tbl$Temperature[keep],
             "ChlorophyllA" = tbl$ChlorophyllA[keep],
             # sized explicitly so that a clade with no retained sample gives
             # a 0-row data frame instead of an error
             "Clade" = rep(clade, length(kept_tot)))
}

df.Pol1 <- clade_presence(MpV.Pol1, totcom.Pol1, "Pol1", x)
df.Pol2 <- clade_presence(MpV.Pol2, totcom.Pol2, "Pol2", x)
df.A1   <- clade_presence(MpV.A1, totcom.A1, "A1", x)
df.A2   <- clade_presence(MpV.A2, totcom.A2, "A2", x)
df.B1   <- clade_presence(MpV.B1, totcom.B1, "B1", x)
df.B2   <- clade_presence(MpV.B2, totcom.B2, "B2", x)
df.C    <- clade_presence(MpV.C, totcom.C, "C", x)

# Major Clade df
df.Pol <- rbind(df.Pol1, df.Pol2)
df.A   <- rbind(df.A1, df.A2)
df.B   <- rbind(df.B1, df.B2)

# Virus df: all clades stacked (long format, one row per retained sample/clade)
df.MpV <- rbind(df.Pol, df.A, df.B, df.C)

# Com matrix: one column of total frequencies per clade, all rows kept
com <- data.frame("A1" = totcom.A1, "A2" = totcom.A2, "B1" = totcom.B1,
                  "B2" = totcom.B2, "C" = totcom.C, "Pol1" = totcom.Pol1,
                  "Pol2" = totcom.Pol2)

head(com)

Unnamed: 0_level_0,A1,A2,B1,B2,C,Pol1,Pol2
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.0,1.0,0,0,0,0.0,0
2,0.0,1.0,0,0,0,0.0,0
3,0.4736392,0.5163483,0,0,0,0.01001255,0
4,0.2280675,0.7719325,0,0,0,0.0,0
5,0.4691672,0.5308328,0,0,0,0.0,0
6,0.0,1.0,0,0,0,0.0,0


## Fig2b -- Relative Abundances

In [14]:
# Fig2b: share of each clade in the summed frequency of every 2-unit
# temperature bin, drawn as stacked bars and written to a PDF.
mdf <- data.frame("Clade" = df.MpV$Clade,
                  "freq" = df.MpV$totfreq,
                  "Temperature" = df.MpV$Temperature)
mdf <- mdf[!is.na(mdf$Temperature), ]

# 17 breaks (-2, 0, ..., 30) define 16 right-closed bins (-2,0], ..., (28,30];
# temp_labels holds one axis label per bin, in bin order.
temp_breaks <- seq(-2, 30, by = 2)
temp_labels <- c("<0", "0", "2", "4",
                 "6", "8", "10", "12",
                 "14", "16", "18", "20",
                 "22", "24", "26", ">28")

mdf <- mdf %>%
  mutate(binCounts = cut(Temperature, breaks = temp_breaks)) %>%
  group_by(binCounts) %>%
  mutate(sumVal = sum(freq)) %>%          # total frequency of the bin
  ungroup() %>%
  group_by(binCounts, Clade) %>%
  # sumVal is constant within a bin, so mean(sumVal) is the bin total
  summarise(prct = sum(freq) / mean(sumVal), .groups = "drop")

# The labels are matched to the bins by position, so there must be one per bin.
stopifnot(nlevels(mdf$binCounts) == length(temp_labels))

pdf(file = "./figures/figure2b_raw_17Oct2024.pdf",
    width = 5, height = 2,   # Width and height in inches
    bg = "white",            # Background color
    colormodel = "rgb",      # Color model (cmyk is required for most publications)
    paper = "USr")           # Paper size

fig2D <- ggplot(mdf) +
  geom_bar(aes(x = binCounts, y = prct, fill = Clade), stat = "identity") +
  xlab("Temperature") +
  ylab("Relative frequency") +
  guides(fill = guide_legend("Thermal groups")) +
  # drop = FALSE keeps empty bins on the axis, so the positional labels always
  # line up with their bins
  scale_x_discrete(labels = temp_labels, drop = FALSE) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_classic() +
  theme(
    axis.text.x = element_text(size = 10, angle = 25),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.y = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12))
    #legend.position = c(1.2, 0.8))

# Explicit print so the figure is also drawn when the code is run via source()
print(fig2D)
dev.off()

[1m[22m`summarise()` has grouped output by 'binCounts'. You can override using the
`.groups` argument.


## Fig2c -- Temperature boxplots

In [None]:
# HISASHI CODES

## Fig2d -- Plot chlorophyll a vs. temperature

In [15]:
# Fig2d: ChlorophyllA against Temperature for every retained sample, coloured
# by clade, with 95% (solid) and 99% (dotted) t-distribution ellipses and
# marginal density curves per clade. Written to a PDF.
pdf(file = "./figures/figure2d_raw_17Oct2024.pdf",
    width = 5, height = 5,   # Width and height in inches
    bg = "white",            # Background color
    colormodel = "rgb",      # Color model (cmyk is required for most publications)
    paper = "USr")           # Paper size

fig2B <- ggplot(df.MpV,
                aes(x = Temperature, y = ChlorophyllA,
                    color = Clade, fill = Clade)) +
  stat_ellipse(type = "t", level = 0.95, geom = "polygon",
               alpha = 0.1, lwd = 1) +
  stat_ellipse(type = "t", level = 0.99, geom = "polygon",
               alpha = 0.05, lwd = 1, linetype = "dotted") +
  geom_point(size = 2, alpha = 0.3) +
  theme_classic() +
  theme(
    axis.text.x = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.title.y = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 16),
    legend.title = element_text(face = "bold", size = 16),
    legend.text = element_text(face = "bold", size = 16)) +
  # Limits and breaks go into ONE x scale: a separate xlim() followed by
  # scale_x_continuous() would be replaced by the latter and its limits lost.
  scale_x_continuous(limits = c(-20, 50), breaks = seq(-20, 50, by = 10)) +
  # Rows with ChlorophyllA outside [-1, 3] are dropped with a warning.
  ylim(-1, 3)

# Explicit print so the figure is also drawn when the code is run via source()
print(ggMarginal(fig2B, type = "density", groupFill = TRUE,
                 groupColour = TRUE, alpha = 0.1, lwd = 1))
dev.off()

[1m[22mScale for [32mx[39m is already present.
Adding another scale for [32mx[39m, which will replace the existing scale.
“[1m[22mRemoved 4 rows containing non-finite outside the scale range
(`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing non-finite outside the scale range
(`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing non-finite outside the scale range
(`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing non-finite outside the scale range
(`stat_ellipse()`).”
“[1m[22mRemoved 4 rows containing missing values or values outside the scale range
(`geom_point()`).”
