# Generalized additive models for differential abundance analysis

Related to Figure 2 of Nyquist et al. (2022).

In [None]:
library(mgcv)

## General Celltypes differential abundance

In [None]:
# Read the per-sample cell-type counts and the study metadata; both tables use
# their first column as row names.
major_celltype_props <- read.csv(
  "../../Data/celltype_counts/sample_celltype_counts.csv",
  row.names = 1
)
metadata_2 <- read.csv(
  "../../Data/Supplemental Dataset 1 Metadata_BM_Study.csv",
  row.names = 1
)

# Missing counts become zero, then the table is flipped so that its column
# names are the keys used to look up rows of metadata_2.
major_celltype_props[is.na(major_celltype_props)] <- 0
major_celltype_props <- t(major_celltype_props)

# Restrict to columns with time_post_partum_days < 400, then to those whose
# metadata row has no NA in any field.
cepi_meta_2 <- metadata_2[colnames(major_celltype_props), ]
within_400_days <- cepi_meta_2[, "time_post_partum_days"] < 400
cepi_2 <- major_celltype_props[, within_400_days]
cepi_meta_2 <- na.omit(metadata_2[colnames(cepi_2), ])
cepi_2 <- cepi_2[, rownames(cepi_meta_2)]

# Scale every column by its own total so each column sums to one.
norm_cepi_2 <- sweep(cepi_2, 2, colSums(cepi_2), "/")



In [None]:
# Time association of each cell type over the first 400 days: one GAM per row
# of norm_cepi_2, with donor as a parametric term and a smooth of
# time_post_partum_days (k = 7), fitted with method = "REML".
for (celltype in rownames(norm_cepi_2)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_cepi_2[celltype, ]),
    cepi_meta_2[, c("donor", "time_post_partum_days")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + s(time_post_partum_days, k = 7),
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# General cell type association with infant sickness, donor as a covariate.
# Reads norm_cepi_2 / cepi_meta_2, the objects built in the data-loading cell
# of this section (the previous names norm_cepi / cepi_meta are not defined
# anywhere in this notebook).
for (celltype in rownames(norm_cepi_2)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_cepi_2[celltype, ]),
    cepi_meta_2[, c("donor", "infant_sick_YN")]
  )
  # Same cleaning as the epithelial and immune covariate models: drop samples
  # recorded as the string "na" and fold the "yes " spelling into "yes" so
  # neither is fitted as its own factor level.
  df_in <- df_in[df_in$infant_sick_YN != "na", ]
  df_in[df_in == "yes "] <- "yes"

  gam_v0 <- gam(
    celltype_prop ~ donor + infant_sick_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# General cell type association with formula use, donor as a covariate.
# Reads norm_cepi_2 / cepi_meta_2, the objects built in the data-loading cell
# of this section (the previous names norm_cepi / cepi_meta are not defined
# anywhere in this notebook).
for (celltype in rownames(norm_cepi_2)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_cepi_2[celltype, ]),
    cepi_meta_2[, c("donor", "any_formula_YN")]
  )
  # Same cleaning as the epithelial and immune covariate models: drop samples
  # recorded as the string "na" and fold the "yes " spelling into "yes" so
  # neither is fitted as its own factor level.
  df_in <- df_in[df_in$any_formula_YN != "na", ]
  df_in[df_in == "yes "] <- "yes"

  gam_v0 <- gam(
    celltype_prop ~ donor + any_formula_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# General cell type association with daycare attendance, donor as a covariate.
# Reads norm_cepi_2 / cepi_meta_2, the objects built in the data-loading cell
# of this section (the previous names norm_cepi / cepi_meta are not defined
# anywhere in this notebook).
for (celltype in rownames(norm_cepi_2)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_cepi_2[celltype, ]),
    cepi_meta_2[, c("donor", "daycare_YN")]
  )
  # Same cleaning as the epithelial and immune covariate models: drop samples
  # recorded as the string "na" and fold the "yes " spelling into "yes" so
  # neither is fitted as its own factor level.
  df_in <- df_in[df_in$daycare_YN != "na", ]
  df_in[df_in == "yes "] <- "yes"

  gam_v0 <- gam(
    celltype_prop ~ donor + daycare_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

# Epithelial cell differential abundance

In [None]:
# Epithelial subcluster counts. Row names are the keys used to look up
# metadata_2, so rows index samples and columns index subclusters.
epi_celltype_props <- read.csv(
  "../../Data/epithelial_subcluster_cell_counts.csv",
  row.names = 1
)

# Keep samples with time_post_partum_days < 400 and no NA in any metadata
# field. which() drops rows whose time is NA up front instead of carrying
# all-NA rows through to na.omit().
epi_meta <- metadata_2[rownames(epi_celltype_props), ]
epi <- epi_celltype_props[which(epi_meta$time_post_partum_days < 400), ]
epi_meta <- na.omit(metadata_2[rownames(epi), ])
epi <- epi[rownames(epi_meta), , drop = FALSE]

# Per-sample proportions: divide every row (sample) by its own total.
# Dividing by colSums(epi) is wrong here: a vector of length ncol(epi) is
# recycled down the columns, so entries end up divided by totals belonging to
# unrelated subclusters. This matches the per-sample normalisation used for
# the general cell types.
norm_epi <- sweep(as.matrix(epi), 1, rowSums(epi), "/")
# Samples with zero epithelial cells give 0/0; report them as proportion 0.
norm_epi[is.na(norm_epi)] <- 0
# Downstream loops expect subclusters in rows and samples in columns.
norm_epi <- t(norm_epi)

In [None]:
# Time association of each epithelial cell type over the first 400 days: one
# GAM per row of norm_epi, with donor as a parametric term and a smooth of
# time_post_partum_days (k = 7), fitted with method = "REML".
for (celltype in rownames(norm_epi)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_epi[celltype, ]),
    epi_meta[, c("donor", "time_post_partum_days")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + s(time_post_partum_days, k = 7),
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Epithelial cell type proportion vs. infant_sick_YN, donor as a covariate.
# Samples recorded as the string "na" are excluded and the "yes " spelling in
# the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_epi)) {
  answered <- epi_meta[, "infant_sick_YN"] != "na"
  df <- norm_epi[, answered]
  df_meta <- epi_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "infant_sick_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + infant_sick_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Epithelial cell type proportion vs. any_formula_YN, donor as a covariate.
# Samples recorded as the string "na" are excluded and the "yes " spelling in
# the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_epi)) {
  answered <- epi_meta[, "any_formula_YN"] != "na"
  df <- norm_epi[, answered]
  df_meta <- epi_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "any_formula_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + any_formula_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Epithelial cell type proportion vs. daycare_YN, donor as a covariate.
# Samples recorded as the string "na" are excluded and the "yes " spelling in
# the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_epi)) {
  answered <- epi_meta[, "daycare_YN"] != "na"
  df <- norm_epi[, answered]
  df_meta <- epi_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "daycare_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + daycare_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Epithelial cell type proportion vs. hormonal_birthcontrol_YN, donor as a
# covariate. Samples recorded as the string "na" are excluded and the "yes "
# spelling in the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_epi)) {
  answered <- epi_meta[, "hormonal_birthcontrol_YN"] != "na"
  df <- norm_epi[, answered]
  df_meta <- epi_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "hormonal_birthcontrol_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + hormonal_birthcontrol_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Epithelial cell type proportion vs. breast_soreness_YN, donor as a
# covariate. Samples recorded as the string "na" are excluded and the "yes "
# spelling in the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_epi)) {
  answered <- epi_meta[, "breast_soreness_YN"] != "na"
  df <- norm_epi[, answered]
  df_meta <- epi_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "breast_soreness_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + breast_soreness_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Epithelial cell type proportion vs. vaccines_reported_YN, donor as a
# covariate. Samples recorded as the string "na" are excluded and the "yes "
# spelling in the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_epi)) {
  answered <- epi_meta[, "vaccines_reported_YN"] != "na"
  df <- norm_epi[, answered]
  df_meta <- epi_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "vaccines_reported_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + vaccines_reported_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

# Immune cell differential abundance

In [None]:
# Build the immune-cell table from the general counts already in memory.
# major_celltype_props has cell types in rows at this point, so the selected
# rows are transposed to get samples in rows and immune cell types in columns.
immune_types <- c(
  "B.cells", "T.cells", "dendritic.cells", "eosinophils",
  "macrophages", "milk.macrophages", "neutrophils"
)
immune_celltype_props <- t(major_celltype_props[immune_types, ])

# Keep samples with time_post_partum_days < 400 and no NA in any metadata
# field. which() drops rows whose time is NA up front instead of carrying
# all-NA rows through to na.omit().
immune_meta <- metadata_2[rownames(immune_celltype_props), ]
immune <- immune_celltype_props[
  which(immune_meta$time_post_partum_days < 400), ,
  drop = FALSE
]
immune_meta <- na.omit(metadata_2[rownames(immune), ])
immune <- immune[rownames(immune_meta), , drop = FALSE]

# Per-sample proportions among the immune cells: divide every row (sample) by
# its own total. Dividing by colSums(immune) is wrong here: a vector with one
# entry per cell type is recycled down the columns, so entries end up divided
# by totals belonging to unrelated cell types.
norm_immune <- sweep(immune, 1, rowSums(immune), "/")
# Samples with zero immune cells give 0/0; report them as proportion 0.
norm_immune[is.na(norm_immune)] <- 0
# Downstream loops expect cell types in rows and samples in columns.
norm_immune <- t(norm_immune)



In [None]:
# Time association of each immune cell type over the first 400 days: one GAM
# per row of norm_immune, with donor as a parametric term and a smooth of
# time_post_partum_days (k = 7), fitted with method = "REML". The fitted
# smooth is plotted with a shaded band before the summary is printed.
for (celltype in rownames(norm_immune)) {
  df_in <- data.frame(celltype_prop = norm_immune[celltype, ])
  print(celltype)

  df_in <- cbind(df_in, immune_meta[, c("donor", "time_post_partum_days")])
  gam_v0 <- gam(
    celltype_prop ~ donor + s(time_post_partum_days, k = 7),
    data = df_in,
    method = "REML"
  )

  plot.gam(gam_v0, shade = TRUE)
  print(summary(gam_v0))
}



In [None]:
# Immune cell type proportion vs. weaning_YN, with donor as a covariate and a
# smooth of time_post_partum_days (k = 7). Rows recorded as the string "na"
# are dropped and the "yes " spelling is rewritten to "yes" before fitting.
for (celltype in rownames(norm_immune)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_immune[celltype, ]),
    immune_meta[, c("donor", "weaning_YN", "time_post_partum_days")]
  )
  df_in <- df_in[df_in$weaning_YN != "na", ]
  df_in[df_in == "yes "] <- "yes"

  gam_v0 <- gam(
    celltype_prop ~ donor + weaning_YN + s(time_post_partum_days, k = 7),
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}


In [None]:
# Immune cell type proportion vs. infant_sick_YN, donor as a covariate.
# Rows recorded as the string "na" are dropped and the "yes " spelling is
# rewritten to "yes" before fitting.
for (celltype in rownames(norm_immune)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_immune[celltype, ]),
    immune_meta[, c("donor", "infant_sick_YN")]
  )
  df_in <- df_in[df_in$infant_sick_YN != "na", ]
  df_in[df_in == "yes "] <- "yes"

  gam_v0 <- gam(
    celltype_prop ~ donor + infant_sick_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Immune cell type proportion vs. any_formula_YN, donor as a covariate.
# Rows recorded as the string "na" are dropped and the "yes " spelling is
# rewritten to "yes" before fitting.
for (celltype in rownames(norm_immune)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_immune[celltype, ]),
    immune_meta[, c("donor", "any_formula_YN")]
  )
  df_in <- df_in[df_in$any_formula_YN != "na", ]
  df_in[df_in == "yes "] <- "yes"

  gam_v0 <- gam(
    celltype_prop ~ donor + any_formula_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Immune cell type proportion vs. daycare_YN, donor as a covariate.
# Rows recorded as the string "na" are dropped and the "yes " spelling is
# rewritten to "yes" before fitting.
for (celltype in rownames(norm_immune)) {
  df_in <- cbind(
    data.frame(celltype_prop = norm_immune[celltype, ]),
    immune_meta[, c("donor", "daycare_YN")]
  )
  df_in <- df_in[df_in$daycare_YN != "na", ]
  df_in[df_in == "yes "] <- "yes"

  gam_v0 <- gam(
    celltype_prop ~ donor + daycare_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Immune cell type proportion vs. hormonal_birthcontrol_YN, donor as a
# covariate. Samples recorded as the string "na" are excluded and the "yes "
# spelling in the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_immune)) {
  answered <- immune_meta[, "hormonal_birthcontrol_YN"] != "na"
  df <- norm_immune[, answered]
  df_meta <- immune_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "hormonal_birthcontrol_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + hormonal_birthcontrol_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Immune cell type proportion vs. breast_soreness_YN, donor as a covariate.
# Samples recorded as the string "na" are excluded and the "yes " spelling in
# the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_immune)) {
  answered <- immune_meta[, "breast_soreness_YN"] != "na"
  df <- norm_immune[, answered]
  df_meta <- immune_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "breast_soreness_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + breast_soreness_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}

In [None]:
# Immune cell type proportion vs. vaccines_reported_YN, donor as a covariate.
# Samples recorded as the string "na" are excluded and the "yes " spelling in
# the metadata is rewritten to "yes" before fitting.
for (celltype in rownames(norm_immune)) {
  # Pull the column out as a plain vector, as the sibling loops do; the
  # resulting column selection is the same as with the one-column data-frame
  # comparison.
  answered <- immune_meta[, "vaccines_reported_YN"] != "na"
  df <- norm_immune[, answered]
  df_meta <- immune_meta[colnames(df), ]
  df_meta[df_meta == "yes "] <- "yes"

  df_in <- cbind(
    data.frame(celltype_prop = df[celltype, ]),
    df_meta[, c("donor", "vaccines_reported_YN")]
  )
  gam_v0 <- gam(
    celltype_prop ~ donor + vaccines_reported_YN,
    data = df_in,
    method = "REML"
  )

  print(celltype)
  print(summary(gam_v0))
}