# Natural Statistics Cross-linguistic: 

#### MLUw analysis - maximum infant vocalizations sample

----

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, "data_proc")
import contingent_extraction
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the maximum-vocalization sample and derive the caregiver-utterance frame
# (`maxvoc_dat_inc_cg`) that the MLUw summaries and mixed models below are built on.
maxvoc_dat_inc = pd.read_csv("../data/maxvoc_dat_inc_master.csv", index_col=0, low_memory=False)

# Drop Arabic (no adult speech transcribed) and keep target_child_age within [5, 30].
maxvoc_dat_inc = maxvoc_dat_inc[
    (maxvoc_dat_inc["language"] != "ara")
    & (maxvoc_dat_inc["target_child_age"] >= 5)
    & (maxvoc_dat_inc["target_child_age"] <= 30)
]

# .copy() makes the caregiver subset an independent frame, so the column assignment
# below does not write to a slice of `maxvoc_dat_inc` (SettingWithCopyWarning).
maxvoc_dat_inc_cg = maxvoc_dat_inc[maxvoc_dat_inc["caregiver"] == "caregiver"].copy()

# Recode the 0/1 contingency flag as a readable label used for plotting and as the model factor.
maxvoc_dat_inc_cg["contingent"] = np.where(
    maxvoc_dat_inc_cg["contingent"] == 1, "contingent", "non-contingent"
)

# Keep only utterances with a usable gloss: non-missing and not one of the
# placeholder codes "xxx", "yyy", "www".
maxvoc_dat_inc_cg = maxvoc_dat_inc_cg[
    maxvoc_dat_inc_cg["gloss"].notna()
    & ~maxvoc_dat_inc_cg["gloss"].isin(["xxx", "yyy", "www"])
]

In [3]:
# create language_name column
# NOTE(review): this merge is disabled, yet the next cell groups by "Language_name".
# Presumably the master CSV already carries that column - confirm, then delete this cell.

# language_labels = pd.read_csv("../data/language_labels.csv")

# language_labels=language_labels.rename(columns={"Language":"language"})

# maxvoc_dat_inc_cg=maxvoc_dat_inc_cg.merge(language_labels,on='language',how="right")

In [4]:
# Mean number of tokens per caregiver utterance (MLUw) for every
# language x child x transcript x contingency cell.
maxvoc_mlu_stats = (
    maxvoc_dat_inc_cg
    .groupby(["Language_name", "target_child_id", "transcript_id", "contingent"])["num_tokens"]
    .mean()
    .reset_index(name="mean")
)

# Same table with the value column called "means", the name the R plotting cell maps to y.
maxvoc_mlu_sumstats = maxvoc_mlu_stats.rename(columns={"mean": "means"})

----
#### MLUw plot

In [5]:
%load_ext rpy2.ipython

Simple plot

In [39]:
%%R -i maxvoc_mlu_sumstats

# Base MLUw figure: mean +/- SE of the per-transcript means in `maxvoc_mlu_sumstats`,
# contingent vs non-contingent, one facet per language, with a hard-coded
# significance label ("***" or "ns") in each facet. The plot is stored in `p`
# (later cells add layers to it) and written to ../figures/token_mlu_maxvoc.pdf.

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

# One-row data frame that places an annotation inside a single language facet.
# contingent = 1.5 puts the text between the two discrete x positions.
facet_label <- function(language_name, y) {
  data.frame(means = y, contingent = 1.5, Language_name = language_name)
}

# "***" labels sit at y = 5.8 and "ns" labels at y = 6.
# Arabic has no label: no adult speech transcribed.
deu_label    <- facet_label("German", 5.8)
eng_label    <- facet_label("English", 5.8)
est_label    <- facet_label("Estonian", 5.8)
fas_ns_label <- facet_label("Persian", 6)
fra_label    <- facet_label("French", 5.8)
hrv_label    <- facet_label("Croatian", 5.8)
jpn_label    <- facet_label("Japanese", 5.8)
kor_label    <- facet_label("Korean", 5.8)
nor_ns_label <- facet_label("Norwegian", 6)
pol_label    <- facet_label("Polish", 6)
por_label    <- facet_label("Portuguese", 5.8)
spa_label    <- facet_label("Spanish", 5.8)
swe_label    <- facet_label("Swedish", 5.8)
zho_ns_label <- facet_label("Mandarin", 6)

# Text layers for the two kinds of significance label.
sig_text <- function(label_df) {
  geom_text(data = label_df, label = "***", size = 8, color = "black")
}
ns_text <- function(label_df) {
  geom_text(data = label_df, label = "ns", size = 4, color = "black", fontface = "italic")
}

# NOTE(review): ylim() sets scale limits, so any per-transcript mean above 6 is removed
# before stat_summary() computes the mean/SE. coord_cartesian(ylim = c(0, 6)) would zoom
# without dropping data - confirm which behaviour is intended.
p <- ggplot(maxvoc_mlu_sumstats, aes(x = contingent, y = means, color = Language_name)) +
     # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
     stat_summary(fun = mean, geom = "point", shape = 19, size = 1.75) +
     stat_summary(fun.data = mean_se, geom = "errorbar", size = 1.25, width = .5) +
     facet_wrap(. ~ Language_name, ncol = 7) +
     sig_text(deu_label) +
     sig_text(eng_label) +
     sig_text(est_label) +
     ns_text(fas_ns_label) +
     sig_text(fra_label) +
     sig_text(hrv_label) +
     sig_text(jpn_label) +
     sig_text(kor_label) +
     ns_text(nor_ns_label) +
     ns_text(pol_label) +
     sig_text(por_label) +
     sig_text(spa_label) +
     sig_text(swe_label) +
     ns_text(zho_ns_label) +
     ylim(0, 6) +
     # No title/subtitle is set; only the axis labels.
     labs(y = "MLUw", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                            size=0, linetype="dotted",
                                            colour = "white"),
           legend.text=element_text(size=16))

# Save `p` explicitly instead of relying on last_plot().
ggsave("../figures/token_mlu_maxvoc.pdf", plot = p, width = 11.7, height = 6.2)

Plot + effect estimates

In [40]:
%%R

# Adds a hard-coded "est=..." text label to each annotated language facet of `p`
# and saves the result. Requires `p` from the base-plot cell; re-running this cell
# without re-running that one stacks duplicate text layers, because `p` is updated in place.
#
# NOTE(review): the hard-coded estimates differ from the emmeans contrast estimates printed
# by the mixed-model cells (e.g. German -0.935 vs "-.97", English -0.584 vs "-.54",
# French -0.368 vs "-.32") - confirm which run these labels come from.

# One-row data frame placing an estimate label at x = 1, y = .25 in one language facet.
est_label_at <- function(language_name) {
  data.frame(means = .25, contingent = 1, Language_name = language_name)
}

deu_est_label <- est_label_at("German")
eng_est_label <- est_label_at("English")
est_est_label <- est_label_at("Estonian")
fra_est_label <- est_label_at("French")
hrv_est_label <- est_label_at("Croatian")
jpn_est_label <- est_label_at("Japanese")
kor_est_label <- est_label_at("Korean")
# Defined but not drawn: Norwegian carries an "ns" label in the base plot.
nor_est_label <- est_label_at("Norwegian")
por_est_label <- est_label_at("Portuguese")
spa_est_label <- est_label_at("Spanish")
swe_est_label <- est_label_at("Swedish")

# Text layer showing one estimate string.
est_text <- function(label_df, text) {
  geom_text(data = label_df, label = text, size = 4, color = "black")
}

p <- p +
     est_text(deu_est_label, "est=-.97") +
     est_text(eng_est_label, "est=-.54") +
     est_text(est_est_label, "est=-.63") +
     est_text(fra_est_label, "est=-.32") +
     est_text(hrv_est_label, "est=-.45") +
     est_text(jpn_est_label, "est=-.59") +
     est_text(kor_est_label, "est=-.57") +
     est_text(por_est_label, "est=-.76") +
     est_text(spa_est_label, "est=-.53") +
     est_text(swe_est_label, "est=-.66")

# Save `p` explicitly instead of relying on last_plot().
ggsave("../figures/token_mlu_maxvoc_eff.pdf", plot = p, width = 11.7, height = 6.2)

Plot + effect estimates + sample sizes

In [41]:
%%R

# Adds a hard-coded sample-size annotation to every language facet of `p` and saves the result.
# Each annotation is two text layers: an italic "n" at x = 1.7 and the " = <count>" string
# at x = 2.1, both at y = .25.
# Requires `p` from the preceding plot cells; re-running this cell on its own stacks
# duplicate text layers, because `p` is updated in place.

# One-row data frames positioning the two halves of the annotation in one language facet.
n_label_at <- function(language_name) {
  data.frame(means = .25, contingent = 1.7, Language_name = language_name)
}
sz_label_at <- function(language_name) {
  data.frame(means = .25, contingent = 2.1, Language_name = language_name)
}

deu_n_label <- n_label_at("German")
eng_n_label <- n_label_at("English")
est_n_label <- n_label_at("Estonian")
fas_n_label <- n_label_at("Persian")
fra_n_label <- n_label_at("French")
hrv_n_label <- n_label_at("Croatian")
jpn_n_label <- n_label_at("Japanese")
kor_n_label <- n_label_at("Korean")
nor_n_label <- n_label_at("Norwegian")
pol_n_label <- n_label_at("Polish")
por_n_label <- n_label_at("Portuguese")
spa_n_label <- n_label_at("Spanish")
swe_n_label <- n_label_at("Swedish")
zho_n_label <- n_label_at("Mandarin")

deu_sz_label <- sz_label_at("German")
eng_sz_label <- sz_label_at("English")
est_sz_label <- sz_label_at("Estonian")
fas_sz_label <- sz_label_at("Persian")
fra_sz_label <- sz_label_at("French")
hrv_sz_label <- sz_label_at("Croatian")
jpn_sz_label <- sz_label_at("Japanese")
kor_sz_label <- sz_label_at("Korean")
nor_sz_label <- sz_label_at("Norwegian")
pol_sz_label <- sz_label_at("Polish")
por_sz_label <- sz_label_at("Portuguese")
spa_sz_label <- sz_label_at("Spanish")
swe_sz_label <- sz_label_at("Swedish")
zho_sz_label <- sz_label_at("Mandarin")

# Text layers: the italic "n" and the upright count string.
n_text <- function(label_df) {
  geom_text(data = label_df, label = "n", size = 4, color = "black", fontface = "italic")
}
sz_text <- function(label_df, text) {
  geom_text(data = label_df, label = text, size = 4, color = "black")
}

# The counts below are hard-coded strings; they are not computed from the data in this cell.
p <- p +
     n_text(deu_n_label) +
     n_text(eng_n_label) +
     n_text(est_n_label) +
     n_text(fas_n_label) +
     n_text(fra_n_label) +
     n_text(hrv_n_label) +
     n_text(jpn_n_label) +
     n_text(kor_n_label) +
     n_text(nor_n_label) +
     n_text(pol_n_label) +
     n_text(por_n_label) +
     n_text(spa_n_label) +
     n_text(swe_n_label) +
     n_text(zho_n_label) +
     sz_text(deu_sz_label, " = 39") +
     sz_text(eng_sz_label, " =1010") +
     sz_text(est_sz_label, " = 22") +
     sz_text(fas_sz_label, " = 12") +
     sz_text(fra_sz_label, " = 258") +
     sz_text(hrv_sz_label, " = 79") +
     sz_text(jpn_sz_label, " = 139") +
     sz_text(kor_sz_label, " = 37") +
     sz_text(nor_sz_label, " = 56") +
     sz_text(pol_sz_label, " = 1") +
     sz_text(por_sz_label, " = 24") +
     sz_text(spa_sz_label, " = 31") +
     sz_text(swe_sz_label, " = 16") +
     sz_text(zho_sz_label, " = 2")

# Save `p` explicitly instead of relying on last_plot().
ggsave("../figures/token_mlu_maxvoc_eff_n.pdf", plot = p, width = 11.7, height = 6.2)

----

#### MLUw mixed models

In [6]:
# Per-language caregiver-utterance subsets, each passed to an R model cell via `%%R -i <name>`.
# Arabic (ara) is not included: no adult speech transcribed.
_model_columns = ['language', 'num_tokens', 'contingent', 'transcript_id', 'target_child_id']


def _language_subset(language_code):
    """Return the model columns of `maxvoc_dat_inc_cg` for rows whose `language` is `language_code`."""
    is_language = maxvoc_dat_inc_cg["language"] == language_code
    return maxvoc_dat_inc_cg.loc[is_language, _model_columns]


deu = _language_subset("deu")
eng = _language_subset("eng")
est = _language_subset("est")
fas = _language_subset("fas")
fra = _language_subset("fra")
hrv = _language_subset("hrv")
jpn = _language_subset("jpn")
kor = _language_subset("kor")
nor = _language_subset("nor")
pol = _language_subset("pol")
por = _language_subset("por")
spa = _language_subset("spa")
swe = _language_subset("swe")
zho = _language_subset("zho")

In [7]:
%%R

# Modelling packages. lmerTest is attached after lme4, so its lmer() masks lme4's
# and is the one used by the model cells below.
library(lme4)
library(emmeans)
library(lmerTest)
library(tidyverse)

# Existing per-language effect-size table; add an empty column that the model cells
# below fill in one language at a time. Re-running this cell resets that column to NA.
effect_sizes <- read.csv("../data/MLUw_effect_sizes.csv")
effect_sizes$max_voc_effect_size <- NA

R[write to console]: Loading required package: Matrix

R[write to console]: 
Attaching package: ‘lmerTest’


R[write to console]: The following object is masked from ‘package:lme4’:

    lmer


R[write to console]: The following object is masked from ‘package:stats’:

    step


R[write to console]: ── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

R[write to console]: [32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.5     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

R[write to console]: ── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mtidyr[39m::[32mexpand()[39m masks [34mMatrix[39m::expand()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39

In [8]:
%%R -i deu

# German (deu): utterance length (num_tokens) by contingency, with random intercepts
# for child and transcript, fitted by maximum likelihood (REML = FALSE).
lm2 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = deu,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm2 <- emmeans(lm2, pairwise ~ contingent)
pval <- summary(emm2[["contrasts"]])[["p.value"]]
print(c(emm2[["contrasts"]], pval))

# NOTE(review): with a single p-value and n = 14, "holm" simply multiplies p by 14
# (the same result Bonferroni gives). A true Holm correction needs the p-values of all
# languages collected in one vector and adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm2,"contingent",contr="pairwise"),infer=TRUE) #group means
# test(contrast(emmeans(lm2,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Effect size of the contrast, standardised by the model's residual SD, stored in the
# row of `effect_sizes` (created in the library/setup cell) whose Language_name matches
# this data set's language code.
deu_lname <- deu$language[1]
deu_eff <- summary(eff_size(emm2, sigma = sigma(lm2), edf = df.residual(lm2)))$effect.size

effect_sizes[effect_sizes$Language_name == deu_lname, "max_voc_effect_size"] <- deu_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4810' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4810)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4810' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4810)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   -0.935 0.0948 Inf -9.863  <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 6.001967e-23

[1] 8.402754e-22


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242                  NA
3            est       -0.1484433                  NA
4            fas              NaN                  NA
5            fra       -0.1424092                  NA
6            hrv       -0.1661630                  NA
7            jpn       -0.3211032                  NA
8            kor       -0.2469108                  NA
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [9]:
%%R -i eng

# English (eng): utterance length (num_tokens) by contingency, with random intercepts
# for child and transcript, fitted by maximum likelihood (REML = FALSE).
lm3 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = eng,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm3 <- emmeans(lm3, pairwise ~ contingent)
pval <- summary(emm3[["contrasts"]])[["p.value"]]
print(c(emm3[["contrasts"]], pval))

# NOTE(review): on a single p-value with n = 14, "holm" only multiplies p by 14;
# a true Holm correction needs all languages' p-values adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm3,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm3,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Effect size of the contrast, standardised by the model's residual SD, stored in the
# matching row of `effect_sizes` (created in the library/setup cell).
eng_lname <- eng$language[1]
eng_eff <- summary(eff_size(emm3, sigma = sigma(lm3), edf = df.residual(lm3)))$effect.size

effect_sizes[effect_sizes$Language_name == eng_lname, "max_voc_effect_size"] <- eng_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 114722' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 114722)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 114722' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 114722)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   -0.584 0.0233 Inf -25.128 <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 2.484097e-139

[1] 3.477736e-138


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433                  NA
4            fas              NaN                  NA
5            fra       -0.1424092                  NA
6            hrv       -0.1661630                  NA
7            jpn       -0.3211032                  NA
8            kor       -0.2469108                  NA
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [10]:
%%R -i est

# Estonian (est): utterance length (num_tokens) by contingency, with random intercepts
# for child and transcript, fitted by maximum likelihood (REML = FALSE).
# Here `est` is the Estonian data frame, not a model estimate.
lm4 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = est,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm4 <- emmeans(lm4, pairwise ~ contingent)
pval <- summary(emm4[["contrasts"]])[["p.value"]]
print(c(emm4[["contrasts"]], pval))

# NOTE(review): on a single p-value with n = 14, "holm" only multiplies p by 14;
# a true Holm correction needs all languages' p-values adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm4,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm4,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Effect size of the contrast, standardised by the model's residual SD, stored in the
# matching row of `effect_sizes` (created in the library/setup cell).
est_lname <- est$language[1]
est_eff <- summary(eff_size(emm4, sigma = sigma(lm4), edf = df.residual(lm4)))$effect.size

effect_sizes[effect_sizes$Language_name == est_lname, "max_voc_effect_size"] <- est_eff
effect_sizes

R[write to console]: boundary (singular) fit: see ?isSingular

R[write to console]: boundary (singular) fit: see ?isSingular



[[1]]
 contrast                      estimate    SE   df t.ratio p.value
 contingent - (non-contingent)   -0.635 0.125 2063 -5.080  <.0001 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 4.122348e-07

[1] 5.771287e-06


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                  NA
5            fra       -0.1424092                  NA
6            hrv       -0.1661630                  NA
7            jpn       -0.3211032                  NA
8            kor       -0.2469108                  NA
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [11]:
%%R -i fas

# Persian (fas): utterance length (num_tokens) by contingency, fitted by maximum
# likelihood (REML = FALSE). Unlike most other language cells, only a transcript-level
# random intercept is fitted here (no (1 | target_child_id) term).
# NOTE(review): presumably because this sample has too few children - confirm.
lm5 <- lmer(
  num_tokens ~ contingent + (1 | transcript_id),
  data = fas,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm5 <- emmeans(lm5, pairwise ~ contingent)
pval <- summary(emm5[["contrasts"]])[["p.value"]]
print(c(emm5[["contrasts"]], pval))

# NOTE(review): on a single p-value with n = 14, "holm" only multiplies p by 14;
# a true Holm correction needs all languages' p-values adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm5,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm5,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# No effect size is computed for this language; its row in `effect_sizes`
# (created in the library/setup cell) is explicitly marked NaN.
fas_lname <- fas$language[1]

effect_sizes[effect_sizes$Language_name == fas_lname, "max_voc_effect_size"] <- NaN
effect_sizes

[[1]]
 contrast                      estimate    SE  df t.ratio p.value
 contingent - (non-contingent)   -0.477 0.189 540 -2.522  0.0120 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.01196451

[1] 0.1675032
   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092                  NA
6            hrv       -0.1661630                  NA
7            jpn       -0.3211032                  NA
8            kor       -0.2469108                  NA
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho          

In [12]:
%%R -i fra

# French (fra): utterance length (num_tokens) by contingency, with random intercepts
# for child and transcript, fitted by maximum likelihood (REML = FALSE).
lm6 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = fra,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm6 <- emmeans(lm6, pairwise ~ contingent)
pval <- summary(emm6[["contrasts"]])[["p.value"]]
print(c(emm6[["contrasts"]], pval))

# NOTE(review): on a single p-value with n = 14, "holm" only multiplies p by 14;
# a true Holm correction needs all languages' p-values adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm6,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm6,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Effect size of the contrast, standardised by the model's residual SD, stored in the
# matching row of `effect_sizes` (created in the library/setup cell).
fra_lname <- fra$language[1]
fra_eff <- summary(eff_size(emm6, sigma = sigma(lm6), edf = df.residual(lm6)))$effect.size

effect_sizes[effect_sizes$Language_name == fra_lname, "max_voc_effect_size"] <- fra_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 23322' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 23322)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 23322' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 23322)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   -0.368 0.0463 Inf -7.960  <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 1.724529e-15

[1] 2.414341e-14


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630                  NA
7            jpn       -0.3211032                  NA
8            kor       -0.2469108                  NA
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [13]:
%%R -i hrv

# Croatian (hrv): utterance length (num_tokens) by contingency, with random intercepts
# for child and transcript, fitted by maximum likelihood (REML = FALSE).
lm7 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = hrv,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm7 <- emmeans(lm7, pairwise ~ contingent)
pval <- summary(emm7[["contrasts"]])[["p.value"]]
print(c(emm7[["contrasts"]], pval))

# NOTE(review): on a single p-value with n = 14, "holm" only multiplies p by 14;
# a true Holm correction needs all languages' p-values adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm7,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm7,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Effect size of the contrast, standardised by the model's residual SD, stored in the
# matching row of `effect_sizes` (created in the library/setup cell).
hrv_lname <- hrv$language[1]
hrv_eff <- summary(eff_size(emm7, sigma = sigma(lm7), edf = df.residual(lm7)))$effect.size

effect_sizes[effect_sizes$Language_name == hrv_lname, "max_voc_effect_size"] <- hrv_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 6713' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 6713)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 6713' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 6713)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   -0.464 0.0673 Inf -6.901  <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 5.156291e-12

[1] 7.218807e-11


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032                  NA
8            kor       -0.2469108                  NA
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [14]:
%%R -i jpn

# Japanese (jpn): utterance length (num_tokens) by contingency, with random intercepts
# for child and transcript, fitted by maximum likelihood (REML = FALSE).
lm8 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = jpn,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm8 <- emmeans(lm8, pairwise ~ contingent)
pval <- summary(emm8[["contrasts"]])[["p.value"]]
print(c(emm8[["contrasts"]], pval))

# NOTE(review): on a single p-value with n = 14, "holm" only multiplies p by 14;
# a true Holm correction needs all languages' p-values adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm8,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm8,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Effect size of the contrast, standardised by the model's residual SD, stored in the
# matching row of `effect_sizes` (created in the library/setup cell).
jpn_lname <- jpn$language[1]
jpn_eff <- summary(eff_size(emm8, sigma = sigma(lm8), edf = df.residual(lm8)))$effect.size

effect_sizes[effect_sizes$Language_name == jpn_lname, "max_voc_effect_size"] <- jpn_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 22539' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 22539)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 22539' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 22539)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   -0.566 0.0255 Inf -22.141 <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 1.268781e-108

[1] 1.776294e-107


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108                  NA
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [15]:
%%R -i kor

# Korean (kor): utterance length (num_tokens) by contingency, fitted by maximum
# likelihood (REML = FALSE). Only a transcript-level random intercept is fitted here
# (no (1 | target_child_id) term), unlike most other language cells.
# NOTE(review): reason for dropping the child intercept is not recorded - confirm.
lm9 <- lmer(
  num_tokens ~ contingent + (1 | transcript_id),
  data = kor,
  REML = FALSE
)

# Pairwise contrast between the two contingency levels and its p-value.
emm9 <- emmeans(lm9, pairwise ~ contingent)
pval <- summary(emm9[["contrasts"]])[["p.value"]]
print(c(emm9[["contrasts"]], pval))

# NOTE(review): on a single p-value with n = 14, "holm" only multiplies p by 14;
# a true Holm correction needs all languages' p-values adjusted together.
print(p.adjust(pval, method = "holm", n = 14))

# Optional diagnostics, left disabled:
# summary(emmeans(lm9,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm9,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Effect size of the contrast, standardised by the model's residual SD, stored in the
# matching row of `effect_sizes` (created in the library/setup cell).
kor_lname <- kor$language[1]
kor_eff <- summary(eff_size(emm9, sigma = sigma(lm9), edf = df.residual(lm9)))$effect.size

effect_sizes[effect_sizes$Language_name == kor_lname, "max_voc_effect_size"] <- kor_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4722' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4722)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4722' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4722)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   -0.572 0.0649 Inf -8.819  <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 1.16e-18

[1] 1.624e-17


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108          -0.2712496
9            nor       -0.1823449                  NA
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [16]:
%%R -i nor

# nor sample: mixed model of num_tokens on contingency, fitted by maximum
# likelihood (REML = FALSE) with random intercepts for child and transcript.
# NOTE(review): the recorded run of this cell reports a boundary (singular)
# fit, so the random-effects structure may be richer than the data support.
lm10 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = nor,
  REML = FALSE
)

# Estimated marginal means per contingency level plus their pairwise contrast.
emm10 <- emmeans(lm10, pairwise ~ contingent)

# Raw p-value of the contrast, printed next to the contrast table, followed by
# the p.adjust() value for n = 14 comparisons. With a single p-value this
# multiplies it by 14 (capped at 1).
pval <- summary(emm10$contrasts)$p.value
print(c(emm10$contrasts, pval))
print(p.adjust(pval, method = "holm", n = 14))

# Optional follow-ups, left disabled:
# summary(emmeans(lm10,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm10,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Language code stored with the data; used as the row key in effect_sizes.
nor_lname <- nor$language[1]

# Standardised effect size of the contrast, scaled by the model's residual SD.
nor_eff <- summary(
  eff_size(emm10, sigma = sigma(lm10), edf = df.residual(lm10))
)$effect.size

# Record the value in this language's row and show the running table.
effect_sizes[effect_sizes$Language_name == nor_lname, "max_voc_effect_size"] <- nor_eff
effect_sizes

R[write to console]: boundary (singular) fit: see ?isSingular



[[1]]
 contrast                      estimate    SE   df t.ratio p.value
 contingent - (non-contingent)   -0.743 0.133 2109 -5.605  <.0001 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 2.358735e-08

[1] 3.302228e-07


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108          -0.2712496
9            nor       -0.1823449          -0.2682480
10           pol              NaN                  NA
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [17]:
%%R -i pol

# pol sample: ordinary linear model of num_tokens on contingency.
# No random effects are fitted because the sample contains only one
# transcript from one subject.
# REML is an lmer() option, not an lm() argument; lm() would only disregard it
# (with an "extra argument" warning), so it is not passed here. The fitted
# model is the same either way.
lm11 <- lm(num_tokens ~ contingent, data = pol)

# Estimated marginal means per contingency level plus their pairwise contrast.
emm11 <- emmeans(lm11, pairwise ~ contingent)

# Raw p-value of the contrast, printed next to the contrast table, followed by
# the p.adjust() value for n = 14 comparisons. With a single p-value this
# multiplies it by 14 (capped at 1).
pval <- summary(emm11$contrasts)$p.value
print(c(emm11$contrasts, pval))
print(p.adjust(pval, method = "holm", n = 14))

# Optional follow-ups, left disabled:
# summary(emmeans(lm11,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm11,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Language code stored with the data; used as the row key in effect_sizes.
pol_lname <- pol$language[1]

# No effect size is computed for this language; the table entry is set to NaN.
# NOTE(review): presumably deliberate (single transcript / non-significant
# contrast) -- confirm.
effect_sizes[effect_sizes$Language_name == pol_lname, "max_voc_effect_size"] <- NaN
effect_sizes

[[1]]
 contrast                      estimate    SE  df t.ratio p.value
 contingent - (non-contingent)   -0.368 0.382 100 -0.963  0.3378 


[[2]]
[1] 0.3378332

[1] 1
   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108          -0.2712496
9            nor       -0.1823449          -0.2682480
10           pol              NaN                 NaN
11           por       -0.2390893                  NA
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [18]:
%%R -i por

# por sample: mixed model of num_tokens on contingency, fitted by maximum
# likelihood (REML = FALSE) with random intercepts for child and transcript.
lm12 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = por,
  REML = FALSE
)

# Estimated marginal means per contingency level plus their pairwise contrast.
emm12 <- emmeans(lm12, pairwise ~ contingent)

# Raw p-value of the contrast, printed next to the contrast table, followed by
# the p.adjust() value for n = 14 comparisons. With a single p-value this
# multiplies it by 14 (capped at 1).
pval <- summary(emm12$contrasts)$p.value
print(c(emm12$contrasts, pval))
print(p.adjust(pval, method = "holm", n = 14))

# Optional follow-ups, left disabled:
# summary(emmeans(lm12,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm12,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Language code stored with the data; used as the row key in effect_sizes.
por_lname <- por$language[1]

# Standardised effect size of the contrast, scaled by the model's residual SD.
por_eff <- summary(
  eff_size(emm12, sigma = sigma(lm12), edf = df.residual(lm12))
)$effect.size

# Record the value in this language's row and show the running table.
effect_sizes[effect_sizes$Language_name == por_lname, "max_voc_effect_size"] <- por_eff
effect_sizes

[[1]]
 contrast                      estimate    SE   df t.ratio p.value
 contingent - (non-contingent)   -0.831 0.107 2820 -7.758  <.0001 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 1.200582e-14

[1] 1.680815e-13


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108          -0.2712496
9            nor       -0.1823449          -0.2682480
10           pol              NaN                 NaN
11           por       -0.2390893          -0.2981426
12           spa       -0.2410338                  NA
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [19]:
%%R -i spa

# spa sample: mixed model of num_tokens on contingency, fitted by maximum
# likelihood (REML = FALSE) with random intercepts for child and transcript.
lm13 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = spa,
  REML = FALSE
)

# Estimated marginal means per contingency level plus their pairwise contrast.
emm13 <- emmeans(lm13, pairwise ~ contingent)

# Raw p-value of the contrast, printed next to the contrast table, followed by
# the p.adjust() value for n = 14 comparisons. With a single p-value this
# multiplies it by 14 (capped at 1).
pval <- summary(emm13$contrasts)$p.value
print(c(emm13$contrasts, pval))
print(p.adjust(pval, method = "holm", n = 14))

# Optional follow-ups, left disabled:
# summary(emmeans(lm13,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm13,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Language code stored with the data; used as the row key in effect_sizes.
spa_lname <- spa$language[1]

# Standardised effect size of the contrast, scaled by the model's residual SD.
spa_eff <- summary(
  eff_size(emm13, sigma = sigma(lm13), edf = df.residual(lm13))
)$effect.size

# Record the value in this language's row and show the running table.
effect_sizes[effect_sizes$Language_name == spa_lname, "max_voc_effect_size"] <- spa_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4916' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4916)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4916' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4916)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   -0.543 0.0679 Inf -7.990  <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 1.35346e-15

[1] 1.894844e-14


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108          -0.2712496
9            nor       -0.1823449          -0.2682480
10           pol              NaN                 NaN
11           por       -0.2390893          -0.2981426
12           spa       -0.2410338          -0.2353482
13           swe       -0.2695666                  NA
14           zho              NaN                  NA


In [20]:
%%R -i swe

# swe sample: mixed model of num_tokens on contingency, fitted by maximum
# likelihood (REML = FALSE) with random intercepts for child and transcript.
lm14 <- lmer(
  num_tokens ~ contingent + (1 | target_child_id) + (1 | transcript_id),
  data = swe,
  REML = FALSE
)

# Estimated marginal means per contingency level plus their pairwise contrast.
emm14 <- emmeans(lm14, pairwise ~ contingent)

# Raw p-value of the contrast, printed next to the contrast table, followed by
# the p.adjust() value for n = 14 comparisons. With a single p-value this
# multiplies it by 14 (capped at 1).
pval <- summary(emm14$contrasts)$p.value
print(c(emm14$contrasts, pval))
print(p.adjust(pval, method = "holm", n = 14))

# Optional follow-ups, left disabled:
# summary(emmeans(lm14,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm14,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Language code stored with the data; used as the row key in effect_sizes.
swe_lname <- swe$language[1]

# Standardised effect size of the contrast, scaled by the model's residual SD.
swe_eff <- summary(
  eff_size(emm14, sigma = sigma(lm14), edf = df.residual(lm14))
)$effect.size

# Record the value in this language's row and show the running table.
effect_sizes[effect_sizes$Language_name == swe_lname, "max_voc_effect_size"] <- swe_eff
effect_sizes

[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)   -0.612 0.0925 2758 -6.612  <.0001 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 4.522597e-11

[1] 6.331636e-10


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108          -0.2712496
9            nor       -0.1823449          -0.2682480
10           pol              NaN                 NaN
11           por       -0.2390893          -0.2981426
12           spa       -0.2410338          -0.2353482
13           swe       -0.2695666          -0.2685154
14           zho              NaN                  NA


In [21]:
%%R -i zho

# zho sample: mixed model of num_tokens on contingency, fitted by maximum
# likelihood (REML = FALSE) with a random intercept for each transcript.
# NOTE(review): no (1|target_child_id) term here, unlike the nor/por/spa/swe
# cells -- presumably too few children; confirm.
lm15 <- lmer(num_tokens ~ contingent + (1 | transcript_id), data = zho, REML = FALSE)

# Estimated marginal means per contingency level plus their pairwise contrast.
emm15 <- emmeans(lm15, pairwise ~ contingent)

# Raw p-value of the contrast, printed next to the contrast table, followed by
# the p.adjust() value for n = 14 comparisons. With a single p-value this
# multiplies it by 14 (capped at 1).
pval <- summary(emm15$contrasts)$p.value
print(c(emm15$contrasts, pval))
print(p.adjust(pval, method = "holm", n = 14))

# Optional follow-ups, left disabled:
# summary(emmeans(lm15,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm15,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# Language code stored with the data; used as the row key in effect_sizes.
zho_lname <- zho$language[1]

# No effect size is computed for this language; the table entry is set to NaN.
# NOTE(review): presumably deliberate (the adjusted p-value in the recorded
# run is not significant) -- confirm.
effect_sizes[effect_sizes$Language_name == zho_lname, "max_voc_effect_size"] <- NaN
effect_sizes

[[1]]
 contrast                      estimate   SE  df t.ratio p.value
 contingent - (non-contingent)   -0.665 0.27 333 -2.464  0.0142 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.01423914

[1] 0.1993479
   Language_name rand_effect_size max_voc_effect_size
1            deu       -0.2014434          -0.2921636
2            eng       -0.2136242          -0.2056120
3            est       -0.1484433          -0.2174693
4            fas              NaN                 NaN
5            fra       -0.1424092          -0.1191522
6            hrv       -0.1661630          -0.1725226
7            jpn       -0.3211032          -0.3044613
8            kor       -0.2469108          -0.2712496
9            nor       -0.1823449          -0.2682480
10           pol              NaN                 NaN
11           por       -0.2390893          -0.2981426
12           spa       -0.2410338          -0.2353482
13           swe       -0.2695666          -0.2685154
14           zho            

In [22]:
%%R
# Save the per-language effect-size table as CSV, without row names.
write.csv(effect_sizes, file = '../data/MLUw_effect_sizes.csv', row.names = FALSE)