# Natural Statistics Cross-linguistic: 

#### Proportion of single-word utterances analysis - maximum vocal turns sample

----

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, "data_proc")
import contingent_extraction
import warnings
warnings.filterwarnings('ignore')

In [3]:
maxturn_dat_inc = pd.read_csv("../data/maxturn_dat_inc_master.csv",index_col=0,low_memory=False)
maxturn_dat_inc=maxturn_dat_inc[maxturn_dat_inc["language"]!="ara"]
maxturn_dat_inc=maxturn_dat_inc[(maxturn_dat_inc["target_child_age"]>=5) & (maxturn_dat_inc["target_child_age"]<=30)]
maxturn_dat_inc_cg = maxturn_dat_inc[maxturn_dat_inc["caregiver"]=="caregiver"]

maxturn_dat_inc_cg["contingent"] = np.where(maxturn_dat_inc_cg["contingent"]==1, "contingent", "non-contingent")

maxturn_dat_inc_cg = maxturn_dat_inc_cg[maxturn_dat_inc_cg["gloss"].notna()]
maxturn_dat_inc_cg = maxturn_dat_inc_cg[maxturn_dat_inc_cg["gloss"]!="xxx"]
maxturn_dat_inc_cg = maxturn_dat_inc_cg[maxturn_dat_inc_cg["gloss"]!="yyy"]
maxturn_dat_inc_cg = maxturn_dat_inc_cg[maxturn_dat_inc_cg["gloss"]!="www"]

maxturn_dat_inc_cg["swu"]=np.where(maxturn_dat_inc_cg["num_tokens"]==1,1,0)

In [4]:
maxturn_swu_stats = (maxturn_dat_inc_cg.groupby(["Language_name","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
maxturn_swu_stats =  maxturn_swu_stats.rename({'mean': 'means'}, axis=1)

----
#### Proportion single-word utterances plot

In [5]:
%load_ext rpy2.ipython

In [5]:
%%R -i maxturn_swu_stats

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

# # ara_label <- data.frame(means=c(.9),contingent = c(1.5),language="ara")
deu_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="German")
# deu_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="German")
eng_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="English")
est_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Estonian")
# est_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
fas_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Persian")
fra_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="French")
hrv_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Croatian")
jpn_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Japanese")
kor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Korean")
nor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Norwegian")
pol_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Polish")
por_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Portuguese")
spa_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Spanish")
swe_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Swedish")
zho_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Mandarin")


p <- ggplot(maxturn_swu_stats, aes(x = contingent, y = means, color = Language_name)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_name,ncol = 7) + 
     geom_text(data = deu_label,label = "***",size=8,color="black") +
#      geom_text(data = deu_ns_label,label = "ns",size=4,color="black",fontface = "italic") +
     geom_text(data = eng_label,label = "***",size=8,color="black") +  
#      geom_text(data = est_ns_label,label = "ns",size=4,color="black",fontface = "italic") +  
     geom_text(data = est_label,label = "***", size=8,color="black") +  
     geom_text(data = fas_label,label = "ns", size=4,color="black",fontface = "italic") + 
     geom_text(data = fra_label,label = "***",size=8,color="black") +  
     geom_text(data = hrv_label,label = "***",size=8,color="black") + 
     geom_text(data = jpn_label,label = "***",size=8,color="black") + 
     geom_text(data = kor_label,label = "***",size=8,color="black") +  
     geom_text(data = nor_label,label = "***",size=8,color="black") +  
     geom_text(data = pol_label,label = "ns", size=4,color="black",fontface = "italic") +    
     geom_text(data = por_label,label = "***",size=8,color="black") +  
     geom_text(data = spa_label,label = "***",size=8,color="black") + 
     geom_text(data = swe_label,label = "***",size=8,color="black") + 
     geom_text(data = zho_label,label = "ns", size=4, color="black",fontface = "italic") +
     ylim(0, .5) +
     labs(# title = "Proportion of single-word utterances",
       # subtitle = "Using CHILDES Token counts",
       y = "Prop. single word utt.", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))
     ggsave("../figures/token_swu_maxturn.pdf", width = 11.7, height = 6.2)

Plot + effect estimates

In [6]:
%%R

deu_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="German")
eng_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="English")
est_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Estonian")
fra_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="French")
hrv_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Croatian")
jpn_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Japanese")
kor_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Korean")
nor_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Norwegian")
por_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Portuguese")
spa_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Spanish")
swe_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Swedish")

p <- p + geom_text(data = deu_est_label,label = "est=.09",size=4,color="black") +
         geom_text(data = eng_est_label,label = "est=.09",size=4,color="black") +
         geom_text(data = est_est_label,label = "est=.06",size=4,color="black") +
         geom_text(data = fra_est_label,label = "est=.04",size=4,color="black") +
         geom_text(data = hrv_est_label,label = "est=.07",size=4,color="black") +
         geom_text(data = jpn_est_label,label = "est=.18",size=4,color="black") +
         geom_text(data = kor_est_label,label = "est=.08",size=4,color="black") +
         geom_text(data = nor_est_label,label = "est=.03",size=4,color="black") +
         geom_text(data = por_est_label,label = "est=.09",size=4,color="black") +
         geom_text(data = spa_est_label,label = "est=.05",size=4,color="black") +
         geom_text(data = swe_est_label,label = "est=.14",size=4,color="black")
         
ggsave("../figures/token_swu_maxturn_eff.pdf", width = 11.7, height = 6.2)

\+ sample size

In [7]:
%%R

deu_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="German")
eng_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="English")
est_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Estonian")
fas_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Persian")
fra_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="French")
hrv_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Croatian")
jpn_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Japanese")
kor_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Korean")
nor_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Norwegian")
pol_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Polish")
por_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Portuguese")
spa_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Spanish")
swe_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Swedish")
zho_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Mandarin")

deu_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="German")
eng_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="English")
est_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Estonian")
fas_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Persian")
fra_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="French")
hrv_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Croatian")
jpn_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Japanese")
kor_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Korean")
nor_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Norwegian")
pol_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Polish")
por_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Portuguese")
spa_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Spanish")
swe_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Swedish")
zho_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Mandarin")

p <- p + geom_text(data = deu_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = eng_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = est_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fas_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fra_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = hrv_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = jpn_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = kor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = nor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = pol_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = por_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = spa_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = swe_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = zho_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = deu_sz_label,label = " = 39",size=4,color="black") +
         geom_text(data = eng_sz_label,label = " =1010",size=4,color="black") +
         geom_text(data = est_sz_label,label = " = 22",size=4,color="black") +
         geom_text(data = fas_sz_label,label = " = 12",size=4,color="black") +
         geom_text(data = fra_sz_label,label = " = 258",size=4,color="black") +
         geom_text(data = hrv_sz_label,label = " = 79",size=4,color="black") +
         geom_text(data = jpn_sz_label,label = " = 139",size=4,color="black") +
         geom_text(data = kor_sz_label,label = " = 37",size=4,color="black") +
         geom_text(data = nor_sz_label,label = " = 56",size=4,color="black") +
         geom_text(data = pol_sz_label,label = " = 1",size=4,color="black") +
         geom_text(data = por_sz_label,label = " = 24",size=4,color="black") +
         geom_text(data = spa_sz_label,label = " = 31",size=4,color="black") +
         geom_text(data = swe_sz_label,label = " = 16",size=4,color="black") +
         geom_text(data = zho_sz_label,label = " = 2",size=4,color="black")
         

ggsave("../figures/token_swu_maxturn_eff_n.pdf", width = 11.7, height = 6.2)

----

#### SWU mixed models

In [6]:
deu=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="deu"]
eng=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="eng"]
est=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="est"]
fas=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="fas"]
fra=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="fra"]
hrv=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="hrv"]
jpn=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="jpn"]
kor=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="kor"]
nor=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="nor"]
pol=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="pol"]
por=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="por"]
spa=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="spa"]
swe=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="swe"]
zho=maxturn_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][maxturn_dat_inc_cg["language"]=="zho"]

In [8]:
%%R

library("lme4")
library("broom")
library("emmeans")
library("lmerTest")
library("tidyverse")

effect_sizes <- read.csv("../data/SWU_effect_sizes.csv")

effect_sizes["max_turn_effect_size"] <- NA

In [9]:
%%R -i deu

lm2_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=deu)
emm2_1<-emmeans(lm2_1,pairwise~contingent)
pval<-summary(emm2_1$contrasts)$p.value
print(c(emm2_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# plot(emm2_1)
# summary(emmeans(lm2_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm2_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

deu_lname <- deu$language[1]

deu_eff <- eff_size(emm2_1,sigma = sigma(lm2_1), edf = df.residual(lm2_1))

deu_eff <- summary(deu_eff)$effect.size

effect_sizes[effect_sizes$Language_name==deu_lname,"max_turn_effect_size"] <- deu_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 5287' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 5287)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 5287' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 5287)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)    0.086 0.0113 Inf 7.604   <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 2.877465e-14

[1] 4.028451e-13


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518                   NA
3            est       0.09026104           0.1718871                   NA
4            fas               NA                  NA                   NA
5            fra       0.10166887           0.1104040                   NA
6            hrv       0.17609065           0.1905610                   NA
7            jpn       0.37964019           0.3503237                   NA
8            kor       0.25125284           0.2379528                   NA
9            nor       0.16761078           0.2302313                   NA
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [10]:
%%R -i eng

lm3_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=eng, REML= FALSE)
emm3_1<-emmeans(lm3_1,pairwise~contingent)
pval<-summary(emm3_1$contrasts)$p.value
print(c(emm3_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm3_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm3_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

eng_lname <- eng$language[1]

eng_eff <- eff_size(emm3_1,sigma = sigma(lm3_1), edf = df.residual(lm3_1))

eng_eff <- summary(eng_eff)$effect.size

effect_sizes[effect_sizes$Language_name==eng_lname,"max_turn_effect_size"] <- eng_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 123279' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 123279)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 123279' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 123279)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)    0.108 0.00312 Inf 34.649  <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 4.668348e-263

[1] 6.535687e-262


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871                   NA
4            fas               NA                  NA                   NA
5            fra       0.10166887           0.1104040                   NA
6            hrv       0.17609065           0.1905610                   NA
7            jpn       0.37964019           0.3503237                   NA
8            kor       0.25125284           0.2379528                   NA
9            nor       0.16761078           0.2302313                   NA
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [11]:
%%R -i est

lm4_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=est, REML= FALSE)
emm4_1<-emmeans(lm4_1,pairwise~contingent)
pval<-summary(emm4_1$contrasts)$p.value
print(c(emm4_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm4_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm4_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

est_lname <- est$language[1]

est_eff <- eff_size(emm4_1,sigma = sigma(lm4_1), edf = df.residual(lm4_1))

est_eff <- summary(est_eff)$effect.size

effect_sizes[effect_sizes$Language_name==est_lname,"max_turn_effect_size"] <- est_eff
effect_sizes

[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)   0.0668 0.0138 2688 4.844   <.0001 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 1.344248e-06

[1] 1.881947e-05


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                   NA
5            fra       0.10166887           0.1104040                   NA
6            hrv       0.17609065           0.1905610                   NA
7            jpn       0.37964019           0.3503237                   NA
8            kor       0.25125284           0.2379528                   NA
9            nor       0.16761078           0.2302313                   NA
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [13]:
%%R -i fas

lm5_1 <- lmer(swu ~ contingent + (1|transcript_id),data=fas, REML= FALSE)
emm5_1<-emmeans(lm5_1,pairwise~contingent)
pval<-summary(emm5_1$contrasts)$p.value
print(c(emm5_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm5_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm5_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fas_lname <- fas$language[1]

effect_sizes[effect_sizes$Language_name==fas_lname,"max_turn_effect_size"] <- NaN
effect_sizes

[[1]]
 contrast                      estimate     SE  df t.ratio p.value
 contingent - (non-contingent)   0.0122 0.0324 712 0.376   0.7073 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.7073194

[1] 1
   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040                   NA
6            hrv       0.17609065           0.1905610                   NA
7            jpn       0.37964019           0.3503237                   NA
8            kor       0.25125284           0.2379528                   NA
9            nor       0.16761078           0.2302313                   NA
10           pol               NA      

In [14]:
%%R -i fra
lm6_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=fra, REML= FALSE)
emm6_1<-emmeans(lm6_1,pairwise~contingent)
pval<-summary(emm6_1$contrasts)$p.value
print(c(emm6_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm6_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm6_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fra_lname <- fra$language[1]

fra_eff <- eff_size(emm6_1,sigma = sigma(lm6_1), edf = df.residual(lm6_1))

fra_eff <- summary(fra_eff)$effect.size

effect_sizes[effect_sizes$Language_name==fra_lname,"max_turn_effect_size"] <- fra_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 26424' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 26424)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 26424' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 26424)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)   0.0448 0.00553 Inf 8.099   <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 5.542442e-16

[1] 7.759419e-15


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610                   NA
7            jpn       0.37964019           0.3503237                   NA
8            kor       0.25125284           0.2379528                   NA
9            nor       0.16761078           0.2302313                   NA
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [15]:
%%R -i hrv

lm7_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=hrv, REML= FALSE)
emm7_1<-emmeans(lm7_1,pairwise~contingent)
pval<-summary(emm7_1$contrasts)$p.value
print(c(emm7_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm7_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm7_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

hrv_lname <- hrv$language[1]

hrv_eff <- eff_size(emm7_1,sigma = sigma(lm7_1), edf = df.residual(lm7_1))

hrv_eff <- summary(hrv_eff)$effect.size

effect_sizes[effect_sizes$Language_name==hrv_lname,"max_turn_effect_size"] <- hrv_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 7375' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 7375)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 7375' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 7375)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)   0.0763 0.00971 Inf 7.859   <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 3.874507e-15

[1] 5.42431e-14


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237                   NA
8            kor       0.25125284           0.2379528                   NA
9            nor       0.16761078           0.2302313                   NA
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [16]:
%%R -i jpn

lm8_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=jpn, REML= FALSE)
emm8_1<-emmeans(lm8_1,pairwise~contingent)
pval<-summary(emm8_1$contrasts)$p.value
print(c(emm8_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm8_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm8_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

jpn_lname <- jpn$language[1]

jpn_eff <- eff_size(emm8_1,sigma = sigma(lm8_1), edf = df.residual(lm8_1))

jpn_eff <- summary(jpn_eff)$effect.size

effect_sizes[effect_sizes$Language_name==jpn_lname,"max_turn_effect_size"] <- jpn_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 23841' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 23841)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 23841' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 23841)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)     0.18 0.00636 Inf 28.248  <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 1.523675e-175

[1] 2.133145e-174


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528                   NA
9            nor       0.16761078           0.2302313                   NA
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [17]:
%%R -i kor

lm9_1 <- lmer(swu ~ contingent + (1|transcript_id),data=kor, REML= FALSE)
emm9_1<-emmeans(lm9_1,pairwise~contingent)
pval<-summary(emm9_1$contrasts)$p.value
print(c(emm9_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm9_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm9_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

kor_lname <- kor$language[1]

kor_eff <- eff_size(emm9_1,sigma = sigma(lm9_1), edf = df.residual(lm9_1))

kor_eff <- summary(kor_eff)$effect.size

effect_sizes[effect_sizes$Language_name==kor_lname,"max_turn_effect_size"] <- kor_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4902' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4902)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4902' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4902)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate    SE  df z.ratio p.value
 contingent - (non-contingent)   0.0874 0.011 Inf 7.956   <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 1.782353e-15

[1] 2.495294e-14


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528            0.2399801
9            nor       0.16761078           0.2302313                   NA
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [18]:
%%R -i nor

lm10_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=nor, REML= FALSE)
emm10_1<-emmeans(lm10_1,pairwise~contingent)
pval<-summary(emm10_1$contrasts)$p.value
print(c(emm10_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm10_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm10_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

nor_lname <- nor$language[1]

nor_eff <- eff_size(emm10_1,sigma = sigma(lm10_1), edf = df.residual(lm10_1))

nor_eff <- summary(nor_eff)$effect.size

effect_sizes[effect_sizes$Language_name==nor_lname,"max_turn_effect_size"] <- nor_eff
effect_sizes

R[write to console]: boundary (singular) fit: see ?isSingular

R[write to console]: boundary (singular) fit: see ?isSingular



[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)   0.0735 0.0183 2460 4.011   0.0001 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 6.215192e-05

[1] 0.0008701269


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528            0.2399801
9            nor       0.16761078           0.2302313            0.1775366
10           pol               NA                  NA                   NA
11           por       0.17936983           0.3122990                   NA
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [19]:
%%R -i pol
 
# simple linear model (no random effects, because only 1 transcript from 1 sub)

lm11_1 <- lm(swu ~ contingent,data=pol, REML= FALSE)
emm11_1<-emmeans(lm11_1,pairwise~contingent)
pval<-summary(emm11_1$contrasts)$p.value
print(c(emm11_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm11_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm11_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

pol_lname <- pol$language[1]

effect_sizes[effect_sizes$Language_name==pol_lname,"max_turn_effect_size"] <- NaN
effect_sizes

[[1]]
 contrast                      estimate     SE df t.ratio p.value
 contingent - (non-contingent)  -0.0278 0.0769 94 -0.362  0.7182 


[[2]]
[1] 0.718246

[1] 1
   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528            0.2399801
9            nor       0.16761078           0.2302313            0.1775366
10           pol               NA                  NA                  NaN
11       

In [20]:
%%R -i por

lm12_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=por, REML= FALSE)
emm12_1<-emmeans(lm12_1,pairwise~contingent)
pval<-summary(emm12_1$contrasts)$p.value
print(c(emm12_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm12_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm12_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

por_lname <- por$language[1]

por_eff <- eff_size(emm12_1,sigma = sigma(lm12_1), edf = df.residual(lm12_1))

por_eff <- summary(por_eff)$effect.size

effect_sizes[effect_sizes$Language_name==por_lname,"max_turn_effect_size"] <- por_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 3586' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 3586)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 3586' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 3586)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0957 0.0123 Inf 7.779   <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 7.297768e-15

[1] 1.021688e-13


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528            0.2399801
9            nor       0.16761078           0.2302313            0.1775366
10           pol               NA                  NA                  NaN
11           por       0.17936983           0.3122990            0.2638239
12           spa       0.21785360           0.1744664                   NA
13           swe       0.

In [21]:
%%R -i spa


lm13_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=spa, REML= FALSE)
emm13_1<-emmeans(lm13_1,pairwise~contingent)
pval<-summary(emm13_1$contrasts)$p.value
print(c(emm13_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm13_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm13_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

spa_lname <- spa$language[1]

spa_eff <- eff_size(emm13_1,sigma = sigma(lm13_1), edf = df.residual(lm13_1))

spa_eff <- summary(spa_eff)$effect.size

effect_sizes[effect_sizes$Language_name==spa_lname,"max_turn_effect_size"] <- spa_eff
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 5409' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 5409)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 5409' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 5409)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0592 0.0114 Inf 5.182   <.0001 

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 2.200413e-07

[1] 3.080578e-06


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528            0.2399801
9            nor       0.16761078           0.2302313            0.1775366
10           pol               NA                  NA                  NaN
11           por       0.17936983           0.3122990            0.2638239
12           spa       0.21785360           0.1744664            0.1449862
13           swe       0.

In [22]:
%%R -i swe


lm14_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=swe, REML= FALSE)
emm14_1<-emmeans(lm14_1,pairwise~contingent)
pval<-summary(emm14_1$contrasts)$p.value
print(c(emm14_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm14_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm14_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

swe_lname <- swe$language[1]

swe_eff <- eff_size(emm14_1,sigma = sigma(lm14_1), edf = df.residual(lm14_1))

swe_eff <- summary(swe_eff)$effect.size

effect_sizes[effect_sizes$Language_name==swe_lname,"max_turn_effect_size"] <- swe_eff
effect_sizes

[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)    0.138 0.0172 2754 8.013   <.0001 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 1.630746e-15

[1] 2.283045e-14


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528            0.2399801
9            nor       0.16761078           0.2302313            0.1775366
10           pol               NA                  NA                  NaN
11           por       0.17936983           0.3122990            0.2638239
12           spa       0.21785360           0.1744664            0.1449862
13           swe       0.

In [23]:
%%R -i zho

lm15_1 <- lmer(swu ~ contingent + (1|transcript_id),data=zho, REML= FALSE)
emm15_1<-emmeans(lm15_1,pairwise~contingent)
pval<-summary(emm15_1$contrasts)$p.value
print(c(emm15_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm15_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm15_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

zho_lname <- zho$language[1]

effect_sizes[effect_sizes$Language_name==zho_lname,"max_turn_effect_size"] <- NaN
effect_sizes

[[1]]
 contrast                      estimate     SE  df t.ratio p.value
 contingent - (non-contingent)   0.0569 0.0355 431 1.605   0.1092 

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.1092234

[1] 1
   Language_name rand_effect_size max_voc_effect_size max_turn_effect_size
1            deu       0.17637894           0.2092186            0.2148297
2            eng       0.26132217           0.2586518            0.2675167
3            est       0.09026104           0.1718871            0.1939326
4            fas               NA                  NA                  NaN
5            fra       0.10166887           0.1104040            0.1136619
6            hrv       0.17609065           0.1905610            0.1869155
7            jpn       0.37964019           0.3503237            0.3772449
8            kor       0.25125284           0.2379528            0.2399801
9            nor       0.16761078           0.2302313            0.1775366
10           pol               NA      

In [24]:
%%R
write.csv(x=effect_sizes,'../data/SWU_effect_sizes.csv', row.names = FALSE)