# Natural Statistics Cross-linguistic: 

#### Proportion of single-word utterances analysis

----

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, "data_proc")
import contingent_extraction
import warnings
warnings.filterwarnings('ignore')

In [2]:
rand_dat_inc = pd.read_csv("../data/rand_dat_inc_master.csv",index_col=0,low_memory=False)
rand_dat_inc=rand_dat_inc[rand_dat_inc["language"]!="ara"]
rand_dat_inc=rand_dat_inc[(rand_dat_inc["target_child_age"]>=5) & (rand_dat_inc["target_child_age"]<=30)]
rand_dat_inc_cg = rand_dat_inc[rand_dat_inc["caregiver"]=="caregiver"]

rand_dat_inc_cg["contingent"] = np.where(rand_dat_inc_cg["contingent"]==1, "contingent", "non-contingent")

rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"].notna()]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="xxx"]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="yyy"]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="www"]

rand_dat_inc_cg["swu"]=np.where(rand_dat_inc_cg["num_tokens"]==1,1,0)

In [3]:
rand_swu_stats = (rand_dat_inc_cg.groupby(["Language_name","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_sumstats =  rand_swu_stats.rename({'mean': 'means'}, axis=1)

In [4]:
rand_swu_sumstats.to_csv("../data/rand_swu_sumstats.csv")

----
#### Proportion single-word utterances plot

In [5]:
%load_ext rpy2.ipython

In [6]:
%%R -i rand_swu_sumstats

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

xlabs <- c("C", "NC")

# # ara_label <- data.frame(means=c(.9),contingent = c(1.5),language="ara")
deu_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="German")
# deu_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="German")
eng_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="English")
est_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
# est_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
fas_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Persian")
fra_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="French")
hrv_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Croatian")
jpn_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Japanese")
kor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Korean")
nor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Norwegian")
pol_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Polish")
por_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Portuguese")
spa_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Spanish")
swe_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Swedish")
zho_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Mandarin")


p <- ggplot(rand_swu_sumstats, aes(x = contingent, y = means, color = Language_name)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_name,ncol = 7) + 
     geom_text(data = deu_label,label = "***",size=8,color="black") +
     geom_text(data = eng_label,label = "***",size=8,color="black") +  
     geom_text(data = est_label,label = "ns", size=4,color="black",fontface = "italic") +  
     geom_text(data = fas_label,label = "ns", size=4,color="black",fontface = "italic") + 
     geom_text(data = fra_label,label = "***",size=8,color="black") +  
     geom_text(data = hrv_label,label = "***",size=8,color="black") + 
     geom_text(data = jpn_label,label = "***",size=8,color="black") + 
     geom_text(data = kor_label,label = "***",size=8,color="black") +  
     geom_text(data = nor_label,label = "**",size=8,color="black") +  
     geom_text(data = pol_label,label = "ns", size=4,color="black",fontface = "italic") +    
     geom_text(data = por_label,label = "***",size=8,color="black") +  
     geom_text(data = spa_label,label = "***",size=8,color="black") + 
     geom_text(data = swe_label,label = "***",size=8,color="black") + 
     geom_text(data = zho_label,label = "ns", size=4, color="black",fontface = "italic") +
     ylim(0, .5) +
     labs(tag="C",
          y = "Proportion of Single Word Utterances",
          x = "") +
     theme_classic() +
     scale_x_discrete(labels= xlabs) +
     theme(text = element_text(size=16),
           axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))
      ggsave("../figures/token_rand_swu.pdf", width = 11.7, height = 6.2)


for manuscript

In [7]:
%%R -i rand_swu_sumstats

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

xlabs <- c("C", "NC")

# # ara_label <- data.frame(means=c(.9),contingent = c(1.5),language="ara")
deu_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="German")
# deu_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="German")
eng_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="English")
est_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
# est_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
fas_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Persian")
fra_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="French")
hrv_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Croatian")
jpn_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Japanese")
kor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Korean")
nor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Norwegian")
pol_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Polish")
por_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Portuguese")
spa_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Spanish")
swe_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Swedish")
zho_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Mandarin")


p <- ggplot(rand_swu_sumstats, aes(x = contingent, y = means, color = Language_name)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_name,ncol = 7) + 
     geom_text(data = deu_label,label = "***",size=8,color="black") +
     geom_text(data = eng_label,label = "***",size=8,color="black") +  
     geom_text(data = est_label,label = "ns", size=4,color="black",fontface = "italic") +  
     geom_text(data = fas_label,label = "ns", size=4,color="black",fontface = "italic") + 
     geom_text(data = fra_label,label = "***",size=8,color="black") +  
     geom_text(data = hrv_label,label = "***",size=8,color="black") + 
     geom_text(data = jpn_label,label = "***",size=8,color="black") + 
     geom_text(data = kor_label,label = "***",size=8,color="black") +  
     geom_text(data = nor_label,label = "**",size=8,color="black") +  
     geom_text(data = pol_label,label = "ns", size=4,color="black",fontface = "italic") +    
     geom_text(data = por_label,label = "***",size=8,color="black") +  
     geom_text(data = spa_label,label = "***",size=8,color="black") + 
     geom_text(data = swe_label,label = "***",size=8,color="black") + 
     geom_text(data = zho_label,label = "ns", size=4, color="black",fontface = "italic") +
     ylim(0, .5) +
     labs(tag="C",
          y = "Proportion of Single Word Utterances",
          x = "") +
     theme_classic() +
     scale_x_discrete(labels= xlabs) +
     theme(text = element_text(size=11.5),
           axis.text.x = element_text(vjust = 0.5, hjust=0.5),
           legend.position="none")

      ggsave("../figures/figure_2_C.pdf", width = 11.5, height = 4.2)

Plot + effect estimates

In [8]:
%%R

deu_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="German")
eng_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="English")
est_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Estonian")
fra_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="French")
hrv_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Croatian")
jpn_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Japanese")
kor_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Korean")
nor_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Norwegian")
por_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Portuguese")
spa_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Spanish")
swe_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Swedish")

p <- p + geom_text(data = deu_est_label,label = "est=.08",size=4,color="black") +
         geom_text(data = eng_est_label,label = "est=.09",size=4,color="black") +
         geom_text(data = est_est_label,label = "est=.04",size=4,color="black") +
         geom_text(data = fra_est_label,label = "est=.04",size=4,color="black") +
         geom_text(data = hrv_est_label,label = "est=.08",size=4,color="black") +
         geom_text(data = jpn_est_label,label = "est=.18",size=4,color="black") +
         geom_text(data = kor_est_label,label = "est=.08",size=4,color="black") +
         geom_text(data = nor_est_label,label = "est=.03",size=4,color="black") +
         geom_text(data = por_est_label,label = "est=.06",size=4,color="black") +
         geom_text(data = spa_est_label,label = "est=.06",size=4,color="black") +
         geom_text(data = swe_est_label,label = "est=.15",size=4,color="black")
         

ggsave("../figures/token_swu_rand_eff.pdf", width = 11.7, height = 6.2)

\+ sample size

In [9]:
%%R

deu_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="German")
eng_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="English")
est_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Estonian")
fas_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Persian")
fra_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="French")
hrv_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Croatian")
jpn_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Japanese")
kor_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Korean")
nor_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Norwegian")
pol_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Polish")
por_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Portuguese")
spa_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Spanish")
swe_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Swedish")
zho_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Mandarin")

deu_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="German")
eng_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="English")
est_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Estonian")
fas_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Persian")
fra_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="French")
hrv_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Croatian")
jpn_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Japanese")
kor_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Korean")
nor_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Norwegian")
pol_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Polish")
por_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Portuguese")
spa_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Spanish")
swe_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Swedish")
zho_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Mandarin")

p <- p + geom_text(data = deu_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = eng_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = est_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fas_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fra_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = hrv_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = jpn_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = kor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = nor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = pol_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = por_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = spa_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = swe_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = zho_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = deu_sz_label,label = " = 39",size=4,color="black") +
         geom_text(data = eng_sz_label,label = " = 1005",size=4,color="black") +
         geom_text(data = est_sz_label,label = " = 22",size=4,color="black") +
         geom_text(data = fas_sz_label,label = " = 12",size=4,color="black") +
         geom_text(data = fra_sz_label,label = " = 303",size=4,color="black") +
         geom_text(data = hrv_sz_label,label = " = 79",size=4,color="black") +
         geom_text(data = jpn_sz_label,label = " = 139",size=4,color="black") +
         geom_text(data = kor_sz_label,label = " = 37",size=4,color="black") +
         geom_text(data = nor_sz_label,label = " = 56",size=4,color="black") +
         geom_text(data = pol_sz_label,label = " = 1",size=4,color="black") +
         geom_text(data = por_sz_label,label = " = 24",size=4,color="black") +
         geom_text(data = spa_sz_label,label = " = 31",size=4,color="black") +
         geom_text(data = swe_sz_label,label = " = 16",size=4,color="black") +
         geom_text(data = zho_sz_label,label = " = 2",size=4,color="black")
         

ggsave("../figures/token_swu_rand_eff_n.pdf", width = 11.7, height = 6.2)

By language family

In [10]:
rand_swu_stats_fam = (rand_dat_inc_cg.groupby(["Language_Family","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_stats_fam = rand_swu_stats_fam.rename({'mean': 'means'}, axis=1)

In [11]:
%%R -i rand_swu_stats_fam

library('ggplot2')

p <- ggplot(rand_swu_stats_fam, aes(x = contingent, y = means, color = Language_Family)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_Family,ncol=5) + 
     ylim(0, .5) +
     labs(y = "Prop. single word utt.", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))

    ggsave("../figures/token_swu_family.pdf", width = 11.7, height = 6.2)

By language Genus

In [12]:
rand_swu_stats_gen = (rand_dat_inc_cg.groupby(["Language_Genus","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_stats_gen = rand_swu_stats_gen.rename({'mean': 'means'}, axis=1)

In [13]:
%%R -i rand_swu_stats_gen

library('ggplot2')

p <- ggplot(rand_swu_stats_gen, aes(x = contingent, y = means, color = Language_Genus)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_Genus,ncol=8) + 
     ylim(0, .5) +
     labs(y = "Prop. single word utt.", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))
    ggsave("../figures/token_swu_genus.pdf", width = 11.7, height = 6.2)

By agglutinative status

In [14]:
rand_swu_stats_aggl = (rand_dat_inc_cg.groupby(["Agglutinative","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_stats_aggl =  rand_swu_stats_aggl.rename({'mean': 'means'}, axis=1)

In [15]:
%%R -i rand_swu_stats_aggl

library('ggplot2')

p <- ggplot(rand_swu_stats_aggl, aes(x = contingent, y = means, color = Agglutinative)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Agglutinative,ncol=2) + 
     ylim(0, .5) +
     labs(y = "Prop. single word utt.", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))

    ggsave("../figures/token_swu_aggl.pdf", width = 11.7, height = 6.2)

----

#### SWU mixed models

In [16]:
deu=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="deu"]
eng=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="eng"]
est=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="est"]
fas=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="fas"]
fra=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="fra"]
hrv=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="hrv"]
jpn=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="jpn"]
kor=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="kor"]
nor=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="nor"]
pol=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="pol"]
por=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="por"]
spa=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="spa"]
swe=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="swe"]
zho=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="zho"]

In [17]:
%%R

library("lme4")
library("broom")
library("emmeans")
library("lmerTest")
library("tidyverse")

effect_sizes <- data.frame(matrix(ncol = 2, nrow = 0))
cols <- c("Language_name", "rand_effect_size")
colnames(effect_sizes) <- cols

R[write to console]: Loading required package: Matrix

R[write to console]: 
Attaching package: ‘lmerTest’


R[write to console]: The following object is masked from ‘package:lme4’:

    lmer


R[write to console]: The following object is masked from ‘package:stats’:

    step


R[write to console]: ── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

R[write to console]: [32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.4     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.1     [32m✔[39m [34mforcats[39m 0.5.1
[32m✔[39m [34mpurrr  [39m 0.3.4     

R[write to console]: ── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mtidyr[39m::[32mexpand()[39m masks [34mMatrix[39m::expand()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m 

In [18]:
%%R -i deu

lm2_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=deu)
emm2_1<-emmeans(lm2_1,pairwise~contingent)
pval<-summary(emm2_1$contrasts)$p.value
print(c(emm2_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# plot(emm2_1)
# summary(emmeans(lm2_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm2_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

deu_lname <- deu$language[1]

deu_eff <- eff_size(emm2_1,sigma = sigma(lm2_1), edf = df.residual(lm2_1))

deu_eff <- summary(deu_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(deu_lname,deu_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4386' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4386)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4386' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4386)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0959 0.0127 Inf   7.524  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 6.616929e-14

[1] 9.263701e-13


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name rand_effect_size
1           deu 0.23832519592315


In [19]:
%%R -i eng

lm3_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=eng, REML= FALSE)
emm3_1<-emmeans(lm3_1,pairwise~contingent)
pval<-summary(emm3_1$contrasts)$p.value
print(c(emm3_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm3_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm3_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

eng_lname <- eng$language[1]

eng_eff <- eff_size(emm3_1,sigma = sigma(lm3_1), edf = df.residual(lm3_1))

eng_eff <- summary(eng_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(eng_lname,eng_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 113635' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 113635)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 113635' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 113635)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)   0.0984 0.00352 Inf  27.941  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0

[1] 0


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu  0.23832519592315
2           eng 0.248424360241645


In [20]:
%%R -i est

lm4_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=est, REML= FALSE)
emm4_1<-emmeans(lm4_1,pairwise~contingent)
pval<-summary(emm4_1$contrasts)$p.value
print(c(emm4_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm4_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm4_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

est_lname <- est$language[1]

est_eff <- eff_size(emm4_1,sigma = sigma(lm4_1), edf = df.residual(lm4_1))

est_eff <- summary(est_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(est_lname,est_eff)
effect_sizes

R[write to console]: Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed



[[1]]
 contrast                      estimate    SE   df t.ratio p.value
 contingent - (non-contingent)   0.0294 0.015 2447   1.958  0.0503

Degrees-of-freedom method: satterthwaite 

[[2]]
[1] 0.05033405

[1] 0.7046767


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name   rand_effect_size
1           deu   0.23832519592315
2           eng  0.248424360241645
3           est 0.0841060443546248


In [21]:
%%R -i fas

lm5_1 <- lmer(swu ~ contingent + (1|transcript_id),data=fas, REML= FALSE)
emm5_1<-emmeans(lm5_1,pairwise~contingent)
pval<-summary(emm5_1$contrasts)$p.value
print(c(emm5_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm5_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm5_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fas_lname <- fas$language[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(fas_lname,NaN)
effect_sizes

R[write to console]: boundary (singular) fit: see ?isSingular

R[write to console]: Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed



[[1]]
 contrast                      estimate     SE  df t.ratio p.value
 contingent - (non-contingent)  0.00181 0.0528 548   0.034  0.9727

Degrees-of-freedom method: satterthwaite 

[[2]]
[1] 0.9726523

[1] 1
  Language_name   rand_effect_size
1           deu   0.23832519592315
2           eng  0.248424360241645
3           est 0.0841060443546248
4           fas                NaN


In [22]:
%%R -i fra
lm6_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=fra, REML= FALSE)
emm6_1<-emmeans(lm6_1,pairwise~contingent)
pval<-summary(emm6_1$contrasts)$p.value
print(c(emm6_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm6_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm6_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fra_lname <- fra$language[1]

fra_eff <- eff_size(emm6_1,sigma = sigma(lm6_1), edf = df.residual(lm6_1))

fra_eff <- summary(fra_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(fra_lname,fra_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 21847' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 21847)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 21847' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 21847)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)   0.0363 0.00644 Inf   5.633  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 1.766574e-08

[1] 2.473203e-07


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name   rand_effect_size
1           deu   0.23832519592315
2           eng  0.248424360241645
3           est 0.0841060443546248
4           fas                NaN
5           fra 0.0931069943179454


In [23]:
%%R -i hrv

lm7_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=hrv, REML= FALSE)
emm7_1<-emmeans(lm7_1,pairwise~contingent)
pval<-summary(emm7_1$contrasts)$p.value
print(c(emm7_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm7_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm7_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

hrv_lname <- hrv$language[1]

hrv_eff <- eff_size(emm7_1,sigma = sigma(lm7_1), edf = df.residual(lm7_1))

hrv_eff <- summary(hrv_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(hrv_lname,hrv_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see ?isSingular

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 6427' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 6427)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 6427' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 6427)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0689 0.0104 Inf   6.614  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 3.745693e-11

[1] 5.24397e-10


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name   rand_effect_size
1           deu   0.23832519592315
2           eng  0.248424360241645
3           est 0.0841060443546248
4           fas                NaN
5           fra 0.0931069943179454
6           hrv  0.169570057494665


In [24]:
%%R -i jpn

lm8_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=jpn, REML= FALSE)
emm8_1<-emmeans(lm8_1,pairwise~contingent)
pval<-summary(emm8_1$contrasts)$p.value
print(c(emm8_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm8_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm8_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

jpn_lname <- jpn$language[1]

jpn_eff <- eff_size(emm8_1,sigma = sigma(lm8_1), edf = df.residual(lm8_1))

jpn_eff <- summary(jpn_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(jpn_lname,jpn_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 21545' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 21545)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 21545' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 21545)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)    0.183 0.00684 Inf  26.732  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0

[1] 0


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name   rand_effect_size
1           deu   0.23832519592315
2           eng  0.248424360241645
3           est 0.0841060443546248
4           fas                NaN
5           fra 0.0931069943179454
6           hrv  0.169570057494665
7           jpn  0.383821342163643


In [25]:
%%R -i kor

lm9_1 <- lmer(swu ~ contingent + (1|transcript_id),data=kor, REML= FALSE)
emm9_1<-emmeans(lm9_1,pairwise~contingent)
pval<-summary(emm9_1$contrasts)$p.value
print(c(emm9_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm9_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm9_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

kor_lname <- kor$language[1]

kor_eff <- eff_size(emm9_1,sigma = sigma(lm9_1), edf = df.residual(lm9_1))

kor_eff <- summary(kor_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(kor_lname,kor_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4518' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4518)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4518' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4518)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0978 0.0107 Inf   9.113  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 2.520206e-14

[1] 3.528289e-13


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name   rand_effect_size
1           deu   0.23832519592315
2           eng  0.248424360241645
3           est 0.0841060443546248
4           fas                NaN
5           fra 0.0931069943179454
6           hrv  0.169570057494665
7           jpn  0.383821342163643
8           kor  0.296007909028206


In [26]:
%%R -i nor

lm10_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=nor, REML= FALSE)
emm10_1<-emmeans(lm10_1,pairwise~contingent)
pval<-summary(emm10_1$contrasts)$p.value
print(c(emm10_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm10_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm10_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

nor_lname <- nor$language[1]

nor_eff <- eff_size(emm10_1,sigma = sigma(lm10_1), edf = df.residual(lm10_1))

nor_eff <- summary(nor_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(nor_lname,nor_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see ?isSingular

R[write to console]: Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed



[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)    0.102 0.0208 1903   4.881  <.0001

Degrees-of-freedom method: satterthwaite 

[[2]]
[1] 1.145097e-06

[1] 1.603136e-05


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name   rand_effect_size
1           deu   0.23832519592315
2           eng  0.248424360241645
3           est 0.0841060443546248
4           fas                NaN
5           fra 0.0931069943179454
6           hrv  0.169570057494665
7           jpn  0.383821342163643
8           kor  0.296007909028206
9           nor  0.250014154267064


In [27]:
%%R -i pol
 
# simple linear model (no random effects, because only 1 transcript from 1 sub)

lm11_1 <- lm(swu ~ contingent,data=pol, REML= FALSE)
emm11_1<-emmeans(lm11_1,pairwise~contingent)
pval<-summary(emm11_1$contrasts)$p.value
print(c(emm11_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm11_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm11_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

pol_lname <- pol$language[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(pol_lname,NaN)
effect_sizes

[[1]]
 contrast                      estimate     SE df t.ratio p.value
 contingent - (non-contingent)  0.00958 0.0805 92   0.119  0.9056


[[2]]
[1] 0.9056191

[1] 1
   Language_name   rand_effect_size
1            deu   0.23832519592315
2            eng  0.248424360241645
3            est 0.0841060443546248
4            fas                NaN
5            fra 0.0931069943179454
6            hrv  0.169570057494665
7            jpn  0.383821342163643
8            kor  0.296007909028206
9            nor  0.250014154267064
10           pol                NaN


In [28]:
%%R -i por

lm12_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=por, REML= FALSE)
emm12_1<-emmeans(lm12_1,pairwise~contingent)
pval<-summary(emm12_1$contrasts)$p.value
print(c(emm12_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm12_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm12_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

por_lname <- por$language[1]

por_eff <- eff_size(emm12_1,sigma = sigma(lm12_1), edf = df.residual(lm12_1))

por_eff <- summary(por_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(por_lname,por_eff)
effect_sizes

R[write to console]: Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed



[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)   0.0627 0.0135 2982   4.645  <.0001

Degrees-of-freedom method: satterthwaite 

[[2]]
[1] 3.539388e-06

[1] 4.955143e-05


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name   rand_effect_size
1            deu   0.23832519592315
2            eng  0.248424360241645
3            est 0.0841060443546248
4            fas                NaN
5            fra 0.0931069943179454
6            hrv  0.169570057494665
7            jpn  0.383821342163643
8            kor  0.296007909028206
9            nor  0.250014154267064
10           pol                NaN
11           por  0.176616900871351


In [29]:
%%R -i spa

lm13_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=spa, REML= FALSE)
emm13_1<-emmeans(lm13_1,pairwise~contingent)
pval<-summary(emm13_1$contrasts)$p.value
print(c(emm13_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm13_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm13_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

spa_lname <- spa$language[1]

spa_eff <- eff_size(emm13_1,sigma = sigma(lm13_1), edf = df.residual(lm13_1))

spa_eff <- summary(spa_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(spa_lname,spa_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see ?isSingular

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4469' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4469)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4469' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4469)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0707 0.0121 Inf   5.860  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 4.619836e-09

[1] 6.467771e-08


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name   rand_effect_size
1            deu   0.23832519592315
2            eng  0.248424360241645
3            est 0.0841060443546248
4            fas                NaN
5            fra 0.0931069943179454
6            hrv  0.169570057494665
7            jpn  0.383821342163643
8            kor  0.296007909028206
9            nor  0.250014154267064
10           pol                NaN
11           por  0.176616900871351
12           spa  0.176382716285924


In [30]:
%%R -i swe


lm14_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=swe, REML= FALSE)
emm14_1<-emmeans(lm14_1,pairwise~contingent)
pval<-summary(emm14_1$contrasts)$p.value
print(c(emm14_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm14_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm14_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

swe_lname <- swe$language[1]

swe_eff <- eff_size(emm14_1,sigma = sigma(lm14_1), edf = df.residual(lm14_1))

swe_eff <- summary(swe_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(swe_lname,swe_eff)
effect_sizes

R[write to console]: Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed



[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)    0.134 0.0174 2679   7.702  <.0001

Degrees-of-freedom method: satterthwaite 

[[2]]
[1] 0

[1] 0


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name   rand_effect_size
1            deu   0.23832519592315
2            eng  0.248424360241645
3            est 0.0841060443546248
4            fas                NaN
5            fra 0.0931069943179454
6            hrv  0.169570057494665
7            jpn  0.383821342163643
8            kor  0.296007909028206
9            nor  0.250014154267064
10           pol                NaN
11           por  0.176616900871351
12           spa  0.176382716285924
13           swe  0.308093961952412


In [31]:
%%R -i zho

lm15_1 <- lmer(swu ~ contingent + (1|transcript_id),data=zho, REML= FALSE)
emm15_1<-emmeans(lm15_1,pairwise~contingent)
pval<-summary(emm15_1$contrasts)$p.value
print(c(emm15_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm15_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm15_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

zho_lname <- zho$language[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(zho_lname,NaN)
effect_sizes

R[write to console]: Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed



[[1]]
 contrast                      estimate     SE  df t.ratio p.value
 contingent - (non-contingent)   0.0929 0.0437 343   2.127  0.0341

Degrees-of-freedom method: satterthwaite 

[[2]]
[1] 0.03412619

[1] 0.4777666
   Language_name   rand_effect_size
1            deu   0.23832519592315
2            eng  0.248424360241645
3            est 0.0841060443546248
4            fas                NaN
5            fra 0.0931069943179454
6            hrv  0.169570057494665
7            jpn  0.383821342163643
8            kor  0.296007909028206
9            nor  0.250014154267064
10           pol                NaN
11           por  0.176616900871351
12           spa  0.176382716285924
13           swe  0.308093961952412
14           zho                NaN


In [32]:
%%R
write.csv(x=effect_sizes,'../data/SWU_effect_sizes.csv', row.names = FALSE)