# Natural Statistics Cross-linguistic: 

#### Processing time hypothesis test

----

In [22]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, "data_proc")
import analytic_proc
import warnings
warnings.filterwarnings('ignore')

In [23]:
rand_dat_inc = pd.read_csv("../data/rand_dat_inc_master.csv",index_col=0,low_memory=False)

In [24]:
rand_dat_inc = rand_dat_inc.sort_values(by=['transcript_id','media_start','caregiver']).reset_index(drop=True)

In [25]:
# swap 51136 and 51137
# this way the infant vocalization is the row directly before its num_tokens response

c = rand_dat_inc.iloc[51136]

temp = rand_dat_inc.iloc[51137].copy()
rand_dat_inc.iloc[51137] = c
rand_dat_inc.iloc[51136] = temp

In [26]:
# subset to all contingent caregiver utterances & the previous target_child vocalization

proc_t_dat = rand_dat_inc[(rand_dat_inc["contingent"].shift(-1)==1) | (rand_dat_inc["contingent"]==1)]

In [27]:
# subtract onsets from onsets to get latency

proc_t_dat["latency"] = proc_t_dat["media_start"]-proc_t_dat["media_start"].shift(-1)
proc_t_dat["latency"] = proc_t_dat["latency"].shift(1)

In [28]:
# detect any instances where infants' vocalization was not detected

# move media_start and media_end to furthest rightmost for debugging

cols = list(proc_t_dat.columns.values) 
cols.pop(cols.index('media_start')) 
cols.pop(cols.index('media_end')) 
proc_t_dat = proc_t_dat[cols+['media_start','media_end']] 

cols = list(rand_dat_inc.columns.values) 
cols.pop(cols.index('media_start')) 
cols.pop(cols.index('media_end')) 
rand_dat_inc = rand_dat_inc[cols+['media_start','media_end']]

In [29]:
debug=proc_t_dat[(proc_t_dat["caregiver"]==proc_t_dat["caregiver"].shift(-1))]

In [30]:
# convert child latencies to nans just to be safe 

proc_t_dat.loc[proc_t_dat["caregiver"]=="target_child","latency"]=np.nan

In [31]:
proc_t_dat = proc_t_dat[proc_t_dat["caregiver"]=="caregiver"]

In [32]:
# # subset using a buffer time

# buffer = -.3 # use 300 milliseconds
# proc_t_dat = proc_t_dat[proc_t_dat["latency"]<buffer] # if less than buffer, it was likely preplanned

In [33]:
latency = proc_t_dat[["num_tokens","Language_name","latency"]]
latency["Language_name"] = latency["Language_name"].astype(object)
latency.dtypes

num_tokens         int64
Language_name     object
latency          float64
dtype: object

In [34]:
latency["latency"] = latency["latency"]*(-1)

----
### Figures & Statistics


#### Utterance level

In [35]:
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [36]:
%%R -i latency

library('ggplot2')
library('repr')
options(repr.plot.width=8.27, repr.plot.height=11.69, repr.plot.res = 1200)

p <- ggplot(latency, aes(x=latency,
                    y=num_tokens,
                    color = Language_name)) +
     geom_point(shape=21,
                size=2) +
     facet_wrap(. ~ Language_name,ncol = 7) +
     geom_smooth(method='lm',
                 se = FALSE,
                 colour="black") +
     xlim(0, 3) +
     labs(y = "Length of Utterance", x = "Latency to Respond to Babbling") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(vjust = 0.5, hjust=1),
           aspect.ratio=1,
           legend.position = "bottom",
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                            size=0, linetype="dotted",
                                            colour = "white"),
           legend.text=element_text(size=16))
    
     ggsave("../figures/latency_x_utterance_length_utt_level.pdf", width = 11.69, height = 8.27)
#     ggsave("../figures/latency_x_utterance_length_utt_level_w_buffer.pdf", width = 11.69, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



In [37]:
%%R -i latency

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

deu_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="German")
eng_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="English")
est_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Estonian")
fas_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Persian")
fra_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="French")
hrv_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Croatian")
jpn_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Japanese")
kor_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Korean")
nor_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Norwegian")
pol_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Polish")
por_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Portuguese")
spa_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Spanish")
swe_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Swedish")
zho_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Mandarin")


p <- ggplot(latency, aes(x=latency, y=num_tokens, color = Language_name)) +
     geom_point(shape=21, size=2) +
     facet_wrap(. ~ Language_name,ncol = 7) +
#      with buffer:
#      geom_text(data = deu_label,label = "*",size=8,color="black") +
#      geom_text(data = eng_label,label = "***",size=8,color="black") +
#      geom_text(data = est_ns_label,label = "ns",size=4,color="black",fontface = "italic") + 
#      geom_text(data = fas_ns_label,label = "ns",size=4,color="black",fontface = "italic") +
#      geom_text(data = fra_label,label = "ns",size=4,color="black",fontface = "italic") +
#      geom_text(data = hrv_label,label = "ns",size=4,color="black",fontface = "italic") +
#      geom_text(data = jpn_label,label = "***",size=8,color="black") + 
#      geom_text(data = kor_label,label = "ns",size=4,color="black",fontface = "italic") +  
#      geom_text(data = nor_label,label = "***",size=8,color="black") +  
#      geom_text(data = pol_ns_label,label = "", size=4,color="black",fontface = "italic") +  
#      geom_text(data = por_ns_label,label = "ns", size=4,color="black",fontface = "italic") +  
#      geom_text(data = spa_label,label = "***",size=8,color="black") + 
#      geom_text(data = swe_label,label = "ns", size=4,color="black",fontface = "italic") + 
#      geom_text(data = zho_label,label = "ns", size=4,color="black",fontface = "italic") +
#      no buffer:
     geom_text(data = deu_label,label = "ns",size=4,color="black",fontface = "italic") +
     geom_text(data = eng_label,label = "***",size=8,color="black") +
     geom_text(data = est_ns_label,label = "ns",size=4,color="black",fontface = "italic") + 
     geom_text(data = fas_ns_label,label = "ns",size=4,color="black",fontface = "italic") +
     geom_text(data = fra_label,label = "ns",size=4,color="black",fontface = "italic") +
     geom_text(data = hrv_label,label = "ns",size=4,color="black",fontface = "italic") +
     geom_text(data = jpn_label,label = "ns",size=4,color="black",fontface = "italic") + 
     geom_text(data = kor_label,label = "ns",size=4,color="black",fontface = "italic") +  
     geom_text(data = nor_label,label = "***",size=8,color="black") +  
     geom_text(data = pol_ns_label,label = "", size=4,color="black",fontface = "italic") +  
     geom_text(data = por_ns_label,label = "ns", size=4,color="black",fontface = "italic") +  
     geom_text(data = spa_label,label = "***",size=8,color="black") + 
     geom_text(data = swe_label,label = "ns", size=4,color="black",fontface = "italic") + 
     geom_text(data = zho_label,label = "ns", size=4,color="black",fontface = "italic") +
     geom_smooth(method='lm', se = FALSE, colour="black") +
     xlim(0, 3) +
     labs(y = "Length of Utterance", x = "Latency to Respond to Babbling") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(vjust = 0.5, hjust=1),
           aspect.ratio=1,
           legend.position = "bottom",
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                            size=0, linetype="dotted",
                                            colour = "white"),
           legend.text=element_text(size=16))
     ggsave("../figures/latency_x_utterance_length_utt_level_stat.pdf", width = 11.69, height = 8.27)
#      ggsave("../figures/latency_x_utterance_length_utt_level_w_buffer_stat.pdf", width = 11.69, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



In [38]:
%%R

deu_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="German")
eng_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="English")
est_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Estonian")
fas_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Persian")
fra_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="French")
hrv_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Croatian")
jpn_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Japanese")
kor_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Korean")
nor_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Norwegian")
por_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Portuguese")
spa_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Spanish")
swe_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Swedish")
zho_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Mandarin")

# with buffer:
# p <- p + geom_text(data = deu_est_label,label = "r = .08",size=4,color="black") +
#          geom_text(data = eng_est_label,label = "r = .08",size=4,color="black") +
#          geom_text(data = est_est_label,label = "",size=4,color="black") +
#          geom_text(data = fas_est_label,label = "",size=4,color="black") +
#          geom_text(data = fra_est_label,label = "",size=4,color="black") +
#          geom_text(data = hrv_est_label,label = "",size=4,color="black") +
#          geom_text(data = jpn_est_label,label = "r = .04",size=4,color="black") +
#          geom_text(data = kor_est_label,label = "",size=4,color="black") +
#          geom_text(data = nor_est_label,label = "r = -.08",size=4,color="black") +
#          geom_text(data = por_est_label,label = "",size=4,color="black") +
#          geom_text(data = spa_est_label,label = "r = .08",size=4,color="black") +
#          geom_text(data = swe_est_label,label = "",size=4,color="black") +
#          geom_text(data = zho_est_label,label = "",size=4,color="black")

# no buffer:
p <- p + geom_text(data = deu_est_label,label = "",size=4,color="black") +
         geom_text(data = eng_est_label,label = "r = .08",size=4,color="black") +
         geom_text(data = est_est_label,label = "",size=4,color="black") +
         geom_text(data = fas_est_label,label = "",size=4,color="black") +
         geom_text(data = fra_est_label,label = "",size=4,color="black") +
         geom_text(data = hrv_est_label,label = "",size=4,color="black") +
         geom_text(data = jpn_est_label,label = "",size=4,color="black") +
         geom_text(data = kor_est_label,label = "",size=4,color="black") +
         geom_text(data = nor_est_label,label = "r = -.1",size=4,color="black") +
         geom_text(data = por_est_label,label = "",size=4,color="black") +
         geom_text(data = spa_est_label,label = "r = .08",size=4,color="black") +
         geom_text(data = swe_est_label,label = "",size=4,color="black") +
         geom_text(data = zho_est_label,label = "",size=4,color="black")
         
ggsave("../figures/latency_x_utterance_length_utt_level_stat_r.pdf", width = 11.69, height = 8.27)
# ggsave("../figures/latency_x_utterance_length_utt_level_w_buffer_stat_r.pdf", width = 11.69, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



In [39]:
%%R

deu_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="German")
eng_n_label <- data.frame(latency=c(1.8),num_tokens = c(45),Language_name="English")
est_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Estonian")
fas_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Persian")
fra_n_label <- data.frame(latency=c(1.8),num_tokens = c(45),Language_name="French")
hrv_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Croatian")
jpn_n_label <- data.frame(latency=c(1.8),num_tokens = c(45),Language_name="Japanese")
kor_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Korean")
nor_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Norwegian")
pol_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Polish")
por_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Portuguese")
spa_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Spanish")
swe_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Swedish")
zho_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Mandarin")

deu_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="German")
eng_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="English")
est_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Estonian")
fas_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Persian")
fra_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="French")
hrv_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Croatian")
jpn_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Japanese")
kor_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Korean")
nor_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Norwegian")
pol_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Polish")
por_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Portuguese")
spa_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Spanish")
swe_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Swedish")
zho_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Mandarin")

p <- p + geom_text(data = deu_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = eng_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = est_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fas_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fra_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = hrv_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = jpn_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = kor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = nor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = pol_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = por_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = spa_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = swe_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = zho_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = deu_sz_label,label = " = 39",size=4,color="black") +
         geom_text(data = eng_sz_label,label = " = 1005",size=4,color="black") +
         geom_text(data = est_sz_label,label = " = 22",size=4,color="black") +
         geom_text(data = fas_sz_label,label = " = 12",size=4,color="black") +
         geom_text(data = fra_sz_label,label = " = 303",size=4,color="black") +
         geom_text(data = hrv_sz_label,label = " = 79",size=4,color="black") +
         geom_text(data = jpn_sz_label,label = " = 139",size=4,color="black") +
         geom_text(data = kor_sz_label,label = " = 37",size=4,color="black") +
         geom_text(data = nor_sz_label,label = " = 56",size=4,color="black") +
         geom_text(data = pol_sz_label,label = " = 1",size=4,color="black") +
         geom_text(data = por_sz_label,label = " = 24",size=4,color="black") +
         geom_text(data = spa_sz_label,label = " = 31",size=4,color="black") +
         geom_text(data = swe_sz_label,label = " = 16",size=4,color="black") +
         geom_text(data = zho_sz_label,label = " = 2",size=4,color="black")

         ggsave("../figures/latency_x_utterance_length_utt_level_stat_r_n.pdf", width = 11.69, height = 8.27)
#          ggsave("../figures/latency_x_utterance_length_utt_level_w_buffer_stat_r_n.pdf", width = 11.69, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



Statistics

In [40]:
%%R -i latency

library(rstatix)
library(tidyverse)
options(scipen=0, digits=7)

stats <- latency %>%
        group_by(Language_name) %>%
        cor_test(num_tokens,latency)
        
stats <- select(stats,c("Language_name","cor","statistic","p"))

makeStars <- function(x){
  stars <- c("***", "**", "*", "ns")
  vec <- c(0,0.001, 0.01, 0.05, 1)
  i <- findInterval(x, vec)
  stars[i]
}

stats$p_adj <- p.adjust(stats$p, method = "holm", n = length(stats$p))

stats$significance <- makeStars(stats$p_adj)

stats

[90m# A tibble: 14 × 6[39m
   Language_name     cor statistic        p    p_adj significance
   [3m[90m<chr>[39m[23m           [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m 1[39m Croatian       0.054     2.78   5.45[90me[39m[31m- 3[39m 5.45[90me[39m[31m- 2[39m ns          
[90m 2[39m English        0.081    11.2    7.5 [90me[39m[31m-29[39m 1.05[90me[39m[31m-27[39m ***         
[90m 3[39m Estonian      -[31m0[39m[31m.[39m[31m0[39m[31m15[39m    -[31m0[39m[31m.[39m[31m474[39m  6.35[90me[39m[31m- 1[39m 1   [90me[39m+ 0 [31mNA[39m          
[90m 4[39m French         0.03      2.30   2.12[90me[39m[31m- 2[39m 1.70[90me[39m[31m- 1[39m ns          
[90m 5[39m German         0.05      2.04   4.17[90me[39m[31m- 2[39m 2.92[90me[39m[31m- 1[39m ns          
[90m 6[39m Japanese       0.029     2.82   4.77[90me[39m[31m- 3[39m 5

----
#### Subject level

In [41]:
proc_t_dat_sum_stats = (proc_t_dat.groupby(["Language_name","target_child_id","transcript_id"])
                                  .agg({"num_tokens":"mean",
                                       "latency":"mean"})
                                  .reset_index())

In [42]:
proc_t_dat_sum_stats["latency"] = proc_t_dat_sum_stats["latency"]*(-1)

In [43]:
%%R -i proc_t_dat_sum_stats

library('ggplot2')
library('repr')
options(repr.plot.width=8.27, repr.plot.height=11.69, repr.plot.res = 1200)

p <- ggplot(proc_t_dat_sum_stats, aes(x=latency,
                                      y=num_tokens,
                                      color = Language_name)) +
     geom_point(shape=21,
                size=2) +
     facet_wrap(. ~ Language_name,ncol = 7) +
     geom_smooth(method='lm',
                 se = FALSE,
                 colour="black") +
     xlim(0, 3) +
     ylim(0, 50) +
     labs(y = "Length of Utterance", x = "latency to Respond to Babbling") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(vjust = 0.5, hjust=1),
           aspect.ratio=1,
           legend.position = "bottom",
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                            size=0, linetype="dotted",
                                            colour = "white"),
           legend.text=element_text(size=16))
     
     ggsave("../figures/latency_x_utterance_length_subject_level.pdf", width = 11.69, height = 8.27)
#     ggsave("../figures/latency_x_utterance_length_subject_level_w_buffer.pdf", width = 11.69, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



In [44]:
%%R -i proc_t_dat_sum_stats

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

deu_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="German")
eng_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="English")
est_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Estonian")
fas_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Persian")
fra_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="French")
hrv_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Croatian")
jpn_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Japanese")
kor_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Korean")
nor_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Norwegian")
pol_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Polish")
por_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Portuguese")
spa_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Spanish")
swe_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Swedish")
zho_ns_label <- data.frame(latency=c(1.5),num_tokens = c(35),Language_name="Mandarin")


p <- ggplot(proc_t_dat_sum_stats, aes(x=latency,
                                      y=num_tokens,
                                      color = Language_name)) +
     geom_point(shape=21,
                size=2) +
     facet_wrap(. ~ Language_name,ncol = 7) +
#    with buffer:    
#      geom_text(data = deu_ns_label,label = "ns",size=4,color="black") + 
#      geom_text(data = eng_label,label = "***",size=8,color="black") +  
#      geom_text(data = est_ns_label,label = "ns",size=4,color="black") +  
#      geom_text(data = fas_ns_label,label = "ns",size=4, color="black",fontface = "italic") +
#      geom_text(data = fra_label,label = "ns",size=4, color="black",fontface = "italic") +  
#      geom_text(data = hrv_label,label = "*",size=8, color="black") + 
#      geom_text(data = jpn_label,label = "ns",size=4, color="black",fontface = "italic") + 
#      geom_text(data = kor_ns_label,label = "ns",size=4,color="black",fontface = "italic") +  
#      geom_text(data = nor_ns_label,label = "ns",size=4,color="black",fontface = "italic") +  
#      geom_text(data = por_label,label = "**",size=8,color="black") +  
#      geom_text(data = spa_label,label = "*",size=8,color="black") + 
#      geom_text(data = swe_ns_label,label = "ns",size=4,color="black",fontface = "italic") +
#    no buffer:
     geom_text(data = deu_ns_label,label = "ns",size=4,color="black") + 
     geom_text(data = eng_label,label = "***",size=8,color="black") +  
     geom_text(data = est_ns_label,label = "ns",size=4,color="black") +  
     geom_text(data = fas_ns_label,label = "ns",size=4, color="black",fontface = "italic") +
     geom_text(data = fra_label,label = "ns",size=4, color="black",fontface = "italic") +  
     geom_text(data = hrv_label,label = "*",size=8, color="black") + 
     geom_text(data = jpn_label,label = "ns",size=4, color="black",fontface = "italic") + 
     geom_text(data = kor_ns_label,label = "ns",size=4,color="black",fontface = "italic") +  
     geom_text(data = nor_ns_label,label = "ns",size=4,color="black",fontface = "italic") +  
     geom_text(data = por_label,label = "**",size=8,color="black") +  
     geom_text(data = spa_label,label = "*",size=8,color="black") + 
     geom_text(data = swe_ns_label,label = "ns",size=4,color="black",fontface = "italic") +
     geom_smooth(method='lm',
                 se = FALSE,
                 colour="black") +
     xlim(0, 3) +
     ylim(0, 50) +
     labs(y = "Length of Utterance", x = "latency to Respond to Babbling") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(vjust = 0.5, hjust=1),
           aspect.ratio=1,
           legend.position = "bottom",
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                            size=0, linetype="dotted",
                                            colour = "white"),
           legend.text=element_text(size=16))
     ggsave("../figures/latency_x_utterance_length_subject_level_stat.pdf", width = 11.7, height = 8.27)
#      ggsave("../figures/latency_x_utterance_length_subject_level_w_buffer_stat.pdf", width = 11.7, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



In [45]:
%%R

deu_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="German")
eng_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="English")
est_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Estonian")
fas_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Persian")
fra_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="French")
hrv_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Croatian")
jpn_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Japanese")
kor_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Korean")
nor_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Norwegian")
por_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Portuguese")
spa_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Spanish")
swe_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Swedish")
zho_est_label <- data.frame(latency=c(.5),num_tokens = c(45),Language_name="Mandarin")

# with buffer:
# p <- p + geom_text(data = deu_est_label,label = "",size=4,color="black") +
#          geom_text(data = eng_est_label,label = "r = .20",size=4,color="black") +
#          geom_text(data = est_est_label,label = "",size=4,color="black") +
#          geom_text(data = fas_est_label,label = "",size=4,color="black") +
#          geom_text(data = fra_est_label,label = "",size=4,color="black") +
#          geom_text(data = hrv_est_label,label = "r = .37",size=4,color="black") +
#          geom_text(data = jpn_est_label,label = "",size=4,color="black") +
#          geom_text(data = kor_est_label,label = "",size=4,color="black") +
#          geom_text(data = nor_est_label,label = "",size=4,color="black") +
#          geom_text(data = por_est_label,label = "r = .69",size=4,color="black") +
#          geom_text(data = spa_est_label,label = "r = .08",size=4,color="black") +
#          geom_text(data = swe_est_label,label = "",size=4,color="black") +
#          geom_text(data = zho_est_label,label = "",size=4,color="black")
        
# no buffer:
p <- p + geom_text(data = deu_est_label,label = "",size=4,color="black") +
         geom_text(data = eng_est_label,label = "r = .21",size=4,color="black") +
         geom_text(data = est_est_label,label = "",size=4,color="black") +
         geom_text(data = fas_est_label,label = "",size=4,color="black") +
         geom_text(data = fra_est_label,label = "",size=4,color="black") +
         geom_text(data = hrv_est_label,label = "r = .39",size=4,color="black") +
         geom_text(data = jpn_est_label,label = "",size=4,color="black") +
         geom_text(data = kor_est_label,label = "",size=4,color="black") +
         geom_text(data = nor_est_label,label = "",size=4,color="black") +
         geom_text(data = por_est_label,label = "r = .68",size=4,color="black") +
         geom_text(data = spa_est_label,label = "r = .52",size=4,color="black") +
         geom_text(data = swe_est_label,label = "",size=4,color="black") +
         geom_text(data = zho_est_label,label = "",size=4,color="black")
         
ggsave("../figures/latency_x_utterance_length_subject_level_stat_r.pdf", width = 11.69, height = 8.27)
# ggsave("../figures/latency_x_utterance_length_subject_level_w_buffer_stat_r.pdf", width = 11.69, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



In [46]:
%%R

deu_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="German")
eng_n_label <- data.frame(latency=c(1.8),num_tokens = c(45),Language_name="English")
est_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Estonian")
fas_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Persian")
fra_n_label <- data.frame(latency=c(1.8),num_tokens = c(45),Language_name="French")
hrv_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Croatian")
jpn_n_label <- data.frame(latency=c(1.8),num_tokens = c(45),Language_name="Japanese")
kor_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Korean")
nor_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Norwegian")
pol_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Polish")
por_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Portuguese")
spa_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Spanish")
swe_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Swedish")
zho_n_label <- data.frame(latency=c(2),num_tokens = c(45),Language_name="Mandarin")

deu_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="German")
eng_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="English")
est_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Estonian")
fas_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Persian")
fra_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="French")
hrv_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Croatian")
jpn_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Japanese")
kor_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Korean")
nor_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Norwegian")
pol_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Polish")
por_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Portuguese")
spa_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Spanish")
swe_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Swedish")
zho_sz_label <- data.frame(latency=c(2.5),num_tokens = c(45),Language_name="Mandarin")

p <- p + geom_text(data = deu_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = eng_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = est_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fas_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fra_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = hrv_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = jpn_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = kor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = nor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = pol_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = por_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = spa_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = swe_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = zho_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = deu_sz_label,label = " = 39",size=4,color="black") +
         geom_text(data = eng_sz_label,label = " = 1005",size=4,color="black") +
         geom_text(data = est_sz_label,label = " = 22",size=4,color="black") +
         geom_text(data = fas_sz_label,label = " = 12",size=4,color="black") +
         geom_text(data = fra_sz_label,label = " = 303",size=4,color="black") +
         geom_text(data = hrv_sz_label,label = " = 79",size=4,color="black") +
         geom_text(data = jpn_sz_label,label = " = 139",size=4,color="black") +
         geom_text(data = kor_sz_label,label = " = 37",size=4,color="black") +
         geom_text(data = nor_sz_label,label = " = 56",size=4,color="black") +
         geom_text(data = pol_sz_label,label = " = 1",size=4,color="black") +
         geom_text(data = por_sz_label,label = " = 24",size=4,color="black") +
         geom_text(data = spa_sz_label,label = " = 31",size=4,color="black") +
         geom_text(data = swe_sz_label,label = " = 16",size=4,color="black") +
         geom_text(data = zho_sz_label,label = " = 2",size=4,color="black")

         ggsave("../figures/latency_x_utterance_length_subject_level_stat_r_n.pdf", width = 11.69, height = 8.27)
#          ggsave("../figures/latency_x_utterance_length_subject_level_w_buffer_stat_r_n.pdf", width = 11.69, height = 8.27)

R[write to console]: `geom_smooth()` using formula 'y ~ x'



Statistics

In [47]:
# proc_t_dat_sum_stats = proc_t_dat_sum_stats[proc_t_dat_sum_stats["Language_name"]!="Polish"]

proc_t_dat_sum_stats = proc_t_dat_sum_stats[(proc_t_dat_sum_stats["Language_name"]=="Croatian") |
                                            (proc_t_dat_sum_stats["Language_name"]=="English") |
                                            (proc_t_dat_sum_stats["Language_name"]=="Estonian")|
                                            (proc_t_dat_sum_stats["Language_name"]=="French")|
                                            (proc_t_dat_sum_stats["Language_name"]=="German")|
                                            (proc_t_dat_sum_stats["Language_name"]=="Japanese")|
                                            (proc_t_dat_sum_stats["Language_name"]=="Korean")|
#                                             (proc_t_dat_sum_stats["Language_name"]=="Mandarin")|
                                            (proc_t_dat_sum_stats["Language_name"]=="Norwegian")|
                                            (proc_t_dat_sum_stats["Language_name"]=="Persian")|
                                            (proc_t_dat_sum_stats["Language_name"]=="Portuguese")|
                                            (proc_t_dat_sum_stats["Language_name"]=="Spanish")|
                                            (proc_t_dat_sum_stats["Language_name"]=="Swedish")]

In [48]:
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [49]:
%%R -i proc_t_dat_sum_stats

library(rstatix)
library(tidyverse)

stats <- proc_t_dat_sum_stats %>%
        group_by(Language_name) %>%
        cor_test(num_tokens,latency)
        
stats <- select(stats,c("Language_name","cor","statistic","p"))

makeStars <- function(x){
  stars <- c("***", "**", "*", "ns")
  vec <- c(0,0.001, 0.01, 0.05, 1)
  i <- findInterval(x, vec)
  stars[i]
}

stats$p_adj <- p.adjust(stats$p, method = "holm", n = length(stats$p))

stats$significance <- makeStars(stats$p_adj)

stats

[90m# A tibble: 12 × 6[39m
   Language_name     cor statistic        p         p_adj significance
   [3m[90m<chr>[39m[23m           [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m         [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       
[90m 1[39m Croatian       0.39      3.16   2.51[90me[39m[31m- 3[39m 0.025[4m1[24m        *           
[90m 2[39m English        0.21      6.42   2.14[90me[39m[31m-10[39m 0.000[4m0[24m[4m0[24m[4m0[24m002[4m5[24m[4m7[24m ***         
[90m 3[39m Estonian       0.43      2.12   4.68[90me[39m[31m- 2[39m 0.328         ns          
[90m 4[39m French        -[31m0[39m[31m.[39m[31m12[39m     -[31m2[39m[31m.[39m[31m0[39m[31m7[39m   3.95[90me[39m[31m- 2[39m 0.316         ns          
[90m 5[39m German         0.15      0.924  3.62[90me[39m[31m- 1[39m 1             [31mNA[39m          
[90m 6[39m Japanese      -[31m0[39m[31m.[39m[31m0[39m[31m33[39m   