In [None]:
library(ggplot2)
library(dplyr)
library(lme4)
library(lmerTest)
library(glmmTMB)
library(cowplot)
library(grid)
library(patchwork)

# Two-colour palettes shared by the plots in this notebook. The spelling
# `pallette` is kept because later cells refer to these names.
pallette <- c("#318480", "#b4464f")
pallette2 <- c("#052F5F", "#DA9307")
pallette2[[1]]  # echo the first colour of the second palette

# Model types dropped from the Humphreys & Bock summary table.
ignore <- "augment_tied"

# Humphreys & Bock 2005

In [None]:
# Per-item model outputs for the Humphreys & Bock (2005) evaluation set.
hb_df <- read.csv("../evalsets/HumphreysBock2005/items.csv.reformat")

# Split the condition code by character position into three fields.
hb_df <- tidyr::separate(hb_df, condition, into=c("subj", "local", "prep"), sep=1:3)

# Model type = model name minus its first four characters and its last one;
# rows whose model is exactly "gpt2" keep that name as their type.
hb_df$type <- substr(hb_df$model, start=5, stop=nchar(hb_df$model) - 1)
hb_df[hb_df$model == "gpt2",]$type <- "gpt2"
hb_df$model <- as.factor(hb_df$model)

# Keep only the rows with subject code "C" and local code "P".
hb_df <- subset(hb_df, subj == "C" & local == "P")
hb_df$prep <- as.factor(hb_df$prep)

# +1 / -1 coding for the two-level prep factor.
coding_matrix <- matrix(c(1, -1), ncol=1)
contrasts(hb_df$prep) <- coding_matrix

# Mean of `sample` and `max` for every type x prep cell.
hb_means <- summarize(group_by(hb_df, type, prep),
                      sample = mean(sample), max = mean(max))

# Hard-coded human rows (type "h"); `max` simply mirrors `sample`.
hb_human <- data.frame(type=rep("h", 2),
                       prep=c("C", "D"),
                       sample=c(67, 75))
hb_human$max <- hb_human$sample

hb_means <- rbind(hb_means, hb_human)
hb_means

# Drop the model types listed in `ignore`.
hb_means <- subset(hb_means, !(type %in% ignore))


In [None]:
# Humphreys & Bock summary figure: cell means per model type with per-prep
# error bars, written to plots/Humphreys_all.png.
options(repr.plot.width=6.5, repr.plot.height=3)
# Left-to-right order of the categories on the x axis.
model_order <- c("lmaug", "augment", "wiki_lm", "gpt2", "h")

# no lmwiki results in plot
hb_means <- subset(hb_means, type != "w")
hb_df <- subset(hb_df, type != "w")

hb_means$type <- factor(hb_means$type, levels=model_order)
hb_df$type <- factor(hb_df$type, levels=model_order)

ggplot(data=hb_means, aes(x=factor(type, levels=model_order), y=sample/100)) +
geom_point(data=subset(hb_means, prep=="D"), shape=21, size=2, color=pallette2[[1]], fill="#FFFFFF") +
# Error bars are computed from the per-item rows of each prep level separately.
# The "D" layer must filter with `==`: `prep="D"` is treated by subset() as an
# extra named argument rather than a condition, so no rows were dropped and the
# "D" bars were being computed over both prep levels.
stat_summary(data=subset(hb_df, prep=="D"), geom="errorbar", width=0.35, 
                         aes(x=factor(type, levels=model_order), group=prep, color="D")) +
stat_summary(data=subset(hb_df, prep=="C"), geom="errorbar", width=0.35,  
                         aes(x=factor(type, levels=model_order), group=prep, color="C")) +
# Grey connector drawn from the cell means, then the mean markers on top.
geom_line(data=hb_means, position=position_dodge(width=0.5), size=1.2, color="#999999") +
geom_point(data=subset(hb_means, prep=="D"), aes(shape="D", color="D"), size=2, stroke=1, fill="#FFFFFF") +
geom_point(data=subset(hb_means, prep=="C"), aes(shape="C", color="C"), size=2, stroke=1, fill="#FFFFFF") + 
labs(x="", manip="", y="% Plural Agreement", title="") +
scale_y_continuous(labels=scales::percent) +
scale_x_discrete(labels=c("lmaug"="LM Only \nWiki+PTB",
                          "augment"="LM+CCG \nWiki+PTB",
                          "wiki_lm"="LM Only \nWiki",
                          "ccglm"="LM Only \nPTB",
                          "gpt2"="GPT-2",
                          "h"="Humans")) +
# Colour and shape share the same name and labels so the legends merge.
scale_color_manual(name="",
                   labels=c("D"="Distributive",
                            "C"="Collective"),
                     values=c("D"=pallette2[[1]],
                             "C"=pallette2[[2]])) +
scale_shape_manual(name="",
                   labels=c("D"="Distributive", 
                            "C"="Collective"),
                   values=c("D"=21,"C"=22)) +
coord_cartesian(ylim = c(0.3,1.0)) +
theme_bw() +
theme(axis.text.x=element_text(size=12, angle=45, hjust=1), axis.text.y=element_text(size=12),
      axis.title.y=element_text(size=12), axis.title.x=element_text(size=10),
      strip.text=element_text(size=12), legend.text=element_text(size=12))

# No `plot=` argument: ggsave() writes ggplot2's last plot.
ggsave("plots/Humphreys_all.png", width=6.5, height=3)

In [None]:
# Beta-family mixed models of the proportion `sample/100` as a function of
# `prep`, fitted separately to each model type and then to all types jointly.
# NOTE(review): the response is assumed to lie strictly between 0 and 1 for
# the beta family -- confirm no rows have sample equal to 0 or 100.

# "lmaug" rows: random intercepts for item and for model.
hb_lm <- glmmTMB(data=subset(hb_df, type=="lmaug"), 
                  sample/100 ~ prep + (1 | item) + (1 | model),
                 family=beta_family())
summary(hb_lm)

# "augment" rows, same model structure.
hb_a <- glmmTMB(data=subset(hb_df, type=="augment"), 
                  sample/100 ~ prep + (1 | item) + (1 | model),
                 family=beta_family())
summary(hb_a)


# "wiki_lm" rows, same model structure.
hb_w <- glmmTMB(data=subset(hb_df, type=="wiki_lm"), 
                  sample/100 ~ prep + (1 | item) + (1 | model),
                 family=beta_family())
summary(hb_w)

# "gpt2" rows: item intercept only (no per-model term; presumably there is
# just one gpt2 model -- TODO confirm).
hb_g <- glmmTMB(data=subset(hb_df, type=="gpt2"), 
                  sample/100 ~ prep + (1 | item),
                 family=beta_family())
summary(hb_g)

# All rows of hb_df: prep, type and their interaction as fixed effects.
hb <- glmmTMB(data=hb_df, 
                  sample/100 ~ prep * type + (1 | item) + (1 | model),
                 family=beta_family())

summary(hb)

# Parker & An 2018

In [None]:
# Model types excluded from the Parker & An plots (and from any later cell
# that filters on `ignore`).
ignore <- c("augment_tied", "wiki_lm")

pa_df <- read.csv("../evalsets/ParkerAn2018/items.comp.csv.reformatc")

# Model type = model name minus its first four characters and its last one;
# rows whose model is exactly "gpt2" keep that name as their type.
pa_df$type <- substr(pa_df$model, 5, nchar(pa_df$model) - 1)
unique(pa_df$model)
pa_df[pa_df$model == "gpt2",]$type <- "gpt2"
pa_df$type <- as.factor(pa_df$type)

# Condition codes are "_"-separated: argument, attr, gram.
pa_df <- tidyr::separate(pa_df, condition, c("argument", "attr", "gram"), sep="_")

# +1 / -1 coding for each of the three two-level factors.
coding_matrix <- matrix(c(1, -1), ncol=1)
pa_df$argument <- as.factor(pa_df$argument)
pa_df$attr <- as.factor(pa_df$attr)
pa_df$gram <- as.factor(pa_df$gram)

contrasts(pa_df$argument) <- coding_matrix
contrasts(pa_df$attr) <- coding_matrix
contrasts(pa_df$gram) <- coding_matrix

# Long format: one row per surp_pos* column, with the position taken from the
# 10th character of the column name.
pa_df <- tidyr::gather(pa_df, "pos", "surp", starts_with("surp_pos"))
pa_df$pos <- as.numeric(substr(pa_df$pos, 10,10))

# Shifted position: CORE rows beyond position 2 move by 2, every other row by 1.
# Computed on whole columns instead of one mapply() call per row.
# NOTE(review): presumably this aligns the CORE and OBLIQUE sentences on a
# common word index -- confirm against the stimuli.
pa_df$pos_adj <- ifelse(pa_df$argument == "CORE" & pa_df$pos > 2,
                        pa_df$pos + 2,
                        pa_df$pos + 1)

# Keep adjusted positions up to 9; rows with a missing position are dropped too.
pa_df <- subset(pa_df, !is.na(pos_adj) & pos_adj <= 9)
pa_df

# For lmer
pa_df_verb <- subset(pa_df, pos_adj == 8)
pa_df_verb

In [None]:
options(repr.plot.width=8, repr.plot.height=5)

# Mean surprisal for every item x condition x position x model x type cell.
pa_df_strip <- summarize(
    group_by(pa_df, item, argument, attr, gram, pos_adj, model, type),
    surp=mean(surp))

# Hard-coded human rows (type "h", model "human", item 0) for positions 7-10.
# Row layout in both frames: OBLIQUE/S, OBLIQUE/P, CORE/S, CORE/P, four
# positions each. These values are plotted elsewhere in this file on an axis
# labelled "RT (ms)".
pa_u_human <- data.frame(item=rep(0, 16),
                         pos_adj=rep(c(7, 8, 9, 10), times=4),
                         gram=rep("U", 16),
                         type=rep("h", 16),
                         model=rep("human", 16),
                         attr=rep(rep(c("S", "P"), each=4), times=2),
                         argument=rep(c("OBLIQUE", "CORE"), each=8),
                         surp=c(340, 331, 357, 340, 332, 327, 332, 313, 330, 343, 349, 321, 327, 343, 351, 326))
pa_g_human <- data.frame(item=rep(0, 16),
                         pos_adj=rep(c(7, 8, 9, 10), times=4),
                         gram=rep("G", 16),
                         type=rep("h", 16),
                         model=rep("human", 16),
                         attr=rep(rep(c("S", "P"), each=4), times=2),
                         argument=rep(c("OBLIQUE", "CORE"), each=8),
                         surp=c(355, 330, 326, 310, 349, 327, 317, 318, 351, 335, 314, 313, 331, 330, 313, 317))

# Stack model and human rows, then drop the model types listed in `ignore`.
pa_df <- rbind(pa_df_strip, pa_u_human, pa_g_human)
pa_df <- subset(pa_df, !(type %in% ignore))

In [None]:
# Facet order used by the plotting functions below ("h" = human rows).
model_order <- c("lmaug", "augment", "gpt2", "h")

# Model panels, one facet column per model type (every row whose type is not "h").
#
# dataset: data frame with columns pos_adj, surp, attr, argument and type.
# verb:    tick label shown at x position 8.
# xlim, ylim: viewport limits handed to coord_cartesian().
# no_leg:  when TRUE the legend is removed.
# Returns the ggplot object. Reads the globals `model_order` and `pallette`.
plot_pa_spr_x_m <- function(dataset, verb, xlim, ylim, no_leg=FALSE) {
    strip_titles <- as_labeller(c("lmaug"="LM Only \nWiki+PTB",
                                  "augment"="LM+CCG \nWiki+PTB",
                                  "wiki_lm"="LM Only \nWiki",
                                  "gpt2"="GPT-2",
                                  "ccglm"="LM Only \nPTB",
                                  "h"="Humans"))
    attractor_names <- c(P="Mismatch (\"...girls...\")", S="Match (\"...girl...\")")
    word_ticks <- c("The", "waitress", "who", "sat", "(near)", "the", "girl(s)", "unsurprisingly", verb, "unhappy", "about")

    plt <- ggplot(subset(dataset, type != "h"),
                  aes(x=pos_adj, y=surp, color=attr, shape=attr, linetype=argument,
                      group=interaction(attr, argument)))
    plt <- plt + facet_grid(.~factor(type, levels=model_order), labeller=strip_titles, scales="free_y")

    # Mean +/- standard error per position and condition.
    plt <- plt +
        stat_summary(geom="point", size=2.3, fun.data=mean_se) +
        stat_summary(geom="line", fun.data=mean_se) +
        stat_summary(geom="errorbar", width=0.2, fun.data=mean_se, show.legend=FALSE)

    plt <- plt + labs(x="", y="Surprisal (bits)", color="", shape="", linetype="")
    plt <- plt +
        scale_color_manual(labels=attractor_names, values=pallette) +
        scale_shape_manual(labels=attractor_names, values=c("P"=15, "S"=16)) +
        scale_linetype_discrete(labels=c(CORE="Core Argument (RC)", OBLIQUE="Oblique Argument (PP)")) +
        scale_x_continuous(labels=word_ticks, breaks=0:10, limits=c(0,10)) +
        coord_cartesian(ylim=ylim, xlim=xlim)

    plt <- plt + theme_bw()
    plt <- plt + theme(axis.text.x=element_text(angle=0, size=12),
                       axis.text.y=element_text(size=12),
                       axis.title.y=element_text(size=12),
                       strip.text=element_text(size=12),
                       legend.text=element_text(size=8),
                       panel.grid.minor=element_blank())

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }
    plt
}
# Human panel (rows whose type is "h") with the y axis drawn on the right.
#
# dataset: data frame with columns pos_adj, surp, attr, argument and type.
# verb:    tick label shown at x position 8.
# xlim, ylim: viewport limits handed to coord_cartesian().
# no_leg:  when TRUE the legend is removed.
# Returns the ggplot object. Reads the globals `model_order` and `pallette`.
plot_pa_spr_x_h <- function(dataset, verb, xlim, ylim, no_leg=FALSE) {
    strip_titles <- as_labeller(c("lmaug"="LM Only \nWiki+PTB",
                                  "augment"="LM+CCG \nWiki+PTB",
                                  "wiki_lm"="LM Only \nWiki",
                                  "ccglm"="LM Only \nPTB",
                                  "h"="Humans"))
    attractor_names <- c(P="Mismatch (\"...girls...\")", S="Match (\"...girl...\")")
    word_ticks <- c("The", "waitress", "who", "sat", "(near)", "the", "girl(s)", "unsurprisingly", verb, "unhappy", "about")

    plt <- ggplot(subset(dataset, type == "h"),
                  aes(x=pos_adj, y=surp, color=attr, shape=attr, linetype=argument,
                      group=interaction(attr, argument)))
    plt <- plt + facet_grid(.~factor(type, levels=model_order), labeller=strip_titles, scales="free_y")

    # Mean +/- standard error per position and condition.
    plt <- plt +
        stat_summary(geom="point", size=2.3, fun.data=mean_se) +
        stat_summary(geom="line", fun.data=mean_se) +
        stat_summary(geom="errorbar", width=0.2, fun.data=mean_se, show.legend=FALSE)

    plt <- plt + labs(x="", y="RT (ms)", color="", shape="", linetype="")
    plt <- plt +
        scale_color_manual(labels=attractor_names, values=pallette) +
        scale_shape_manual(labels=attractor_names, values=c("P"=15, "S"=16)) +
        scale_linetype_discrete(labels=c(CORE="Core Argument (RC)", OBLIQUE="Oblique Argument (PP)")) +
        scale_x_continuous(labels=word_ticks, breaks=0:10, limits=c(0,10)) +
        scale_y_continuous(position="right") +
        coord_cartesian(ylim=ylim, xlim=xlim)

    plt <- plt + theme_bw()
    plt <- plt + theme(axis.text.x=element_text(angle=0, size=12),
                       axis.text.y=element_text(size=12),
                       axis.title.y=element_text(size=12),
                       strip.text=element_text(size=12),
                       legend.text=element_text(size=8),
                       panel.grid.minor=element_blank())

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }
    plt
}

# Side-by-side model and human panels with a single shared legend underneath.
#
# dataset: passed unchanged to plot_pa_spr_x_m() and plot_pa_spr_x_h().
# verb:    tick label for the verb position.
# xlim_m, ylim_m: viewport limits for the model panels.
# xlim_h, ylim_h: viewport limits for the human panel.
# Returns the combined cowplot grid on a white background.
plot_pa_spr_x <- function(dataset, verb, xlim_m, xlim_h, ylim_m, ylim_h) {
    model_panels <- plot_pa_spr_x_m(dataset, verb, xlim_m, ylim_m, no_leg=TRUE)
    human_panel <- plot_pa_spr_x_h(dataset, verb, xlim_h, ylim_h, no_leg=TRUE)

    # The legend is taken from a throw-away copy of the model plot.
    legend_donor <- plot_pa_spr_x_m(dataset, verb, xlim_m, ylim_m) + theme(legend.position="bottom")
    shared_legend <- get_legend(legend_donor)

    panel_row <- cowplot::plot_grid(model_panels, human_panel)
    stacked <- cowplot::plot_grid(panel_row, shared_legend, rel_heights=c(1, 0.1), ncol=1)
    stacked + theme(plot.background=element_rect(fill="white", color="white"))
}

In [None]:
options(repr.plot.width=6.5, repr.plot.height=5)

unique(pa_df$type)

# Pop-out figure for the gram == "G" rows (verb label "was").
# ggsave() is called without `plot=`, so it writes ggplot2's last plot.
plot_pa_spr_x(dataset=subset(pa_df, gram == "G"), verb="was",
              xlim_m=c(7.75, 8.25), xlim_h=c(7.75, 10.25),
              ylim_m=c(2, 11), ylim_h=c(300, 360))
ggsave(filename="plots/ParkerAn_G_popout.png", width=6.5, height=3.25)

# Same figure for the gram == "U" rows (verb label "were").
plot_pa_spr_x(dataset=subset(pa_df, gram == "U"), verb="were",
              xlim_m=c(7.75, 8.25), xlim_h=c(7.75, 10.25),
              ylim_m=c(2, 11), ylim_h=c(300, 360))
ggsave(filename="plots/ParkerAn_U_popout.png", width=6.5, height=3.25)

In [None]:

# Model panels stacked vertically, one facet row per model type (every row
# whose type is not "h"); legend at the top unless suppressed.
#
# dataset: data frame with columns pos_adj, surp, attr, argument and type.
# verb:    tick label shown at x position 8.
# xlim, ylim: viewport limits handed to coord_cartesian().
# no_leg:  when TRUE the legend is removed.
# Returns the ggplot object. Reads the globals `model_order` and `pallette`.
plot_pa_spr_y_m <- function(dataset, verb, xlim, ylim, no_leg=FALSE) {
    strip_titles <- as_labeller(c("lmaug"="LM Only \nWiki+PTB",
                                  "augment"="LM+CCG \nWiki+PTB",
                                  "wiki_lm"="LM Only \nWiki",
                                  "ccglm"="LM Only \nPTB",
                                  "gpt2"="GPT-2",
                                  "h"="Humans"))
    attractor_names <- c(P="Mismatch (\"...girls...\")", S="Match (\"...girl...\")")
    word_ticks <- c("The", "waitress", "who", "sat", "(near)", "the", "girl(s)", "unsurprisingly", verb, "unhappy", "about")

    plt <- ggplot(subset(dataset, type != "h"),
                  aes(x=pos_adj, y=surp, color=attr, shape=attr, linetype=argument,
                      group=interaction(attr, argument)))
    plt <- plt + facet_grid(factor(type, levels=model_order)~., labeller=strip_titles)

    # Mean +/- standard error per position and condition.
    plt <- plt +
        stat_summary(geom="point", size=1.6, fun.data=mean_se) +
        stat_summary(geom="line", fun.data=mean_se) +
        stat_summary(geom="errorbar", width=0.3, fun.data=mean_se)

    plt <- plt + labs(x="", y="Surprisal (bits)", color="", linetype="", shape="")
    plt <- plt +
        scale_color_manual(labels=attractor_names, values=pallette) +
        scale_shape_manual(labels=attractor_names, values=c("P"=15, "S"=16)) +
        scale_linetype_discrete(labels=c(CORE="Core Argument (RC)", OBLIQUE="Oblique Argument (PP)")) +
        scale_x_continuous(labels=word_ticks, breaks=0:10, limits=c(0,10)) +
        coord_cartesian(ylim=ylim, xlim=xlim)

    plt <- plt + theme_bw()
    plt <- plt + theme(axis.text.x=element_text(angle=45, hjust=1, size=12),
                       axis.text.y=element_text(size=12),
                       axis.title.y=element_text(size=12),
                       strip.text=element_text(size=12, angle=0),
                       legend.text=element_text(size=8),
                       panel.grid.minor=element_blank(),
                       strip.text.y=element_text(angle=0),
                       legend.position="top")

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }
    plt
}
# Human panel (rows whose type is "h") in the stacked layout; legend at the
# bottom unless suppressed.
#
# dataset: data frame with columns pos_adj, surp, attr, argument and type.
# verb:    tick label shown at x position 8.
# xlim, ylim: viewport limits handed to coord_cartesian().
# no_leg:  when TRUE the legend is removed.
# Returns the ggplot object. Reads the globals `model_order` and `pallette`.
plot_pa_spr_y_h <- function(dataset, verb, xlim, ylim, no_leg=FALSE) {
    strip_titles <- as_labeller(c("lmaug"="LM Only \nWiki+PTB",
                                  "augment"="LM+CCG \nWiki+PTB",
                                  "wiki_lm"="LM Only \nWiki",
                                  "ccglm"="LM Only \nPTB",
                                  "gpt2"="GPT-2",
                                  "h"=" Humans "))
    attractor_names <- c(P="Mismatch (\"...girls...\")", S="Match (\"...girl...\")")
    word_ticks <- c("The", "waitress", "who", "sat", "(near)", "the", "girl(s)", "unsurprisingly", verb, "unhappy", "about")

    plt <- ggplot(subset(dataset, type == "h"),
                  aes(x=pos_adj, y=surp, color=attr, shape=attr, linetype=argument,
                      group=interaction(attr, argument)))
    plt <- plt + facet_grid(factor(type, levels=model_order)~., labeller=strip_titles)

    # Mean +/- standard error per position and condition.
    plt <- plt +
        stat_summary(geom="point", size=1.6, fun.data=mean_se) +
        stat_summary(geom="line", fun.data=mean_se) +
        stat_summary(geom="errorbar", width=0.3, fun.data=mean_se)

    plt <- plt + labs(x="", y="RT (ms)", color="", linetype="", shape="")
    plt <- plt +
        scale_color_manual(labels=attractor_names, values=pallette) +
        scale_shape_manual(labels=attractor_names, values=c("P"=15, "S"=16)) +
        scale_linetype_discrete(labels=c(CORE="Core Argument (RC)", OBLIQUE="Oblique Argument (PP)")) +
        scale_x_continuous(labels=word_ticks, breaks=0:10, limits=c(0,10)) +
        coord_cartesian(ylim=ylim, xlim=xlim)

    plt <- plt + theme_bw()
    plt <- plt + theme(axis.text.x=element_text(angle=45, hjust=1, size=12),
                       axis.text.y=element_text(size=12),
                       axis.title.y=element_text(size=12),
                       strip.text=element_text(size=12, angle=0),
                       legend.text=element_text(size=8),
                       panel.grid.minor=element_blank(),
                       strip.text.y=element_text(angle=0),
                       legend.position="bottom")

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }
    plt
}

# Full-sentence figure: stacked model panels on top, human panel below.
#
# dataset: passed unchanged to plot_pa_spr_y_m() and plot_pa_spr_y_h().
# verb:    tick label for the verb position.
# ylim_m, ylim_h: y limits for the model and human panels.
# Returns the combined cowplot grid on a white background.
plot_pa_spr_y <- function(dataset, verb, ylim_m, ylim_h) {
    model_panels <- plot_pa_spr_y_m(dataset, verb, c(0,10), ylim_m)
    human_panel <- plot_pa_spr_y_h(dataset, verb, c(7,10.3), ylim_h, no_leg=TRUE)

    # An empty cell on the left pushes the human panel to the right of its row.
    human_row <- cowplot::plot_grid(NULL, human_panel, rel_widths=c(1, 0.87))

    stacked <- cowplot::plot_grid(model_panels, human_row, ncol=1, rel_heights=c(1, 0.55))
    stacked + theme(plot.background=element_rect(fill="white", color="white"))
}

In [None]:
# Show the first rows of pa_df as a quick check before plotting.
head(pa_df)

In [None]:
options(repr.plot.width=6.5, repr.plot.height=9)

# Full-sentence figure for the gram == "G" rows (verb label "was").
# ggsave() is called without `plot=`, so it writes ggplot2's last plot.
plot_pa_spr_y(dataset=subset(pa_df, gram == "G"), verb="was",
              ylim_m=c(0, 13), ylim_h=c(300, 375))
ggsave(filename="plots/Parker_G_full.png", width=6.5, height=7)

# Same figure for the gram == "U" rows (verb label "were").
plot_pa_spr_y(dataset=subset(pa_df, gram == "U"), verb="were",
              ylim_m=c(0, 13), ylim_h=c(300, 375))
ggsave(filename="plots/Parker_U_full.png", width=6.5, height=7)

In [None]:

# Linear mixed models of surprisal on pa_df_verb (the rows with pos_adj == 8).
# Each fit overwrites `pa_lm`; only the printed summaries are kept.
# Type-specific fits use random intercepts for item and model; the gpt2 fits
# at the end use an item intercept only.

# --- type "lmaug" ---
print("grammatical")
# gram == "G": argument x attr.
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "lmaug" & pa_df_verb$gram=="G"), 
                 surp ~ argument * attr  + (1|item) + (1 | model))

summary(pa_lm)

# gram == "U": argument x attr.
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "lmaug" & pa_df_verb$gram=="U"), 
                 surp ~ argument * attr  + (1|item) + (1 | model))

summary(pa_lm)

# gram == "U", OBLIQUE rows only: effect of attr.
print("ungram - obl")
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "lmaug" & pa_df_verb$gram=="U" & argument=="OBLIQUE"), 
                 surp ~ attr  + (1|item) + (1 | model))

summary(pa_lm)

# gram == "U", CORE rows only: effect of attr.
print("ungram - core")
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "lmaug" & pa_df_verb$gram=="U" & argument=="CORE"), 
                 surp ~ attr  + (1|item) + (1 | model))

summary(pa_lm)

# --- type "augment": same four fits ---
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "augment" & pa_df_verb$gram=="G"), 
                 surp ~ argument * attr  + (1|item) + (1 | model))

summary(pa_lm)
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "augment" & pa_df_verb$gram=="U"), 
                 surp ~ argument * attr  + (1|item) + (1 | model))

summary(pa_lm)

print("ungram - obl")
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "augment" & pa_df_verb$gram=="U" & argument=="OBLIQUE"), 
                 surp ~ attr  + (1|item) + (1 | model))

summary(pa_lm)

print("ungram - core")
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$type == "augment" & pa_df_verb$gram=="U" & argument=="CORE"), 
                 surp ~ attr  + (1|item) + (1 | model))

summary(pa_lm)


# --- all types together: argument x attr x type, per gram level ---
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$gram=="G"), 
                 surp ~ argument * attr * type  + (1|item) + (1 | model))

summary(pa_lm)
              
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$gram=="U"), 
                 surp ~ argument * attr * type  + (1|item) + (1 | model))

summary(pa_lm)


# --- type "gpt2": no per-model intercept ---
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$gram=="G" & pa_df_verb$type == "gpt2"), 
                 surp ~ argument * attr + (1|item))

summary(pa_lm)
              
pa_lm <- lmer(data=subset(pa_df_verb, pa_df_verb$gram=="U" & pa_df_verb$type == "gpt2"), 
                 surp ~ argument * attr + (1|item))

summary(pa_lm)

# Wagers (2009)

In [None]:
# Per-item model outputs for the Wagers (2009) evaluation set.
wagers_df <- read.csv('../evalsets/Wagers2009/23_illusion/items.csv.reformatc')

# Condition code: character 1 = subj, character 2 = attr, remainder = gram.
wagers_df <- tidyr::separate(wagers_df, "condition", into=c("subj", "attr", "gram"), sep=c(1, 2))

# Long format: one row per surp_pos* column; the position is the 10th
# character of the column name, shifted up by one.
wagers_df <- tidyr::gather(wagers_df, "pos", "surp", starts_with("surp_pos"))
wagers_df$pos <- as.numeric(substr(wagers_df$pos, 10, 10)) + 1

# match:  subj and attr carry the same code.
# vmatch: exactly one of `match` and `gram == "U"` holds.
wagers_df$match <- with(wagers_df, subj == attr)
wagers_df$vmatch <- with(wagers_df, as.factor(xor(match, gram == "U")))

# Model type = model name minus its first four characters and its last one;
# rows whose model is exactly "gpt2" keep that name as their type.
wagers_df$type <- substr(wagers_df$model, 5, nchar(wagers_df$model) - 1)
wagers_df[wagers_df$model == "gpt2",]$type <- "gpt2"
wagers_df$type <- as.factor(wagers_df$type)

wagers_df

In [None]:
# Build wagers_all: model rows from wagers_df plus hard-coded human rows,
# with +1 / -1 contrasts on the two-level factors.
options(repr.plot.width=8, repr.plot.height=5)


# NOTE(review): `surp` is listed as a grouping column and is also the column
# being summarised, and there is no `item` column in the grouping. How dplyr
# handles summarising a grouping column depends on its version -- confirm this
# yields the intended rows before relying on wagers_df_strip.
wagers_df_strip <- wagers_df %>% group_by(
    pos, gram, attr, subj, match, vmatch, surp, model, type) %>%
                                 summarize(surp=mean(surp))

# Hard-coded human rows (type "h", model "human") for positions 4-7, subj "S".
# Row layout: G/S, G/P, U/S, U/P (gram/attr), four positions each. These values
# are plotted elsewhere in this file on an axis labelled as reading time in ms.
wagers_s_human <- data.frame(pos=rep(c(4, 5 ,6, 7), 4),
                         subj=rep(c("S"), 16),
                         model=rep(c("human"), 16),
                         type= rep(c("h"), 16),
                         attr=rep(c(rep(c("S"), 4), rep(c("P"), 4)),2),
                         gram=rep(c(rep(c("G"), 8), rep(c("U"), 8))),
                         surp=c(327, 352, 348, 345, 331.5, 344, 356, 361, 326, 342, 415, 370.5, 331.5, 355, 386, 335.5))

# Same layout for subj "P".
wagers_p_human <- data.frame(pos=rep(c(4, 5 ,6, 7), 4),
                         subj= rep(c("P"), 16),
                         model=rep(c("human"), 16),
                         type= rep(c("h"), 16),
                         attr=rep(c(rep(c("S"), 4), rep(c("P"), 4)),2),
                         gram=rep(c(rep(c("G"), 8), rep(c("U"), 8))),
                         surp=c(328, 364, 355, 353, 357, 374, 358, 344, 328, 364.5, 410.5, 356.5, 335, 359, 402, 361.5))
# match: attr equals the frame's subj code; vmatch: xor of match and gram == "U"
# (the same definitions used for the model rows).
wagers_s_human$match <- wagers_s_human$attr=="S"
wagers_p_human$match <- wagers_p_human$attr=="P"
wagers_s_human$vmatch <- factor(xor(wagers_s_human$match, wagers_s_human$gram == "U"))
wagers_p_human$vmatch <- factor(xor(wagers_p_human$match, wagers_p_human$gram == "U"))


# Human rows first, then the model rows.
wagers_all <- rbind(wagers_s_human, wagers_p_human, wagers_df_strip)


# +1 / -1 coding for each two-level factor.
coding_matrix <- matrix(c(1, -1), ncol=1)
wagers_all$match <- as.factor(wagers_all$match)
wagers_all$vmatch <- as.factor(wagers_all$vmatch)
wagers_all$subj <- as.factor(wagers_all$subj)
wagers_all$gram <- as.factor(wagers_all$gram)

contrasts(wagers_all$match) <- coding_matrix
contrasts(wagers_all$vmatch) <- coding_matrix
contrasts(wagers_all$subj) <- coding_matrix
contrasts(wagers_all$gram) <- coding_matrix

wagers_all

In [None]:
# Two-part Wagers figure: human rows (plt2, left) next to the "lmaug" and
# "augment" model rows (plt1, right), written to plots/Wagers_lmccglmonly.png.
# Changes from the previous version: the stray empty argument in both aes()
# calls (`color=match,,`) is removed, and the legend.position="bottom" setting
# that was immediately overridden by "none" is folded into one theme() call.
no_h <- c("lmaug", "augment")

# Model panels: bootstrap summary (mean_cl_boot) per position and condition,
# viewport restricted to x in [4.7, 5.3].
plt1 <- ggplot(subset(wagers_all, type %in% no_h), aes(x=pos, y=surp, color=match, shape=match, linetype=gram, group=interaction(match, gram))) + 
    facet_grid(.~factor(type, levels=no_h), labeller=as_labeller(c("lmaug"="LM Only",
                          "augment"="LM+CCG",
                          "wiki_lm"="LM Only \nWiki",
                          "ccglm"="LM Only \nPTB",
                          "gpt2"="GPT-2",
                          "h"="Humans")), scales="free_y") +
    stat_summary(geom="point", size=3, fun.data=mean_cl_boot, position=position_dodge(width=0.2)) +
    stat_summary(geom="errorbar", width=0.2, fun.data=mean_cl_boot, position=position_dodge(width=0.2)) +
    labs(x="", y="Surprisal (bits)", color="", shape="", linetype="") +
    scale_y_continuous(position="right") +
    scale_linetype_discrete(labels=c("G"="Grammatical", 
                                     "U"="Ungrammatical")) +
    scale_x_continuous(labels=c("The", "musician(s)", "who", "the", "_subj_", "praise(s)", "so", "highly", "will", "probably..."), breaks=0:9, limits=c(0,9)) +
    coord_cartesian(xlim=c(4.7, 5.3), ylim=c(8, 13)) +
    theme_bw() + 
    theme(axis.text.x=element_text(angle=0, size=12), axis.text.y=element_text(size=12),
          axis.title.y=element_text(size=12), axis.title.x=element_text(size=12, hjust=0),
          strip.text=element_text(size=12), legend.text=element_text(size=8), legend.position="none",
          panel.background=element_rect(fill="#FFFFFF"),
          panel.grid.minor=element_blank()) +
    scale_color_manual(labels=c("FALSE"="Mismatch \n(\"The musicians\")", 
                                    "TRUE"="Match \n(\"The musician\")"), 
                           values=pallette, breaks=c(FALSE, TRUE)) + 
                     scale_shape_manual(labels=c("FALSE"="Mismatch \n(\"The musicians\")", 
                                    "TRUE"="Match \n(\"The musician\")"), 
                           values=c("TRUE"=16, "FALSE"=15), breaks=c(FALSE, TRUE)) 
plt1

# Human panel: points only, viewport restricted to x in [5.7, 6.3].
plt2 <- ggplot(subset(wagers_all, type =="h"), aes(x=pos, y=surp, color=match, shape=match, linetype=gram, group=interaction(match, gram))) + 
    facet_grid(.~factor(type, levels=c("h")), labeller=as_labeller(c("lmaug"="LM Only",
                          "augment"="LM+CCG",
                          "wiki_lm"="LM Only \nWiki",
                          "ccglm"="LM Only \nPTB",
                          "gpt2"="GPT-2",
                          "h"="Humans")), scales="free_y") +
    stat_summary(geom="point", size=3, fun.data=mean_cl_boot, position=position_dodge(width=0.2)) +
    labs(x="", y="Reading Time (ms)", color="", shape="", linetype="") +
    scale_linetype_discrete(labels=c("G"="Grammatical", 
                                     "U"="Ungrammatical")) +
    scale_x_continuous(labels=c("The", "musician(s)", "who", "the", "_subj_", "praise(s)", "so", "highly", "will", "probably..."), breaks=0:9, limits=c(0,9)) +
    coord_cartesian(xlim=c(5.7, 6.3)) +
    theme_bw() + 
    theme(axis.text.x=element_text(angle=0, size=12), axis.text.y=element_text(size=12),
          axis.title.y=element_text(size=12), axis.title.x=element_text(size=12, hjust=0),
          strip.text=element_text(size=12), legend.text=element_text(size=8), legend.position="none",
          panel.background=element_rect(fill="#FFFFFF"),
          panel.grid.minor=element_blank()) +
    scale_color_manual(labels=c("FALSE"="Mismatch \n(\"The musicians\")", 
                                    "TRUE"="Match \n(\"The musician\")"), 
                           values=pallette, breaks=c(FALSE, TRUE)) + 
                     scale_shape_manual(labels=c("FALSE"="Mismatch \n(\"The musicians\")", 
                                    "TRUE"="Match \n(\"The musician\")"), 
                           values=c("TRUE"=16, "FALSE"=15), breaks=c(FALSE, TRUE)) 

plt2

# Human panel on the left (3 parts wide), model panels on the right (5 parts).
p1 <- cowplot::plot_grid(plt2, plt1, rel_widths=c(3,5))
p1
# No `plot=` argument: ggsave() writes ggplot2's last plot.
ggsave("plots/Wagers_lmccglmonly.png", width=6.5, height=4)

In [None]:
# Model panels for the Wagers figures, one facet column per model type (every
# row whose type is not "h").
#
# dataset: data frame with columns pos, surp, match, gram and type.
# subj:    subject noun; used as the tick label at x position 4 and in the
#          x-axis title. The value "reviewer" selects the legend wording in
#          which the matching head noun is "The musician"; any other value
#          selects the wording in which it is "The musicians".
# verbg, verbu: verb forms quoted in the Grammatical / Ungrammatical legend
#          entries.
# xlim, ylim: viewport limits handed to coord_cartesian().
# no_leg:  when TRUE the legend is removed (otherwise it sits at the bottom).
# Returns the ggplot object. Reads the globals `model_order` and `pallette`.
#
# Changes from the previous version: the stray empty argument in aes()
# (`color=match,,`) is removed, matching plot_wagers_spr_x_h, and the two
# copies of the colour/shape scale code are collapsed into one that only
# differs in its label vector.
plot_wagers_spr_x_m <- function(dataset, subj, verbg, verbu, xlim, ylim, no_leg=FALSE) {
    strip_titles <- as_labeller(c("lmaug"="LM Only \nWiki+PTB",
                                  "augment"="LM+CCG \nWiki+PTB",
                                  "wiki_lm"="LM Only \nWiki",
                                  "ccglm"="LM Only \nPTB",
                                  "gpt2"="GPT-2",
                                  "h"="Humans"))

    plt <- ggplot(subset(dataset, type != "h"), aes(x=pos, y=surp, color=match, shape=match, linetype=gram, group=interaction(match, gram))) + 
    facet_grid(.~factor(type, levels=model_order), labeller=strip_titles, scales="free_y") +
    stat_summary(geom="point", size=2.3, fun.data=mean_se) +
    stat_summary(geom="line", fun.data=mean_se) +
    stat_summary(geom="errorbar", width=0.2, fun.data=mean_se) +
    labs(x=paste0("The musician(s) who the ", subj, "..."), y="Surprisal (bits)", color="", shape="", linetype="") +
    scale_linetype_discrete(labels=c("G"=paste0("Grammatical \n(\"", verbg, "\")"), 
                                     "U"=paste0("Ungrammatical \n(\"", verbu, "\")"))) +
    scale_x_continuous(labels=c("The", "musician(s)", "who", "the", subj, "praise(s)", "so", "highly", "will", "probably..."), breaks=0:9, limits=c(0,9)) +
    coord_cartesian(ylim=ylim, xlim=xlim) +
    theme_bw() + 
    theme(axis.text.x=element_text(angle=0, size=12), axis.text.y=element_text(size=12),
          axis.title.y=element_text(size=12), axis.title.x=element_text(size=12, hjust=0),
          strip.text=element_text(size=12), legend.text=element_text(size=8), legend.position="bottom",
          panel.background=element_rect(fill="#FFFFFF"),
          panel.grid.minor=element_blank())

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }

    # Legend wording for the match factor depends on the subject noun.
    match_labels <- if (subj=="reviewer") {
        c("FALSE"="Mismatch \n(\"The musicians\")",
          "TRUE"="Match \n(\"The musician\")")
    } else {
        c("FALSE"="Mismatch \n(\"The musician\")",
          "TRUE"="Match \n(\"The musicians\")")
    }

    plt <- plt +
        scale_color_manual(labels=match_labels, values=pallette, breaks=c(FALSE, TRUE)) +
        scale_shape_manual(labels=match_labels, values=c("TRUE"=16, "FALSE"=15), breaks=c(FALSE, TRUE))

    plt
}
# Human panel (rows whose type is "h") for the Wagers figures, with the y axis
# drawn on the right.
#
# dataset: data frame with columns pos, surp, match, gram and type.
# subj:    subject noun; used as the tick label at x position 4. The value
#          "reviewer" selects the legend wording in which the matching head
#          noun is "The musician"; any other value selects the other wording.
# verbg, verbu: verb forms quoted in the Grammatical / Ungrammatical legend
#          entries.
# xlim, ylim: viewport limits handed to coord_cartesian().
# no_leg:  when TRUE the legend is removed (otherwise it sits at the bottom).
# Returns the ggplot object. Reads the globals `model_order` and `pallette`.
plot_wagers_spr_x_h <- function(dataset, subj, verbg, verbu, xlim, ylim, no_leg=FALSE) {
    strip_titles <- as_labeller(c("lmaug"="LM Only \nWiki+PTB",
                                  "augment"="LM+CCG \nWiki+PTB",
                                  "wiki_lm"="LM Only \nWiki",
                                  "ccglm"="LM Only \nPTB",
                                  "gpt2"="GPT-2",
                                  "h"="Humans\n"))
    gram_names <- c("G"=paste0("Grammatical \n(\"", verbg, "\")"),
                    "U"=paste0("Ungrammatical \n(\"", verbu, "\")"))
    word_ticks <- c("The", "musician(s)", "who", "the", subj, "praise(s)", "so", "highly", "will", "probably...")

    plt <- ggplot(subset(dataset, type == "h"),
                  aes(x=pos, y=surp, color=match, shape=match,
                      linetype=gram, group=interaction(match, gram)))
    plt <- plt + facet_grid(.~factor(type, levels=model_order), labeller=strip_titles, scales="free_y")

    # Mean +/- standard error per position and condition.
    plt <- plt +
        stat_summary(geom="point", size=2.3, fun.data=mean_se) +
        stat_summary(geom="line", fun.data=mean_se) +
        stat_summary(geom="errorbar", width=0.2, fun.data=mean_se)

    plt <- plt + labs(x=" ", y="RT (ms)", color="", linetype="", shape="")
    plt <- plt +
        scale_linetype_discrete(labels=gram_names) +
        scale_x_continuous(labels=word_ticks, breaks=0:9, limits=c(0,9)) +
        scale_y_continuous(position="right") +
        coord_cartesian(ylim=ylim, xlim=xlim)

    plt <- plt + theme_bw()
    plt <- plt + theme(axis.text.x=element_text(angle=0, size=12),
                       axis.text.y=element_text(size=12),
                       axis.title.y=element_text(size=12),
                       axis.title.x=element_text(size=12, hjust=0),
                       strip.text=element_text(size=12),
                       legend.text=element_text(size=8),
                       legend.position="bottom",
                       panel.background=element_rect(fill="#FFFFFF"),
                       panel.grid.minor=element_blank())

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }

    # Legend wording for the match factor depends on the subject noun.
    match_labels <- if (subj=="reviewer") {
        c("FALSE"="Mismatch \n(\"The musicians\")",
          "TRUE"="Match \n(\"The musician\")")
    } else {
        c("FALSE"="Mismatch \n(\"The musician\")",
          "TRUE"="Match \n(\"The musicians\")")
    }

    plt <- plt +
        scale_color_manual(labels=match_labels, values=pallette, breaks=c(FALSE, TRUE)) +
        scale_shape_manual(labels=match_labels, values=c("TRUE"=16, "FALSE"=15), breaks=c(FALSE, TRUE))

    plt
}

# Side-by-side model and human Wagers panels (equal widths) with one shared
# legend underneath.
#
# dataset: passed unchanged to plot_wagers_spr_x_m() and plot_wagers_spr_x_h().
# subject, verbg, verbu: forwarded as the subj / verbg / verbu arguments.
# xlim_m, ylim_m: viewport limits for the model panels.
# xlim_h, ylim_h: viewport limits for the human panel.
# Returns the combined cowplot grid on a white background.
plot_wagers_spr_x <- function(dataset, subject, verbg, verbu, xlim_m, xlim_h, ylim_m, ylim_h) {
    model_panels <- plot_wagers_spr_x_m(dataset, subject, verbg, verbu, xlim_m, ylim_m, no_leg=TRUE)
    human_panel <- plot_wagers_spr_x_h(dataset, subject, verbg, verbu, xlim_h, ylim_h, no_leg=TRUE)

    # The legend is taken from a copy of the model plot that keeps its legend.
    legend_donor <- plot_wagers_spr_x_m(dataset, subject, verbg, verbu, xlim_m, ylim_m, no_leg=FALSE)
    panel_row <- cowplot::plot_grid(model_panels, human_panel, rel_widths=c(3,3))

    shared_legend <- get_legend(legend_donor)

    stacked <- cowplot::plot_grid(panel_row, shared_legend, rel_heights=c(1, 0.1), ncol=1)
    stacked + theme(plot.background=element_rect(fill="white", color="white"))
}

In [None]:
options(repr.plot.width=6.5, repr.plot.height=3.25)

# Drop the model types listed in `ignore` before plotting.
wagers_all <- subset(wagers_all, !(type %in% ignore))

# Pop-out figure for the subj == "S" rows.
# ggsave() is called without `plot=`, so it writes ggplot2's last plot.
plot_wagers_spr_x(dataset=subset(wagers_all, subj == "S"),
                  subject="reviewer", verbg="praises", verbu="praise",
                  xlim_m=c(4.7, 5.25), xlim_h=c(4.7, 7.25),
                  ylim_m=c(7, 13), ylim_h=c(325, 425))
ggsave(filename="plots/Wagers_Ssubj_popout.png", width=6.5, height=3.25)


# Same figure for the subj == "P" rows.
plot_wagers_spr_x(dataset=subset(wagers_all, subj == "P"),
                  subject="reviewers", verbg="praise", verbu="praises",
                  xlim_m=c(4.7, 5.25), xlim_h=c(4.7, 7.25),
                  ylim_m=c(7, 13), ylim_h=c(325, 425))
ggsave(filename="plots/Wagers_Psubj_popout.png", width=6.5, height=3.25)


In [None]:
# Surprisal-by-position panel for the rows of `dataset` whose type is not "h",
# with one facet row per `type`.
#
# dataset  data frame; the code reads columns pos, surp, match, gram and type
# subj     x-axis label for position 4; the value "reviewer" also selects
#          which set of match/mismatch legend labels is used
# verbg    verb quoted in the "Grammatical" linetype label
# verbu    verb quoted in the "Ungrammatical" linetype label
# xlim     x range passed to coord_cartesian()
# ylim     y range passed to coord_cartesian()
# no_leg   TRUE removes the legend; otherwise it is placed at the top
#
# Reads the globals `model_order` (facet level order) and `pallette` (colours).
# Returns the ggplot object.
plot_wagers_spr_y_m <- function(dataset, subj, verbg, verbu, xlim, ylim, no_leg=FALSE) {
    # Facet strip text, keyed by the value of `type`.
    strip_names <- c("lmaug"="LM Only \nWiki+PTB",
                     "augment"="LM+CCG \nWiki+PTB",
                     "wiki_lm"="LM Only \nWiki",
                     "ccglm"="LM Only \nPTB",
                     "gpt2"="GPT-2",
                     "h"="Humans\n")

    # One tick label per word position 0..9.
    word_ticks <- c("The", "musician(s)", "who", "the", subj, "praise(s)", "so", "highly", "will", "probably...")

    gram_names <- c("G"=paste0("Grammatical \n(\"", verbg, "\")"),
                    "U"=paste0("Ungrammatical \n(\"", verbu, "\")"))

    # The same legend text is used for both the colour and the shape scale.
    if (subj == "reviewer") {
        match_names <- c("FALSE"="Mismatch \n(\"The musicians\")",
                         "TRUE"="Match \n(\"The musician\")")
    } else {
        match_names <- c("FALSE"="Mismatch \n(\"The musician\")",
                         "TRUE"="Match \n(\"The musicians\")")
    }

    text_theme <- theme(axis.text.x=element_text(angle=45, hjust=1, size=12),
                        axis.text.y=element_text(size=12),
                        axis.title.y=element_text(size=12),
                        axis.title.x=element_text(size=13, hjust=0),
                        strip.text=element_text(size=12),
                        legend.text=element_text(size=8),
                        panel.grid.minor=element_blank(),
                        legend.position="top")

    plt <- ggplot(subset(dataset, type != "h"),
                  aes(x=pos, y=surp, color=match, shape=match, linetype=gram,
                      group=interaction(match, gram))) +
        facet_grid(factor(type, levels=model_order) ~ .,
                   labeller=as_labeller(strip_names), scales="free_y") +
        stat_summary(geom="point", size=2.3, fun.data=mean_se) +
        stat_summary(geom="line", fun.data=mean_se) +
        stat_summary(geom="errorbar", width=0.2, fun.data=mean_se, size=0.7) +
        labs(x="", y="Surprisal (bits)", color="", linetype="", shape="") +
        scale_linetype_discrete(labels=gram_names) +
        scale_x_continuous(labels=word_ticks, breaks=0:9, limits=c(0,9)) +
        scale_color_manual(labels=match_names, values=pallette, breaks=c(FALSE, TRUE)) +
        scale_shape_manual(labels=match_names, values=c("TRUE"=16, "FALSE"=15), breaks=c(FALSE, TRUE)) +
        coord_cartesian(ylim=ylim, xlim=xlim) +
        theme_bw() +
        text_theme

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }

    plt
}
# Reading-time-by-position panel for the rows of `dataset` whose type is "h"
# (the facet labelled "Humans").
#
# dataset  data frame; the code reads columns pos, surp, match, gram and type
#          (the y axis is titled "RT (ms)" but is still mapped to `surp`)
# subj     x-axis label for position 4; the value "reviewer" also selects
#          which set of match/mismatch legend labels is used
# verbg    verb quoted in the "Grammatical" linetype label
# verbu    verb quoted in the "Ungrammatical" linetype label
# xlim     x range passed to coord_cartesian()
# ylim     y range passed to coord_cartesian()
# no_leg   TRUE removes the legend; otherwise it is placed at the top
#
# Reads the globals `model_order` (facet level order) and `pallette` (colours).
# Returns the ggplot object.
plot_wagers_spr_y_h <- function(dataset, subj, verbg, verbu, xlim, ylim, no_leg=FALSE) {
    human_rows <- subset(dataset, type == "h")

    # Legend text shared by the colour and shape scales.
    match_text <- if (subj == "reviewer") {
        c("FALSE"="Mismatch \n(\"The musicians\")",
          "TRUE"="Match \n(\"The musician\")")
    } else {
        c("FALSE"="Mismatch \n(\"The musician\")",
          "TRUE"="Match \n(\"The musicians\")")
    }

    gram_text <- c("G"=paste0("Grammatical \n(\"", verbg, "\")"),
                   "U"=paste0("Ungrammatical \n(\"", verbu, "\")"))

    facet_text <- as_labeller(c("lmaug"="LM Only \nWiki+PTB",
                                "augment"="LM+CCG \nWiki+PTB",
                                "wiki_lm"="LM Only \nWiki",
                                "ccglm"="LM Only \nPTB",
                                "gpt2"="GPT-2",
                                "h"="Humans\n"))

    base_aes <- aes(x=pos, y=surp, color=match, shape=match, linetype=gram,
                    group=interaction(match, gram))

    plt <- ggplot(human_rows, base_aes) +
        facet_grid(factor(type, levels=model_order) ~ ., labeller=facet_text, scales="free_y") +
        stat_summary(geom="point", size=2.3, fun.data=mean_se) +
        stat_summary(geom="line", fun.data=mean_se, size=0.7) +
        stat_summary(geom="errorbar", width=0.2, fun.data=mean_se) +
        labs(x=" ", y="RT (ms)", color="", linetype="", shape="") +
        scale_linetype_discrete(labels=gram_text) +
        scale_x_continuous(
            labels=c("The", "musician(s)", "who", "the", subj, "praise(s)", "so", "highly", "will", "probably..."),
            breaks=0:9, limits=c(0,9)
        ) +
        coord_cartesian(ylim=ylim, xlim=xlim) +
        theme_bw() +
        theme(axis.text.x=element_text(angle=45, hjust=1, size=12),
              axis.text.y=element_text(size=12),
              axis.title.y=element_text(size=12),
              axis.title.x=element_text(size=12, hjust=0),
              strip.text=element_text(size=12),
              legend.text=element_text(size=8),
              panel.grid.minor=element_blank(),
              legend.position="top") +
        scale_color_manual(labels=match_text, values=pallette, breaks=c(FALSE, TRUE)) +
        scale_shape_manual(labels=match_text, values=c("TRUE"=16, "FALSE"=15), breaks=c(FALSE, TRUE))

    if (no_leg) {
        plt <- plt + theme(legend.position="none")
    }

    plt
}

# Stack the full-sentence panel from plot_wagers_spr_y_m on top of the
# narrower panel from plot_wagers_spr_y_h.
#
# dataset        data forwarded unchanged to both panel builders
# subject        forwarded as `subj` to both panel builders
# verbg, verbu   forwarded to both panel builders
# ylim_m         y range for the upper (`_m`) panel, which spans x = 0..9
# ylim_h         y range for the lower (`_h`) panel, which spans x = 4..7
#
# Returns the combined cowplot grid with a white plot background.
plot_wagers_spr_y <- function(dataset, subject, verbg, verbu,  ylim_m, ylim_h) {
    # Upper panel keeps its legend (no_leg is left at its default).
    upper_panel <- plot_wagers_spr_y_m(dataset, subject, verbg, verbu, c(0, 9), ylim_m)
    lower_panel <- plot_wagers_spr_y_h(dataset, subject, verbg, verbu, c(4, 7), ylim_h, no_leg = TRUE)

    # Empty cells on either side of the lower panel set its horizontal
    # position and width within the row.
    lower_row <- cowplot::plot_grid(NULL, lower_panel, NULL,
                                    ncol = 3, rel_widths = c(1, 1.10, 0.53))

    stacked <- cowplot::plot_grid(upper_panel, lower_row, ncol = 1, rel_heights = c(1, 0.55))

    stacked + theme(plot.background = element_rect(fill = "white", color = "white"))
}

In [None]:
options(repr.plot.width=6.5, repr.plot.height=7)

# Each figure is bound to a name and handed to ggsave() through `plot=`, so the
# file written never depends on whatever ggplot2 currently holds as last_plot().
# Evaluating the name on its own line still displays the figure in the notebook.

# Singular relative-clause subject ("reviewer").
fig_wagers_ssubj_all <- plot_wagers_spr_y(subset(wagers_all, subj == "S"),
                                          "reviewer", "praises", "praise",
                                          c(3, 13), c(325, 425))
fig_wagers_ssubj_all
ggsave("plots/Wagers_Ssubj_all.png", plot = fig_wagers_ssubj_all, width = 6.5, height = 7)


# Plural relative-clause subject ("reviewers").
fig_wagers_psubj_all <- plot_wagers_spr_y(subset(wagers_all, subj == "P"),
                                          "reviewers", "praise", "praises",
                                          c(3, 13), c(325, 425))
fig_wagers_psubj_all
ggsave("plots/Wagers_Psubj_all.png", plot = fig_wagers_psubj_all, width = 6.5, height = 7)


In [None]:
# Mixed-effects models of surprisal at word position 5 (the "praise(s)" slot
# on the plots' x axis). Every fit is stored in `wagers_lm`, overwriting the
# previous one, and summarised immediately.
# NOTE(review): the per-condition fits use `match` while the interaction fits
# use `vmatch` -- confirm that both predictors are intended.
wagers_df_verb <- subset(wagers_df, pos == 5)

# ---- type == "lmaug" ----

# Grammatical items only.
wagers_lm <- lmer(
    surp ~ match + (1 | item) + (1 | model),
    data = subset(wagers_df_verb, wagers_df_verb$type == "lmaug" & wagers_df_verb$gram == "G")
)
summary(wagers_lm)

# Ungrammatical items only.
wagers_lm <- lmer(
    surp ~ match + (1 | item) + (1 | model),
    data = subset(wagers_df_verb, wagers_df_verb$type == "lmaug" & wagers_df_verb$gram == "U")
)
summary(wagers_lm)

# Both grammaticality levels, with the interaction.
wagers_lm <- lmer(
    surp ~ vmatch * gram + (1 | item) + (1 | model),
    data = subset(wagers_df_verb, wagers_df_verb$type == "lmaug")
)
summary(wagers_lm)

# ---- type == "augment" ----

# Grammatical items only.
wagers_lm <- lmer(
    surp ~ match + (1 | item) + (1 | model),
    data = subset(wagers_df_verb, wagers_df_verb$type == "augment" & wagers_df_verb$gram == "G")
)
summary(wagers_lm)

# Ungrammatical items only.
wagers_lm <- lmer(
    surp ~ match + (1 | item) + (1 | model),
    data = subset(wagers_df_verb, wagers_df_verb$type == "augment" & wagers_df_verb$gram == "U")
)
summary(wagers_lm)

# Both grammaticality levels, with the interaction.
wagers_lm <- lmer(
    surp ~ vmatch * gram + (1 | item) + (1 | model),
    data = subset(wagers_df_verb, wagers_df_verb$type == "augment")
)
summary(wagers_lm)

# ---- all types together ----

# Three-way interaction with model type; no rows are filtered out here.
wagers_lm <- lmer(
    surp ~ type * vmatch * gram + (1 | item) + (1 | model),
    data = subset(wagers_df_verb)
)
summary(wagers_lm)

# ---- type == "gpt2" ----
# These fits have a by-item intercept only (no `(1 | model)` term);
# presumably there is a single GPT-2 model -- verify.

# Grammatical items only.
wagers_lm <- lmer(
    surp ~ match + (1 | item),
    data = subset(wagers_df_verb, wagers_df_verb$type == "gpt2" & wagers_df_verb$gram == "G")
)
summary(wagers_lm)

# Ungrammatical items only.
wagers_lm <- lmer(
    surp ~ match + (1 | item),
    data = subset(wagers_df_verb, wagers_df_verb$type == "gpt2" & wagers_df_verb$gram == "U")
)
summary(wagers_lm)

# Ungrammatical items, restricted further to subj == "S".
wagers_lm <- lmer(
    surp ~ match + (1 | item),
    data = subset(wagers_df_verb, wagers_df_verb$type == "gpt2" & wagers_df_verb$gram == "U" & wagers_df_verb$subj == "S")
)
summary(wagers_lm)

# Both grammaticality levels, with the interaction.
wagers_lm <- lmer(
    surp ~ vmatch * gram + (1 | item),
    data = subset(wagers_df_verb, wagers_df_verb$type == "gpt2")
)
summary(wagers_lm)