In [2]:
# Source the modified version of stargazer
# I modified the stargazer source code so that it prints out the p values in scientific 
# notation when they are very small.
source("stargazer_modified/stargazer.R")
library(lme4)

# Load the data; each row is counted as one token in the messages below.
stats_df <- read.csv(file = "sct_data.csv")

print(paste("There are ", nrow(stats_df), " total tokens in the data."))

# Keep only rows whose word order is one of the four orders analysed here.
kept_orders <- c("AdvSVO", "SAdvVO", "SVAdvO", "SVOAdv")
stats_df <- stats_df[stats_df$Word_Order %in% kept_orders, ]

print(paste("There are ", nrow(stats_df), " tokens after removing some data."))

# Add one 0/1 indicator column per kept word order. The column is named after
# the order and is 1 when the row has that order, 0 otherwise.
for (word_order in kept_orders) {
    stats_df[[word_order]] <- ifelse(stats_df$Word_Order == word_order, 1, 0)
}


# Use deviation contrast coding
#
# named.contr.sum() builds a sum-to-zero contrast matrix with contr.sum() and
# labels each contrast column with the name of the level that is coded with a
# positive value in that column.
#
# Arguments:
#   x    A factor (its levels are used) or a vector of level names.
#   ...  Further arguments forwarded to contr.sum().
# Returns:
#   The matrix produced by contr.sum(), with column names set to the matching
#   level names.
# Errors:
#   Stops when `x` is a single number, because a bare level count carries no
#   names to label the columns with.
named.contr.sum <- function(x, ...) {
    if (is.factor(x)) {
        x <- levels(x)
    } else if (is.numeric(x) && length(x) == 1L) {
        # Scalar condition: use the short-circuiting `&&`, not elementwise `&`.
        stop("cannot create names with integer value. Pass factor levels")
    }
    contrast_matrix <- contr.sum(x, ...)
    # Rows are named by level, so the name of the positive entry in a column
    # is the level that column represents.
    colnames(contrast_matrix) <- apply(
        contrast_matrix, 2,
        function(column) names(column[column > 0])
    )
    contrast_matrix
}

# Store a factor copy of each categorical column under the name "<column>.f".
for (source_col in c("Group", "Adverb", "Mente", "Adverb_Class", "Word_Order")) {
    stats_df[[paste0(source_col, ".f")]] <- factor(stats_df[[source_col]])
}

# Attach named sum contrasts to the four predictor factors. Word_Order.f is
# not given custom contrasts here.
for (factor_col in c("Group.f", "Adverb.f", "Mente.f", "Adverb_Class.f")) {
    contrasts(stats_df[[factor_col]]) <- named.contr.sum(stats_df[[factor_col]])
}

head(stats_df)

Loading required package: Matrix



[1] "There are  2176  total tokens in the data."
[1] "There are  2106  tokens after removing some data."


Unnamed: 0_level_0,PID,QID,Word_Order,Group,Adverb_Class,Mente,Adverb,AdvSVO,SAdvVO,SVAdvO,SVOAdv,Group.f,Adverb.f,Mente.f,Adverb_Class.f,Word_Order.f
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<fct>,<fct>,<fct>,<fct>
1,4,SCT0_A-_aqui,SVOAdv,Intermediate,Place,No,aqui,0,0,0,1,Intermediate,aqui,No,Place,SVOAdv
2,4,SCT1_A-_aqui,SVOAdv,Intermediate,Place,No,aqui,0,0,0,1,Intermediate,aqui,No,Place,SVOAdv
3,4,SCT0_A-_afuera,SVOAdv,Intermediate,Place,No,afuera,0,0,0,1,Intermediate,afuera,No,Place,SVOAdv
4,4,SCT1_A-_afuera,SVOAdv,Intermediate,Place,No,afuera,0,0,0,1,Intermediate,afuera,No,Place,SVOAdv
5,4,SCT0_A-_cerca,SVAdvO,Intermediate,Place,No,cerca,0,0,1,0,Intermediate,cerca,No,Place,SVAdvO
6,4,SCT1_A-_cerca,SVOAdv,Intermediate,Place,No,cerca,0,0,0,1,Intermediate,cerca,No,Place,SVOAdv


In [3]:
# Render each model in `models` to its own HTML file with stargazer, then run
# combine_model_outputs.py on those files to combine them.
#
# Arguments:
#   output_filename  Path passed to the Python script as its last argument
#                    (presumably where the combined HTML is written; confirm
#                    against combine_model_outputs.py).
#   models           A list of fitted models; each element is passed to
#                    stargazer() on its own.
#   title            Title passed to stargazer() for every table.
# Returns:
#   Invisibly, the exit status of the Python command.
# Side effects:
#   Writes tmp1.html, tmp2.html, ... into the working directory and runs the
#   `py` launcher. A warning is raised when the command exits with a non-zero
#   status.
combine_output <- function(output_filename, models, title) {
    filenames <- character(length(models))

    for (i in seq_along(models)) {
        filenames[i] <- paste0("tmp", i, ".html")

        # Passing a path lets writeLines() open and close the file itself, so
        # no connection is left open if rendering fails.
        writeLines(
            capture.output(
                stargazer(models[[i]], title = title, report = "vc*tp", type = "html")
            ),
            con = filenames[i]
        )
    }

    # Quote every path so spaces or shell metacharacters in a file name cannot
    # split or alter the command line.
    status <- system2(
        "py",
        args = c(
            "combine_model_outputs.py",
            shQuote(filenames),
            shQuote(output_filename)
        )
    )

    if (status != 0) {
        warning(
            "combine_model_outputs.py exited with status ", status,
            " while building ", output_filename,
            call. = FALSE
        )
    }

    invisible(status)
}

In [4]:
# Look at the interaction of group and adverb.
#
# Every word-order indicator is fitted four times on Group.f * Adverb.f. Between
# fits, the last level of one factor is moved to the front and the named sum
# contrasts are rebuilt, so each fit uses a different set of contrast columns.
#
# NOTE(review): no `family` is passed, so glm() uses its default (gaussian)
# although the responses are 0/1 indicators. Confirm this is intended.

# Move the last level of `f` to the front and re-apply named sum contrasts
# (relevel() returns a factor without the previously assigned contrasts).
move_last_level_first <- function(f) {
    f <- relevel(f, ref = tail(levels(f), n = 1))
    contrasts(f) <- named.contr.sum(f)
    f
}

# Fit 1: factors as originally coded.
advsvo_model_1 <- glm(AdvSVO ~ Group.f * Adverb.f, data = stats_df)
sadvvo_model_1 <- glm(SAdvVO ~ Group.f * Adverb.f, data = stats_df)
svadvo_model_1 <- glm(SVAdvO ~ Group.f * Adverb.f, data = stats_df)
svoadv_model_1 <- glm(SVOAdv ~ Group.f * Adverb.f, data = stats_df)

# Fit 2: last adverb level moved to the front.
stats_df$Adverb.f <- move_last_level_first(stats_df$Adverb.f)

advsvo_model_2 <- glm(AdvSVO ~ Group.f * Adverb.f, data = stats_df)
sadvvo_model_2 <- glm(SAdvVO ~ Group.f * Adverb.f, data = stats_df)
svadvo_model_2 <- glm(SVAdvO ~ Group.f * Adverb.f, data = stats_df)
svoadv_model_2 <- glm(SVOAdv ~ Group.f * Adverb.f, data = stats_df)

# Fit 3: last group level moved to the front as well.
stats_df$Group.f <- move_last_level_first(stats_df$Group.f)

advsvo_model_3 <- glm(AdvSVO ~ Group.f * Adverb.f, data = stats_df)
sadvvo_model_3 <- glm(SAdvVO ~ Group.f * Adverb.f, data = stats_df)
svadvo_model_3 <- glm(SVAdvO ~ Group.f * Adverb.f, data = stats_df)
svoadv_model_3 <- glm(SVOAdv ~ Group.f * Adverb.f, data = stats_df)

# Fit 4: the adverb level that is now last is moved to the front. This rotates
# the adverb levels a second time; it does not restore their original order.
stats_df$Adverb.f <- move_last_level_first(stats_df$Adverb.f)

advsvo_model_4 <- glm(AdvSVO ~ Group.f * Adverb.f, data = stats_df)
sadvvo_model_4 <- glm(SAdvVO ~ Group.f * Adverb.f, data = stats_df)
svadvo_model_4 <- glm(SVAdvO ~ Group.f * Adverb.f, data = stats_df)
svoadv_model_4 <- glm(SVOAdv ~ Group.f * Adverb.f, data = stats_df)

# Write one combined HTML file per word order, holding its four fits in order.
combine_output(
    "model_outputs/SCT_AdvSVO_group_and_adverb.html",
    list(advsvo_model_1, advsvo_model_2, advsvo_model_3, advsvo_model_4),
    "AdvSVO"
)
combine_output(
    "model_outputs/SCT_SAdvVO_group_and_adverb.html",
    list(sadvvo_model_1, sadvvo_model_2, sadvvo_model_3, sadvvo_model_4),
    "SAdvVO"
)
combine_output(
    "model_outputs/SCT_SVAdvO_group_and_adverb.html",
    list(svadvo_model_1, svadvo_model_2, svadvo_model_3, svadvo_model_4),
    "SVAdvO"
)
combine_output(
    "model_outputs/SCT_SVOAdv_group_and_adverb.html",
    list(svoadv_model_1, svoadv_model_2, svoadv_model_3, svoadv_model_4),
    "SVOAdv"
)