In [2]:
library(FactoMineR)
library(factoextra)#fviz_eig
library(psych)

library(ggplot2)
library(pheatmap)
library(RColorBrewer)

library(tidyverse)
library(reshape)#melt

library(reshape2)
library(repr)
library(plyr)
library(Rmisc)
library(extrafont)

In [3]:
# Feature sets, one character vector per theoretical model. Each entry is a
# column name that is later pasted into a regression formula as a predictor.
FAVEE_model <- c(
    "Formality.and.Regulation", "Activeness", "Valence.Evaluation",
    "Goods.Exchange", "Equality"
)
Wish_1976_model <- c(
    "Formality.and.Regulation", "Equality", "Valence.Evaluation",
    "Activity.Intensity"
)
Triandis_1968_model <- c("Valence.Evaluation", "Equality", "Intimacy")
Marwell_1970_model <- c("Visibility", "Formality.and.Regulation", "Intimacy")
Osgood_1957_model <- c("Valence.Evaluation", "Equality", "Activity.Intensity")
Fiske_1992_model <- c(
    "Communal.Sharing", "Equality", "Strategic", "Expected.Reciprocity"
)
Clark_2011_model <- c("Communal.Sharing", "Strategic", "Expected.Reciprocity")
Carpendale_2004_model <- c(
    "Importance.for.individuals", "Importance.for.society"
)
Foa_2012_model <- c("Concreteness", "Uniqueness")
Bugental_2000_model <- c(
    "Attachment", "Affiliation.Coalition", "Mating", "Expected.Reciprocity",
    "Equality"
)
Goffman_1959_model <- c(
    "Valence.Evaluation", "Affiliation.Coalition", "Conflict", "Negotiation",
    "Coercion"
)
Hamilton_1981_model <- c("Valence.Evaluation", "Equality")
Burton_1975_model <- c("Valence.Evaluation", "Equality", "Occupational")
Montgomery_1988_model <- c("Valence.Evaluation", "Equality", "Intimacy")
Rands_1979_model <- c("Formality.and.Regulation", "Socioemotional")
Weiss_1998_model <- c("Attachment", "Affiliation.Coalition")

# Names of the competing-model variables defined above (FAVEE_model is not
# part of this list); each name is "<Author>_<Year>_model".
models <- paste0(
    c(
        "Wish_1976", "Triandis_1968", "Marwell_1970",
        "Osgood_1957", "Fiske_1992", "Clark_2011", "Carpendale_2004",
        "Foa_2012", "Bugental_2000", "Goffman_1959", "Hamilton_1981",
        "Burton_1975", "Montgomery_1988", "Rands_1979", "Weiss_1998"
    ),
    "_model"
)

In [4]:
# Compare two predictor sets by regressing every remaining column on each.
#
# Arguments:
#   model1, model2  character vectors of column names in `data`; each is used
#                   as the right-hand side of a linear model.
#   data            data frame holding the predictor columns and the columns
#                   to be predicted.
#
# Every column of `data` that appears in neither model is used, one at a time,
# as the dependent variable of `lm()` fits against model1 and against model2.
#
# Returns an unnamed list of two numeric vectors,
#   list(c(mean adj. R^2, mean BIC) for model1,
#        c(mean adj. R^2, mean BIC) for model2),
# where the means are taken over all dependent variables.
#
# NOTE: the response is supplied through a local variable called `y`; this
# assumes `data` has no column literally named "y", because `lm()` looks
# variables up in `data` before the formula's environment.
model_compare <- function(model1, model2, data) {
    # Logical negation instead of -which(...): the latter would drop *every*
    # column if none of the model terms matched a column name.
    data_regression <- data[!(names(data) %in% c(model1, model2))]

    # The formulas do not depend on the trait, so build them once. They capture
    # this function's environment, where `y` is (re)assigned inside the loop.
    f_model1 <- as.formula(paste("y ~ ", paste(model1, collapse = "+")))
    f_model2 <- as.formula(paste("y ~ ", paste(model2, collapse = "+")))

    n_traits <- ncol(data_regression)
    AdjR_model1 <- numeric(n_traits)
    AdjR_model2 <- numeric(n_traits)
    BIC_model1 <- numeric(n_traits)
    BIC_model2 <- numeric(n_traits)

    # seq_len() yields an empty loop (not c(1, 0)) when no columns are left.
    for (trait in seq_len(n_traits)) {
        # Current dependent variable
        y <- data_regression[[trait]]

        # Fit with the first predictor set
        lm_model1 <- lm(f_model1, data = data)
        AdjR_model1[trait] <- summary(lm_model1)$adj.r.squared
        BIC_model1[trait] <- BIC(lm_model1)

        # Fit with the second predictor set
        lm_model2 <- lm(f_model2, data = data)
        AdjR_model2[trait] <- summary(lm_model2)$adj.r.squared
        BIC_model2[trait] <- BIC(lm_model2)
    }

    res_model1 <- c(mean(AdjR_model1), mean(BIC_model1))
    res_model2 <- c(mean(AdjR_model2), mean(BIC_model2))

    list(res_model1, res_model2)
}

In [67]:
# Private helper: build one "dumbbell" comparison plot (FAVEE vs. each other
# model) for a single fit measure.
#
# Arguments:
#   comparison    data frame with columns Model, Group, Model_Names and the
#                 column named by `measure` (two rows per competing model).
#   summary_se    summarySE() output for `measure`, grouped by Model; its
#                 `measure` and `se` columns position the vertical line and
#                 the shaded band of each group.
#   measure       name of the column to put on the x axis ("AdjR" or "BIC").
#   x_label       x-axis title.
#   y_title_face  font face of the y-axis title; NULL leaves it inherited.
# Returns a ggplot object.
.model_comparison_plot <- function(comparison, summary_se, measure, x_label,
                                   y_title_face = NULL) {
    favee_color <- "#6600CC"
    others_color <- "#009688"

    favee_se <- summary_se[summary_se$Model == "FAVEE_Model", ]
    others_se <- summary_se[summary_se$Model == "Others_Model", ]

    # Expose the chosen measure under a fixed column name so aes() can refer
    # to it without non-standard-evaluation tricks.
    plot_data <- comparison
    plot_data$value <- comparison[[measure]]

    # One segment per competing model, running from the FAVEE value to the
    # other model's value. Rows are paired explicitly through Group rather
    # than relying on both subsets happening to be in the same order.
    favee_rows <- plot_data[plot_data$Model == "FAVEE_Model", ]
    others_rows <- plot_data[plot_data$Model == "Others_Model", ]
    others_rows <- others_rows[match(favee_rows$Group, others_rows$Group), ]
    segments <- data.frame(
        Model_Names = favee_rows$Model_Names,
        x = favee_rows$value,
        xend = others_rows$value,
        yend = others_rows$Model_Names,
        stringsAsFactors = FALSE
    )

    ggplot(plot_data) +
        # band of +/- se around each group's summary value, plus a line at it
        geom_rect(xmin = favee_se[[measure]] - favee_se$se,
                  xmax = favee_se[[measure]] + favee_se$se,
                  ymin = 0, ymax = 20, fill = favee_color, alpha = .05) +
        geom_vline(xintercept = favee_se[[measure]], linetype = "solid",
                   size = .5, alpha = .8, color = favee_color) +
        geom_rect(xmin = others_se[[measure]] - others_se$se,
                  xmax = others_se[[measure]] + others_se$se,
                  ymin = 0, ymax = 20, fill = others_color, alpha = .05) +
        geom_vline(xintercept = others_se[[measure]], linetype = "solid",
                   size = .5, alpha = .8, color = others_color) +
        # connector between the two points of each model pair
        geom_segment(data = segments,
                     aes(x = x, y = Model_Names, xend = xend, yend = yend),
                     color = "#aeb6bf", size = 14.5, alpha = .5) +
        # one point per (model pair, Model)
        geom_point(aes(x = value, y = Model_Names, color = Model),
                   size = 18, show.legend = FALSE) +
        scale_color_manual(values = c(favee_color, others_color)) +
        # one facet row per competing model; strip text serves as the row label
        facet_grid(Model_Names ~ ., scales = "free", switch = "y") +
        theme_minimal() +
        theme(panel.grid.major.y = element_blank(),
              panel.grid.minor.y = element_blank(),
              panel.grid.major.x = element_blank(),
              panel.grid.minor.x = element_blank(),
              axis.title.y = element_text(size = 60, face = y_title_face),
              axis.title.x = element_text(size = 60, vjust = -0.1, face = "bold"),
              axis.text.y = element_blank(),
              axis.text.x = element_text(size = 55, color = "black"),
              axis.ticks.y = element_blank(),
              axis.ticks.x = element_line(color = "black"),
              text = element_text(color = "black"),
              strip.text.y.left = element_text(angle = 0, size = 60),
              panel.background = element_rect(fill = "white", color = "white"),
              strip.background = element_rect(fill = "white", color = "white"),
              strip.text = element_text(color = "black"),
              plot.background = element_rect(fill = "white", color = "white"),
              panel.spacing = unit(0, "lines"),
              plot.margin = margin(1, 1, 1, 1, "cm")) +
        labs(y = "Model") +
        labs(x = x_label)
}

# Run the FAVEE-vs-other-model comparison for one data file and build plots.
#
# Arguments:
#   file_path  path to a CSV file; its first column is used as row names and
#              the remaining columns must include every model feature.
#
# For each entry of the global `models`, model_compare() is called with
# `FAVEE_model` as the first model and the named model as the second.
#
# Returns a list with
#   BIC_SE, AdjR_SE      summarySE() tables of BIC / adjusted R^2 by Model
#   BIC_plot, AdjR_plot  the corresponding ggplot objects
#
# Side effect: sets options(repr.plot.width = 30, repr.plot.height = 20).
model_compares_plot <- function(file_path) {
    # 1. Read the data
    data <- read.csv(file_path, header = TRUE, stringsAsFactors = FALSE,
                     row.names = 1)

    # 2. Model comparison
    # Display label "<Author>, <Year>" derived from "<Author>_<Year>_model"
    model_labels <- vapply(
        strsplit(models, "_", fixed = TRUE),
        function(parts) paste0(parts[1], ", ", parts[2]),
        character(1)
    )

    # Two rows per competing model (FAVEE first, then the other model). The
    # numeric columns are kept numeric throughout instead of being coerced to
    # character by rbind() and converted back afterwards.
    per_model <- vector("list", length(models))
    for (i in seq_along(models)) {
        fit <- model_compare(FAVEE_model, get(models[i]), data = data)
        per_model[[i]] <- data.frame(
            Model = c("FAVEE_Model", "Others_Model"),
            AdjR = c(fit[[1]][1], fit[[2]][1]),
            BIC = c(fit[[1]][2], fit[[2]][2]),
            Group = i,
            Model_Names = model_labels[i],
            stringsAsFactors = FALSE
        )
    }
    model_comparesion_res <- do.call(rbind, per_model)

    # Per-Model summary (including se) used for the reference lines and bands
    BIC_SE <- summarySE(model_comparesion_res, measurevar = "BIC",
                        groupvars = c("Model"))
    AdjR_SE <- summarySE(model_comparesion_res, measurevar = "AdjR",
                         groupvars = c("Model"))

    # 3. Plot results
    options(repr.plot.width = 30, repr.plot.height = 20)

    # The AdjR plot has a bold y-axis title while the BIC plot leaves the face
    # unset; that difference is kept as it was.
    AdjR_plot <- .model_comparison_plot(
        model_comparesion_res, AdjR_SE,
        measure = "AdjR",
        x_label = "Adjusted R-Squared",
        y_title_face = "bold"
    )
    BIC_plot <- .model_comparison_plot(
        model_comparesion_res, BIC_SE,
        measure = "BIC",
        x_label = "Bayesian Information Criterion (BIC)"
    )

    list(BIC_SE = BIC_SE, AdjR_SE = AdjR_SE,
         BIC_plot = BIC_plot, AdjR_plot = AdjR_plot)
}

In [6]:
# Region codes to process, grouped by language and flattened to a plain
# (unnamed) character vector in the order listed.
regions <- unlist(
    list(
        English    = c('USA', 'UK', 'Australia', 'South_africa'),
        German     = c('Germany'),
        Japanese   = c('Japan'),
        Hebrew     = c('Israel'),
        Chinese    = c('CHN', 'HK'),
        French     = c('France'),
        Spanish    = c('Spain', 'Mexico', 'Chile'),
        Portuguese = c('Portugal', 'Brazil'),
        Russian    = c('Russia'),
        Arabic     = c('Egypt', 'Qatar'),
        # no language label was given for this entry
        Unlabelled = c('India')
    ),
    use.names = FALSE
)

In [79]:
# For every region: run the model comparison, record the per-Model BIC and
# adjusted R^2 summary values, and save both plots as PNG files.

# Result tables, one row per region (sized from `regions`, not hard-coded).
n_regions <- length(regions)
BIC_df <- data.frame(region = regions, FAVEE = numeric(n_regions),
                     Others = numeric(n_regions), stringsAsFactors = FALSE)
AdjR_df <- data.frame(region = regions, FAVEE = numeric(n_regions),
                      Others = numeric(n_regions), stringsAsFactors = FALSE)

# png() does not create missing directories.
dir.create('BIC', showWarnings = FALSE)
dir.create('AdjR', showWarnings = FALSE)

for (region in regions) {
    file_path <- paste0('../DataCleanPCA/output_data/cleaning_results/',
                        region, '/', region, '_dim_rel_scaled.csv')
    region_result <- model_compares_plot(file_path)

    # BIC
    # (this variable shares its name with stats::BIC(); calls to BIC() still
    # resolve to the function because R skips non-function bindings there)
    BIC <- region_result$BIC_SE
    BIC_df[BIC_df$region == region, 'FAVEE'] <- BIC[BIC$Model == 'FAVEE_Model', 'BIC']
    BIC_df[BIC_df$region == region, 'Others'] <- BIC[BIC$Model == 'Others_Model', 'BIC']

    # paste0(): the previous paste(..., seq = '') misspelled `sep`, so the
    # pieces were joined with spaces (e.g. "BIC/ USA _BIC.png ").
    png(paste0('BIC/', region, '_BIC.png'),
        bg = "transparent", family = 'sans', units = 'in',
        width = 30, height = 20, res = 300)
    # Close the device even if rendering fails, so no device is left open.
    tryCatch(print(region_result$BIC_plot), finally = dev.off())

    # AdjR
    AdjR <- region_result$AdjR_SE
    AdjR_df[AdjR_df$region == region, 'FAVEE'] <- AdjR[AdjR$Model == 'FAVEE_Model', 'AdjR']
    AdjR_df[AdjR_df$region == region, 'Others'] <- AdjR[AdjR$Model == 'Others_Model', 'AdjR']

    png(paste0('AdjR/', region, '_AdjR.png'),
        bg = "transparent", family = 'sans', units = 'in',
        width = 30, height = 20, res = 300)
    tryCatch(print(region_result$AdjR_plot), finally = dev.off())
}