In [1]:
library(FactoMineR)
library(factoextra)#fviz_eig
library(psych)

library(ggplot2)
library(pheatmap)
library(RColorBrewer)

library(tidyverse)
library(reshape)#melt

library(reshape2)
library(repr)
library(plyr)
library(Rmisc)
library(extrafont)
library(ggthemes)

library(lmtest)

library(ggpubr)

library(boot)

Loading required package: ggplot2

Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa


Attaching package: 'psych'


The following objects are masked from 'package:ggplot2':

    %+%, alpha


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.1
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mpsych[39m::[32m%+%()[39m    masks [34mggplot2[39m::%+%()
[31m✖[39m [34mpsych[39m::[32malpha()[39m  masks [34mggplot2[39m::alpha()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[3

# Define Model

In [2]:
#define the model name
models <- c('FAVEE_model','Wish_1976_model','Triandis_1968_model','Marwell_1970_model',
            'Osgood_1957_model','Fiske_1992_model','Clark_2011_model','Carpendale_2004_model',
            'Foa_2012_model','Bugental_2000_model','Goffman_1959_model','Hamilton_1981_model',
            'Burton_1975_model','Montgomery_1988_model','Rands_1979_model','Weiss_1998_model')

# define the model
FAVEE_model <- c("Formality.and.Regulation","Activeness","Valence.Evaluation","Goods.Exchange","Equality")
Wish_1976_model <- c("Formality.and.Regulation","Equality","Valence.Evaluation","Activity.Intensity")
Triandis_1968_model <- c("Valence.Evaluation","Equality","Intimacy") 
Marwell_1970_model <- c("Visibility","Formality.and.Regulation","Intimacy")
Osgood_1957_model <- c("Valence.Evaluation","Equality","Activity.Intensity")
Fiske_1992_model <- c("Communal.Sharing","Equality","Strategic","Expected.Reciprocity")
Clark_2011_model <- c("Communal.Sharing","Strategic","Expected.Reciprocity")
Carpendale_2004_model <- c("Importance.for.individuals","Importance.for.society")
Foa_2012_model <- c("Concreteness","Uniqueness")
Bugental_2000_model <- c("Attachment","Affiliation.Coalition","Mating","Expected.Reciprocity","Equality")
Goffman_1959_model <- c("Valence.Evaluation","Affiliation.Coalition","Conflict","Negotiation","Coercion")
Hamilton_1981_model <- c("Valence.Evaluation","Equality")
Burton_1975_model <- c("Valence.Evaluation","Equality","Occupational")
Montgomery_1988_model <- c("Valence.Evaluation","Equality","Intimacy")
Rands_1979_model <- c("Formality.and.Regulation","Socioemotional")
Weiss_1998_model <- c("Attachment","Affiliation.Coalition")

# Model Compare Function

In [3]:
model_compare <- function(model1,model2){
    data_regression <- data[-which(names(data) %in% c(model1,model2))]
    
    AdjR_model1 <- c()
    AdjR_model2 <- c()
    BIC_model1 <- c()
    BIC_model2 <- c()
    R_squared_change_Fvalue <- c()
    coxtest_z <- c()
    
    for (trait in c(1:ncol(data_regression))) {
        # Extract each trait to be used as dependent variable in our modeling
        y = data_regression[,trait]
        
        # Model comparison in model 1
        f_model1 <- as.formula(paste("y ~ ", paste(model1, collapse = "+")))
        lm_model1 = lm(f_model1, data = data) # Modeling
        AdjR_model1 <- c(AdjR_model1, summary(lm_model1)$adj.r.squared) # Extract each model's index and append it to the dependent variables' lists
        BIC_model1 <- c(BIC_model1, BIC(lm_model1))
        
        # model comparison in model 2
        f_model2 <- as.formula(paste("y ~ ", paste(model2, collapse = "+")))
        lm_model2 = lm(f_model2, data = data) # Modeling
        AdjR_model2 <- c(AdjR_model2, summary(lm_model2)$adj.r.squared) # Extract each model's index and append it to the dependent variables' lists
        BIC_model2 <- c(BIC_model2, BIC(lm_model2))

        R_anova <- anova(lm_model1, lm_model2)$F[2]

        coxtest_z <- c(coxtest_z, coxtest(lm_model1,lm_model2)[2,3])

        if(!is.na(R_anova)){
            R_squared_change_Fvalue <- c(R_squared_change_Fvalue, R_anova)
        }else{
            R_squared_change_Fvalue <- c(R_squared_change_Fvalue, -anova(lm_model2, lm_model1)$F[2])
        }
        
    }
    
    R_squared_change_Fvalue <- ifelse(is.na(R_squared_change_Fvalue), 0, R_squared_change_Fvalue)
    
    res_model1 <- c(mean(AdjR_model1),mean(BIC_model1),mean(R_squared_change_Fvalue),mean(coxtest_z))
    res_model2 <- c(mean(AdjR_model2),mean(BIC_model2),mean(R_squared_change_Fvalue),mean(coxtest_z))
    res_list <- list(res_model1,res_model2)
    
    return(res_list)
}

# Do model comparison

In [4]:
region_list <- c('Australia', 'Brazil', 'Chile', 'CHN', 'Egypt', 'France', 'Germany', 'HK', 'India', 'Israel', 'Japan', 'Mexico', 'Portugal', 'Qatar', 'Russia', 'South_africa', 'Spain', 'UK', 'USA')
model_rank_regions <- data.frame(region=character(), Model=character(), Rank=numeric(), Coxtext_Z=numeric())
model_rank_res <- data.frame(Region = character(), Model = character(), AdjR = numeric(), BIC = numeric())

for(region_index in c(1:length(region_list))){
    model_rank_res_temp <- data.frame(Region = character(), Model = character(), AdjR = numeric(), BIC = numeric())
    region <- region_list[region_index]
    data <- read.csv(file =  paste0("output_data/cleaning_results/", region, "/", region, "_dim_rel_scaled.csv"), header = TRUE, stringsAsFactors = FALSE,row.names = 1)
    
    for(i in c(1:length(models))){

        data_regression <- data[-which(names(data) %in% get(c(models[i])))]

        AdjR_model <- c()
        BIC_model <- c()

        for (trait in c(1:ncol(data_regression))) {
            # Extract each trait to be used as dependent variable in our modeling
            y = data_regression[,trait]
            
            # Model comparison in model 1
            f_model <- as.formula(paste("y ~ ", paste(get(c(models[i])), collapse = "+")))
            lm_model = lm(f_model, data = data) # Modeling
            AdjR_model <- c(AdjR_model, summary(lm_model)$adj.r.squared) # Extract each model's index and append it to the dependent variables' lists
            BIC_model <- c(BIC_model, BIC(lm_model))  
        }

        model_rank_res_temp <- rbind(model_rank_res_temp, c(region, models[i], mean(AdjR_model), mean(BIC_model)) )

    }

    colnames(model_rank_res_temp) <- c('Region', 'Model', 'AdjR', 'BIC')

    model_rank_res_temp <- model_rank_res_temp %>% arrange(desc(AdjR))
    model_rank_res_temp <- cbind(model_rank_res_temp, c(1:16))
    colnames(model_rank_res_temp) <- c('Region', 'Model', 'AdjR', 'BIC', 'Rank')

    model_rank_res <- rbind(model_rank_res, model_rank_res_temp)

    colnames(model_rank_res_temp) <- c('Region', 'Model', 'AdjR', 'BIC', 'Rank')

}



In [5]:
print(model_rank_res)

          Region                 Model               AdjR              BIC Rank
1      Australia           FAVEE_model  0.500417051196312 363.977667310676    1
2      Australia       Wish_1976_model  0.461432925617939 367.655322912596    2
3      Australia   Bugental_2000_model  0.421008660876702  381.42276772025    3
4      Australia     Burton_1975_model  0.402023613032549 381.839665442097    4
5      Australia   Triandis_1968_model  0.400009234524276 381.885150002643    5
6      Australia Montgomery_1988_model  0.400009234524276 381.885150002643    6
7      Australia     Osgood_1957_model  0.389657869535446 386.657347191253    7
8      Australia      Fiske_1992_model   0.35292138889263 398.962604316685    8
9      Australia      Weiss_1998_model  0.333331623689663 393.746653516445    9
10     Australia    Marwell_1970_model  0.314763075095916 403.647570639783   10
11     Australia      Clark_2011_model  0.310234412878906 405.361471139846   11
12     Australia    Goffman_1959_model  

In [6]:
write.csv(model_rank_res, file = "output_data/compare_model/model_rank_regions.csv", append = FALSE, quote = TRUE, sep = ",")

"attempt to set 'append' ignored"
"attempt to set 'sep' ignored"


In [7]:
wilcox.test(model_rank_res$Rank[model_rank_res$Model == 'FAVEE_model'], model_rank_res$Rank[model_rank_res$Model == 'Bugental_2000_model'], paired = TRUE)

wilcox.test(model_rank_res$Rank[model_rank_res$Model == 'FAVEE_model'], model_rank_res$Rank[model_rank_res$Model == 'Wish_1976_model'], paired = TRUE)

"cannot compute exact p-value with ties"



	Wilcoxon signed rank test with continuity correction

data:  model_rank_res$Rank[model_rank_res$Model == "FAVEE_model"] and model_rank_res$Rank[model_rank_res$Model == "Bugental_2000_model"]
V = 14, p-value = 0.0007429
alternative hypothesis: true location shift is not equal to 0


"cannot compute exact p-value with ties"



	Wilcoxon signed rank test with continuity correction

data:  model_rank_res$Rank[model_rank_res$Model == "FAVEE_model"] and model_rank_res$Rank[model_rank_res$Model == "Wish_1976_model"]
V = 0, p-value = 0.0001142
alternative hypothesis: true location shift is not equal to 0


In [8]:
FAVEE_adjR <- model_rank_res$AdjR[model_rank_res$Model == 'FAVEE_model']
Bugental_adjR <- model_rank_res$AdjR[model_rank_res$Model == 'Bugental_2000_model']
Wish_adjR <- model_rank_res$AdjR[model_rank_res$Model == 'Wish_1976_model']

In [9]:
mean(as.numeric(Wish_adjR))

In [10]:
FAVEE_Bugental_diff <- as.numeric(FAVEE_adjR) - as.numeric(Bugental_adjR)
bmean <- function(x,i){
  return(mean(x[i]))
}
set.seed(2023)
bout <- boot(FAVEE_Bugental_diff,bmean,100000)
bci <- boot.ci(bout,type="bca",conf = .999)$bca[4:5]
bci
mean(FAVEE_Bugental_diff)

In [11]:
FAVEE_Wish_diff <- as.numeric(FAVEE_adjR) - as.numeric(Wish_adjR)
set.seed(2023)
bout <- boot(FAVEE_Wish_diff,bmean,100000)
bci <- boot.ci(bout,type="bca",conf = .999)$bca[4:5]
bci
mean(FAVEE_Wish_diff)

In [12]:
FAVEE_BIC <- model_rank_res$BIC[model_rank_res$Model == 'FAVEE_model']
Bugental_BIC <- model_rank_res$BIC[model_rank_res$Model == 'Bugental_2000_model']
Wish_BIC <- model_rank_res$BIC[model_rank_res$Model == 'Wish_1976_model']

In [13]:
FAVEE_Bugental_BIC_diff <- as.numeric(FAVEE_BIC) - as.numeric(Bugental_BIC)
bmean <- function(x,i){
  return(mean(x[i]))
}
set.seed(2023)
bout <- boot(FAVEE_Bugental_BIC_diff,bmean,100000)
bci <- boot.ci(bout,type="bca",conf = .999)$bca[4:5]
bci
mean(FAVEE_Bugental_BIC_diff)

In [14]:
FAVEE_Wish_BIC_diff <- as.numeric(FAVEE_BIC) - as.numeric(Wish_BIC)
set.seed(2023)
bout <- boot(FAVEE_Wish_BIC_diff,bmean,100000)
bci <- boot.ci(bout,type="bca",conf = .999)$bca[4:5]
bci
mean(FAVEE_Wish_BIC_diff)