In [1]:
library(stringr)
library(ggplot2)

"程辑包'stringr'是用R版本4.1.3 来建造的"
"程辑包'ggplot2'是用R版本4.1.3 来建造的"


<table><tr><td bgcolor="green">Variables</td></tr></table>

In [4]:
# Regions to exclude from the analysis; the single empty string means "none".
regions_missing <- c('')

# Analysis order of the regions; the RDMs are reindexed to this order.
regions_order <- c('USA','UK','Australia','South Africa','Germany',
                   'Japan','Israel','CHN','HK(region)','France',
                   'Spain','Mexico','Chile','Portugal','Brazil',
                   'Russia','Egypt','Qatar','India')

# Drop the excluded regions with a logical mask. Negative indexing through
# which() is unsafe: when no name matches, which() returns integer(0) and
# x[-integer(0)] selects nothing, which would empty the whole vector.
regions_order <- regions_order[!(regions_order %in% regions_missing)]

# Outcome variable; its name is part of the path of the outcome RDM file.
y_variable <- 'checked/life_expectancy'

<table><tr><td bgcolor="blue">Helper function</td></tr></table>

In [5]:
# Harmonise the region labels of a region-by-region table and reorder it.
#
# df: table whose row names are region labels. The cleaned row labels are
#     also written to the column names, so the columns are assumed to be in
#     the same order as the rows.
# Returns `df` with cleaned labels, subset and reordered on both axes to the
# global `regions_order`. Stops with an explicit message if a region listed in
# `regions_order` is absent after relabelling.
fix_region <- function(df) {
    regions <- rownames(df)
    regions <- str_replace(regions, 'United States', 'USA')
    regions <- str_replace(regions, 'United Kingdom', 'UK')
    regions <- str_replace(regions, 'China', 'CHN')
    regions <- str_replace(regions, 'South_africa', 'South Africa')
    # 'Spain' is temporarily renamed and mapped back below together with any
    # 'Spanish' label, so both spellings end up as 'Spain'.
    regions <- str_replace(regions, 'Spain', 'Spanish')
    # Negative look-ahead keeps the rule idempotent: a label that already
    # reads 'HK(region)' must not become 'HK(region)(region)'.
    regions <- str_replace(regions, 'HK(?!\\(region\\))', 'HK(region)')
    # 'China' was rewritten to 'CHN' above, hence the 'CHN' suffix here.
    regions <- gsub('Hong Kong SAR, CHN', 'HK(region)', regions, fixed = TRUE)
    regions <- str_replace(regions, 'Hong Kong', 'HK(region)')
    regions <- str_replace(regions, 'Spanish', 'Spain')
    regions <- str_replace(regions, 'Russian Federation', 'Russia')
    regions <- str_replace(regions, 'Russian', 'Russia')
    # Literal match: the trailing '.' must not act as a regex wildcard.
    regions <- str_replace(regions, fixed('Egypt, Arab Rep.'), 'Egypt')

    rownames(df) <- regions
    colnames(df) <- regions

    # Fail early and readably instead of through a cryptic indexing error.
    missing_regions <- setdiff(regions_order, regions)
    if (length(missing_regions) > 0) {
        stop('fix_region: regions not found after relabelling: ',
             paste(missing_regions, collapse = ', '), call. = FALSE)
    }

    df[regions_order, regions_order]
}

# Return the entries strictly below the main diagonal of `m` as a vector.
lowerTriangle <- function(m) {
    below_diagonal <- lower.tri(m)  # `diag` defaults to FALSE
    m[below_diagonal]
}

In [6]:
# Read the outcome distance table for `y_variable` (first CSV column holds the
# row names), then clean its region labels and reorder it with fix_region().
y_rdm <- fix_region(
    read.csv(paste0('../../rdm/', y_variable, '_dist.csv'),
             row.names = 1, encoding = "UTF-8", check.names = FALSE)
)

# FAVEE-HPP rdm

In [8]:
root_path <- '../../../RSA_Regression/output_data/models_rdm/'

# Read the model RDM stored at <root_path><subdir>/<stem>_dissim_dist.csv
# (first CSV column holds the row names) and reindex its rows and columns to
# the global `regions_order`.
read_model_rdm <- function(subdir, stem) {
    rdm <- read.csv(paste0(root_path, subdir, '/', stem, '_dissim_dist.csv'),
                    row.names = 1, check.names = FALSE)
    rdm[regions_order, regions_order]
}

full_rdm <- read_model_rdm('full_feature', 'raw33d')

# FAVEE
favee_rdm      <- read_model_rdm('dimensional', 'favee')
formality_rdm  <- read_model_rdm('dimensional', 'formality')
activeness_rdm <- read_model_rdm('dimensional', 'activeness')
valence_rdm    <- read_model_rdm('dimensional', 'valence')
exchange_rdm   <- read_model_rdm('dimensional', 'exchange')
equality_rdm   <- read_model_rdm('dimensional', 'equality')

# HPP
hpp_rdm     <- read_model_rdm('categorical', 'hpp')
hostile_rdm <- read_model_rdm('categorical', 'hostile')
private_rdm <- read_model_rdm('categorical', 'private')
public_rdm  <- read_model_rdm('categorical', 'public')

# Predict results

<table><tr><td bgcolor="blue">Helper function</td></tr></table>

In [9]:
# Look up each named object and return the objects as an unnamed list, in the
# same order as `variables_names`.
#
# variables_names: character vector of object names, resolved with get()
#                  starting from this function's environment and then its
#                  enclosing environments.
# Returns a list with one element per name; empty input gives an empty list
# (the former 1:length() loop iterated over c(1, 0) and failed in that case).
x_variables <- function(variables_names) {
    lookup_env <- environment()
    lapply(unname(variables_names),
           function(name) get(name, envir = lookup_env))
}

In [10]:
# Permutation test for a standardised RDM regression.
#
# Relies on objects defined by the calling script: `variables_names`
# (predictor names), `variables` (list of predictor RDMs), `y_rdm_array`
# (outcome vector), `regions_order` (region labels) and lowerTriangle().
#
# culture_regress_standard_model: summary() of the observed lm fit; its
#     coefficient table has the intercept in row 1 followed by one row per
#     predictor.
# nperm: number of permutations (default 10000, the previous fixed value).
# seed:  value passed to set.seed() before permuting (default 2, the previous
#        fixed value). Note that this resets the session's RNG state.
#
# Prints the observed F statistic and R-squared, the number of permutations
# and the permutation p-value of F. Returns a one-row data frame giving, for
# each predictor, the share of permuted estimates that are >= the observed
# estimate.
permutation_result <- function(culture_regress_standard_model,
                               nperm = 10000, seed = 2) {
    n_predictors <- length(variables_names)
    # Coefficient-table rows of the predictors (row 1 is the intercept).
    predictor_rows <- 1 + seq_along(variables_names)

    # Observed statistics. Column 1 of the coefficient table is the estimate
    # (the beta of the standardised model), not the t value, despite the
    # "t" in the variable names below.
    fstats <- culture_regress_standard_model$fstatistic[[1]]
    tstats <- unname(
        culture_regress_standard_model$coefficients[predictor_rows, 1]
    )
    r2stats <- culture_regress_standard_model$r.squared
    print(paste('fstats:', fstats))
    print(paste('r2stats:', r2stats))

    # Null distributions: F per permutation, and one estimate per predictor.
    permf <- numeric(nperm)
    permt <- matrix(NA_real_, nperm, n_predictors)
    colnames(permt) <- variables_names

    set.seed(seed)
    for (i in seq_len(nperm)) {
        # Shuffle the region labels; rows and columns of every predictor RDM
        # are permuted with the same index so each stays a valid RDM.
        psel <- sample(length(regions_order))
        x <- do.call(cbind, lapply(variables, function(rdm) {
            lowerTriangle(rdm[psel, psel])
        }))
        srfit <- summary(lm(scale(y_rdm_array) ~ scale(x)))
        permf[i] <- srfit$fstatistic[1]
        permt[i, ] <- srfit$coefficients[predictor_rows, 1]
    }
    print(as.integer(nperm))

    # NOTE(review): both p-values are one-sided (upper tail) and do not count
    # the observed statistic as one of the permutations - confirm intended.
    permf_pvals <- mean(permf >= fstats)
    print(paste('permf_pvals', permf_pvals))

    permt_pvals <- vapply(
        seq_along(variables_names),
        function(j) mean(permt[, j] >= tstats[j]),
        numeric(1)
    )
    permt_pvals <- as.data.frame(t(permt_pvals))
    colnames(permt_pvals) <- variables_names
    permt_pvals
}

***

In [12]:
# Predictor sets to evaluate, keyed by label. Each value holds the names of
# the RDM objects that enter one regression together.
variables_names_list <- list(
    full_rdm = 'full_rdm',

    # FAVEE: combined, the five dimensions jointly, then each one alone
    favee_rdm = 'favee_rdm',
    favee_separate = c('formality_rdm', 'activeness_rdm', 'valence_rdm',
                       'exchange_rdm', 'equality_rdm'),
    formality_rdm = 'formality_rdm',
    activeness_rdm = 'activeness_rdm',
    valence_rdm = 'valence_rdm',
    exchange_rdm = 'exchange_rdm',
    equality_rdm = 'equality_rdm',

    # HPP: combined, the three categories jointly, then each one alone
    hpp_rdm = 'hpp_rdm',
    hpp_separate = c('hostile_rdm', 'private_rdm', 'public_rdm'),
    hostile_rdm = 'hostile_rdm',
    private_rdm = 'private_rdm',
    public_rdm = 'public_rdm'
)

In [13]:
# Outcome vector: the below-diagonal entries of the outcome RDM.
y_rdm_array <- lowerTriangle(y_rdm)

# Fit one standardised regression per predictor set and run its permutation
# test. `variables_names` and `variables` are read as globals by
# permutation_result(), and `x` / `y_rdm_array` appear in the printed lm call,
# so these names are kept as they are.
for (variables_label in names(variables_names_list)) {
    print(variables_label)
    variables_names <- variables_names_list[[variables_label]]
    variables <- x_variables(variables_names = variables_names)

    # One column per predictor RDM
    x <- do.call(cbind, lapply(variables, lowerTriangle))
    colnames(x) <- variables_names

    culture_regress_standard <- lm(scale(y_rdm_array) ~ scale(x))
    culture_regress_standard_model <- summary(culture_regress_standard)
    print(culture_regress_standard_model)

    p_value <- permutation_result(culture_regress_standard_model)
    print(p_value)
    print('###############################################################')
}

[1] "full_rdm"

Call:
lm(formula = scale(y_rdm_array) ~ scale(x))

Residuals:
    Min      1Q  Median      3Q     Max 
-1.6388 -0.8063 -0.1452  0.6027  2.8662 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -6.802e-17  7.468e-02   0.000  1.00000   
scale(x)     2.278e-01  7.490e-02   3.041  0.00273 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9766 on 169 degrees of freedom
Multiple R-squared:  0.05189,	Adjusted R-squared:  0.04628 
F-statistic: 9.249 on 1 and 169 DF,  p-value: 0.002732

[1] "fstats: 9.24880836901757"
[1] "r2stats: 0.0518870698415572"
[1] 10000
[1] "permf_pvals 0.1114"
  full_rdm
1   0.0916
[1] "###############################################################"
[1] "favee_rdm"

Call:
lm(formula = scale(y_rdm_array) ~ scale(x))

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5229 -0.7982 -0.2145  0.6602  2.8632 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   