In [1]:
# Silence all warnings for the whole session (a negative `warn` makes R ignore them).
# NOTE(review): this also hides vector-recycling and coercion warnings raised by
# later cells; consider removing it while debugging.
options (warn = -1)
# Data I/O and wrangling.
library(openxlsx)
library(tidyverse)
# Attached after tidyverse, so reshape's rename() and stamp() mask the
# dplyr / lubridate functions of the same name (see the attach messages below).
library(reshape)
library(mgsub) # mapping

# Plotting and correlation helpers (ggplot2 is already attached by tidyverse).
library(ggplot2)
library(ggpubr)
library(RColorBrewer)
library(corrplot)
library(corrr)

-- [1mAttaching core tidyverse packages[22m ------------------------ tidyverse 2.0.0 --
[32mv[39m [34mdplyr    [39m 1.1.0     [32mv[39m [34mreadr    [39m 2.1.4
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mstringr  [39m 1.5.0
[32mv[39m [34mggplot2  [39m 3.4.1     [32mv[39m [34mtibble   [39m 3.1.7
[32mv[39m [34mlubridate[39m 1.9.2     [32mv[39m [34mtidyr    [39m 1.3.0
[32mv[39m [34mpurrr    [39m 1.0.1     
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mi[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

载入程辑包：'reshape'


The following object is masked from 'package:lubridate':

    stamp


The following object is masked from 'package:dplyr':

    rename


The following

In [2]:
# Regions whose relationship scores are analysed. The order is fixed here and
# reused by later cells; the trailing comment on each row is the language group.
regions = c(
    'USA', 'UK', 'Australia', 'South Africa',  # English
    'Germany',                                 # German
    'Japan',                                   # Japanese
    'Israel',                                  # Hebrew
    'CHN', 'HK(region)',                       # Chinese
    'France',                                  # French
    'Spain', 'Mexico', 'Chile',                # Spanish
    'Portugal', 'Brazil',                      # Portuguese
    'Russia',                                  # Russian
    'Egypt', 'Qatar',                          # Arabic
    'India'                                    # (no language group noted)
)

In [3]:
# Load every region's PCA relationship scores, turn each score table into a
# vectorised Euclidean distance matrix (RDM), and bind the RDMs column-wise
# into `combine_rdm` (one column per entry of `regions`).
file_path = 'output_data/pca_results/'
score_suffix = '_scores_var_5c_33d.csv'

# Expected layout: <file_path><folder>/<folder>_scores_var_5c_33d.csv
# Only sub-directories are visited, so a stray file sitting in `file_path`
# is not turned into a bogus score path.
region_dirs = dir(file_path)
region_dirs = region_dirs[dir.exists(paste0(file_path, region_dirs))]

relationship_score_list = list()
for (region in region_dirs){
    file = paste0(file_path, region, '/', region, score_suffix)
    favee_df = read.csv(file,row.names=1)

    # Map folder names onto the labels used in `regions`.
    region = gsub('HK','HK(region)',region,fixed=TRUE)
    region = gsub('South_africa','South Africa',region,fixed=TRUE)
    relationship_score_list[[region]] = favee_df}

# Fail early, and by name, when a requested region has no score table
# (otherwise dist() below is handed NULL and the error is hard to trace).
missing_regions = setdiff(regions, names(relationship_score_list))
if (length(missing_regions) > 0){
    stop('No PCA score table found in ', file_path, ' for: ',
         paste(missing_regions, collapse = ', '))}

# c() drops the dist attributes, leaving the plain vector of pairwise distances.
rdm_list = list()
for (region in regions){
    region_model = relationship_score_list[[region]]
    rdm_list[[region]] = c(dist(region_model,
                             method = 'euclidean'))}

# cbind() recycles shorter vectors and the warning is suppressed by
# options(warn = -1), so mismatched tables must be caught explicitly.
# NOTE(review): equal length does not prove that rows are in the same order
# across regions - confirm against the PCA export.
if (length(unique(lengths(rdm_list))) > 1){
    stop('Score tables differ in their number of rows across regions; ',
         'the RDMs cannot be combined column-wise.')}

# combine all regions' results in one call (also valid for fewer than 3 regions)
combine_rdm = do.call(cbind, unname(rdm_list[regions]))
colnames(combine_rdm) = regions

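Assign labels to dimensions: name each region's five components from the label sheet and flip the sign of components whose label carries a parenthesised marker.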

In [4]:
# Attach the dimension labels from the label sheet to each region's PCA
# scores, flip the dimensions flagged for reversal, and write one labelled
# CSV per region. Results are also kept in `relationship_score_name_list`.
n_dims = 5  # number of label rows / score columns handled per region
label_out_dir = 'output_data/pca_each_region_labels/'

# summarise model labels
model_label_df = read.xlsx('input_data/dimensions_check.xlsx',
                           sheet='Model_33d_5c')
# Call reshape::rename explicitly: it takes c(old = new), whereas dplyr::rename
# expects new = old, so the bare name only worked because of attach order.
model_label_df = reshape::rename(model_label_df,c('South.Africa'='South Africa'))
model_label_df = model_label_df[seq_len(n_dims),regions]
# transform dataframe into dictionary(list): one character vector of labels per region
model_label_list = as.list(model_label_df)

# Fail early, and by name, when a labelled region has no loaded score table.
missing_scores = setdiff(names(model_label_list), names(relationship_score_list))
if (length(missing_scores) > 0){
    stop('No relationship scores loaded for: ',
         paste(missing_scores, collapse = ', '))}

# write.csv() does not create directories, so make sure the target exists.
dir.create(label_out_dir, showWarnings = FALSE, recursive = TRUE)

# add model label to raw models
relationship_score_name_list = list()
for (region in names(model_label_list)){
    # Split every label at the first '(' : the text before it is the dimension
    # name, and any text after it marks the dimension as reversed.
    label_parts = strsplit(model_label_list[[region]],'(',fixed = TRUE)
    model_label = vapply(label_parts, function(parts) parts[1], character(1))
    reverse = ifelse(lengths(label_parts) >= 2, -1, 1)

    region_df = relationship_score_list[[region]]
    colnames(region_df) = model_label
    # reverse the flagged columns (multiply by -1); others are multiplied by 1
    for (col in seq_len(n_dims)){
        region_df[[col]] = region_df[[col]]*reverse[col]}
    relationship_score_name_list[[region]] = region_df

    write.csv(region_df,paste0(label_out_dir,region,'.csv'))
    print(region)
    print(reverse)
}

[1] "USA"
[1] -1  1 -1  1  1
[1] "UK"
[1]  1 -1 -1 -1  1
[1] "Australia"
[1]  1  1 -1  1  1
[1] "South Africa"
[1] -1  1  1 -1 -1
[1] "Germany"
[1]  1  1 -1 -1  1
[1] "Japan"
[1] -1  1  1 -1  1
[1] "Israel"
[1] -1  1 -1 -1  1
[1] "CHN"
[1] -1  1  1  1 -1
[1] "HK(region)"
[1] -1  1  1  1  1
[1] "France"
[1]  1 -1 -1  1  1
[1] "Spain"
[1] -1  1 -1  1  1
[1] "Mexico"
[1] -1  1  1  1 -1
[1] "Chile"
[1] 1 1 1 1 1
[1] "Portugal"
[1]  1  1  1 -1 -1
[1] "Brazil"
[1] -1  1  1 -1 -1
[1] "Russia"
[1] -1 -1 -1 -1 -1
[1] "Egypt"
[1] -1  1 -1 -1 -1
[1] "Qatar"
[1] -1  1  1  1 -1
[1] "India"
[1]  1  1 -1 -1  1
