In [8]:
import pandas as pd
import numpy as np
import glob
import re

import seaborn as sns
import matplotlib.pyplot as plt

# Familiar

## Import data

In [2]:
def _first_column_labels(csv_path):
    """Read a label CSV (first column used as index) and return its first data column as a list."""
    label_table = pd.read_csv(csv_path, index_col=0)
    return label_table.iloc[:, 0].tolist()


# Labels of the 159 relationships used throughout this notebook.
relationships = _first_column_labels(
    '../DataClean_FAVEE_HPP/input_data/RelDim_labels/159relationships_fix_label.csv'
)

# Labels of the larger 258-relationship set (used to name the columns of the CHN data).
relationships_258 = _first_column_labels(
    '../DataClean_FAVEE_HPP/input_data/RelDim_labels/258relationships_fix_label.csv'
)

In [3]:
def region_from_path(path):
    """Return the region name encoded in a familiarity CSV path.

    The region name is the file name without its directory and without the
    trailing '.csv'. Both '\\' and '/' are treated as directory separators, so
    the result is the same whichever separator glob produced.
    """
    filename = re.split(r'[\\/]', path)[-1]
    return re.sub(r'\.csv$', '', filename)


# Sorted so that the region order (and the row order of the saved tables) does
# not depend on the order in which the file system lists the files.
files_path = sorted(glob.glob('../DataClean_FAVEE_HPP/output_data/familiarity/*.csv'))
# extract the names of the regions we selected from the file names
regions = [region_from_path(path) for path in files_path]

## Calculate mean: each region/averaged regions

In [4]:
# Table of mean familiarity ratings.
# row: 19 regions; column: 159 relationships
# Allocated as float so that the statistics computed from it afterwards work on
# numeric columns instead of object columns.
region_relationship_df = pd.DataFrame(index=regions, columns=relationships, dtype=float)
for reg_path in files_path:
    # Region name = file name without directory and '.csv' suffix. Splitting on
    # both '\\' and '/' makes this independent of the OS path separator.
    region = re.sub(r'\.csv$', '', re.split(r'[\\/]', reg_path)[-1])
    region_df = pd.read_csv(reg_path, index_col=0).apply(pd.to_numeric)

    # different regions contain different columns
    if 'CHN' in region:
        # Drop the last column, label the remaining columns with the
        # 258-relationship list, then keep only the 159 relationships.
        region_df = region_df.iloc[:, :-1]
        region_df.columns = relationships_258
        region_df = region_df.loc[:, relationships]
    elif len(region_df.columns) == 160:  # some regions contain subjects' ID
        region_df = region_df.iloc[:, :-1]

    # Raises if the file does not have exactly one column per relationship.
    region_df.columns = relationships
    # calculate the mean rating of every relationship for this region
    region_mean_df = region_df.mean(axis=0)
    region_relationship_df.loc[region, :] = region_mean_df

# all averaged regions' results
world_df = region_relationship_df.mean().to_frame('familiar')

In [6]:
# Persist the per-region table and the across-region average as CSV files.
for frame, csv_path in (
    (region_relationship_df, 'output_data/familiarity/familiarity_19regions.csv'),
    (world_df, 'output_data/familiarity/familiarity_19regions_averaged.csv'),
):
    frame.to_csv(csv_path)

## Calculate variability of familiarity

In [9]:
# Spread of the regional mean familiarity for every relationship (column-wise):
# np.var / np.std are applied to each column of the region x relationship table.
familiar_var = region_relationship_df.apply(func=np.var)
familiar_std = region_relationship_df.apply(func=np.std)

# Saved separately; both files are read back in the correlation analysis below.
familiar_var.to_csv('output_data/familiarity/familiar_var.csv')
familiar_std.to_csv('output_data/familiarity/familiar_std.csv')

# The correlation between variability of _familiarity_ and _rating on 33D_

## Import data

In [2]:
# Packages
library(reshape)
library(tidyverse)
library(dplyr)
library(jmuOutlier) #permutation

In [3]:
# Read a CSV whose first column holds the row names and copy those row names
# into a 'rel' column so the tables can be joined on it.
read_with_rel <- function(csv_path) {
  tbl <- read.csv(csv_path, row.names = 1)
  tbl['rel'] <- rownames(tbl)
  tbl
}

loocv_rating <- read_with_rel('output_data/loocv/rel_loocv_df_avg.csv')
familiar_var <- read_with_rel('output_data/familiarity/familiar_var.csv')
familiar_std <- read_with_rel('output_data/familiarity/familiar_std.csv')

In [4]:
# Attach the familiarity variance and standard deviation to the ratings,
# matching rows on the 'rel' key, then drop the key column.
rating_familiar <- loocv_rating %>%
  left_join(familiar_var, by = 'rel') %>%
  left_join(familiar_std, by = 'rel') %>%
  select(-rel)

# The three remaining columns, in join order.
colnames(rating_familiar) <- c('rating', 'familiar_var', 'familiar_std')
head(rating_familiar)

Unnamed: 0_level_0,rating,familiar_var,familiar_std
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
1,0.3757564,201.80471,14.205799
2,0.4779412,81.34909,9.019373
3,0.4999824,154.35921,12.424138
4,0.5230791,147.01684,12.12505
5,0.5244336,282.10528,16.79599
6,0.5288313,185.26998,13.611392


## correlation coefficient

In [5]:
# Pairwise correlation matrices of rating, familiar_var and familiar_std:
# first rank-based (Spearman), then linear (Pearson).
cor(x = rating_familiar, method = 'spearman')
cor(x = rating_familiar, method = 'pearson')

Unnamed: 0,rating,familiar_var,familiar_std
rating,1.0,-0.1790573,-0.1790573
familiar_var,-0.1790573,1.0,1.0
familiar_std,-0.1790573,1.0,1.0


Unnamed: 0,rating,familiar_var,familiar_std
rating,1.0,-0.1975111,-0.2164322
familiar_var,-0.1975111,1.0,0.9894052
familiar_std,-0.2164322,0.9894052,1.0


In [6]:
# The permutation tests below are simulation based (num.sim random
# permutations), so the p-values change from run to run unless the random
# number generator is seeded. The seed value itself is arbitrary.
set.seed(2023)

# One-sided ("greater") permutation tests of the correlation between the
# rating and the variance of familiarity.
perm.cor.test(rating_familiar$rating, rating_familiar$familiar_var,
              alternative = "greater", method = "pearson", num.sim = 20000) # report this result
perm.cor.test(rating_familiar$rating, rating_familiar$familiar_var,
              alternative = "greater", method = "spearman", num.sim = 20000)

# Same tests using the standard deviation of familiarity.
perm.cor.test(rating_familiar$rating, rating_familiar$familiar_std,
              alternative = "greater", method = "pearson", num.sim = 20000)
perm.cor.test(rating_familiar$rating, rating_familiar$familiar_std,
              alternative = "greater", method = "spearman", num.sim = 20000)