In [10]:
import pandas as pd
import numpy as np
import EmotionEvaluationCorrelationHelper as helper
from scipy.stats import t, norm, pearsonr

In [11]:
# Read the tab-separated study 3 responses.
df_study_3 = pd.read_csv('data/study3_olr.csv', sep='\t')

# Split respondents on the ethnicity code column: 1 selects the AA group,
# 2 selects the CA group (the column name is spelled as it appears in the data).
df_study_3_AA = df_study_3[df_study_3['Ethinicity_1AA2CA'] == 1]
df_study_3_CA = df_study_3[df_study_3['Ethinicity_1AA2CA'] == 2]

# Pairwise correlations per group for the 'Good' / 'Pleasant' dimensions.
# NOTE(review): the helper's output is presumably one row per emotion with
# 'emotions' and 'corr_val_dim' columns (that is how later cells read it) -- confirm.
corr_df_study_CA = helper.getPairwiseCorrelations(df_study_3_CA, 'Good', 'Pleasant')
corr_df_study_AA = helper.getPairwiseCorrelations(df_study_3_AA, 'Good', 'Pleasant')

# Order each group's rows from the smallest to the largest correlation and
# renumber the index so positions follow the sorted order.
sorted_corrs_CA = corr_df_study_CA.sort_values(by='corr_val_dim').reset_index(drop=True)
sorted_corrs_AA = corr_df_study_AA.sort_values(by='corr_val_dim').reset_index(drop=True)

In [12]:
def independent_corr(xy, ab, n, n2 = None, twotailed=True, conf_level=0.95, method='fisher'):
    """
    Calculates the statistic significance between two independent correlation coefficients
    @param xy: correlation coefficient between x and y (the Fisher transform is infinite at +/-1)
    @param ab: correlation coefficient between a and b (the Fisher transform is infinite at +/-1)
    @param n: number of elements in xy, must be greater than 3
    @param n2: number of elements in ab (if distinct from n), must be greater than 3
    @param twotailed: whether to calculate a one or two tailed test, only works for 'fisher' method
    @param conf_level: confidence level, only works for 'zou' method
    @param method: defines the method uses, 'fisher' or 'zou'
    @return: for 'fisher', the absolute z statistic and its p-val;
             for 'zou', the lower and upper bound of the confidence interval of xy - ab
    @raise ValueError: if method is unknown or a sample size is 3 or smaller
    """
    if method not in ('fisher', 'zou'):
        raise ValueError('Wrong method!')

    # Apply the default before branching so that both methods honour it.
    if n2 is None:
        n2 = n

    # The standard error on the Fisher z scale is sqrt(1 / (n - 3)), which is
    # undefined for n <= 3; fail loudly instead of dividing by zero or returning NaN.
    if np.any(np.less_equal(n, 3)) or np.any(np.less_equal(n2, 3)):
        raise ValueError('Each correlation must be based on more than 3 elements')

    if method == 'fisher':
        # arctanh(r) is the Fisher z transform 0.5 * log((1 + r) / (1 - r)).
        se_diff_r = np.sqrt(1.0 / (n - 3) + 1.0 / (n2 - 3))
        z = abs((np.arctanh(xy) - np.arctanh(ab)) / se_diff_r)
        # The survival function keeps precision in the far tail, where
        # 1 - cdf(z) would cancel to exactly zero.
        p = norm.sf(z)
        if twotailed:
            p *= 2
        return z, p

    # method == 'zou': combine the individual confidence intervals of the two
    # coefficients into an interval for their difference.
    L1, U1 = _fisher_z_ci(xy, n, conf_level=conf_level)
    L2, U2 = _fisher_z_ci(ab, n2, conf_level=conf_level)
    lower = xy - ab - pow((pow((xy - L1), 2) + pow((U2 - ab), 2)), 0.5)
    upper = xy - ab + pow((pow((U1 - xy), 2) + pow((ab - L2), 2)), 0.5)
    return lower, upper


def _fisher_z_ci(r, n, conf_level=0.95):
    """
    Confidence interval of one correlation coefficient, built on the Fisher z scale
    and transformed back to the correlation scale
    @param r: correlation coefficient
    @param n: number of elements the coefficient is based on
    @param conf_level: confidence level of the interval
    @return: lower and upper bound of the interval
    """
    z_se = np.sqrt(1.0 / (n - 3))
    margin = norm.ppf(1 - (1 - conf_level) / 2.0) * z_se
    center = np.arctanh(r)
    return np.tanh(center - margin), np.tanh(center + margin)


In [13]:
# Group sizes; they determine the standard error used by independent_corr.
n_aa = len(df_study_3_AA)
n_ca = len(df_study_3_CA)

# Parallel result lists, one entry per emotion, in the AA group's sorted order.
emotions, AA_corr, CA_corr, z_score, p_val = [], [], [], [], []

for emotion in sorted_corrs_AA['emotions']:
    # Take the first matching row of each group for this emotion.
    aa_matches = sorted_corrs_AA.loc[sorted_corrs_AA['emotions'] == emotion, 'corr_val_dim']
    ca_matches = sorted_corrs_CA.loc[sorted_corrs_CA['emotions'] == emotion, 'corr_val_dim']
    AA_corr.append(aa_matches.values[0])
    CA_corr.append(ca_matches.values[0])
    emotions.append(emotion)

    # Default arguments: Fisher method, two-tailed p-value, absolute z statistic.
    z, p = independent_corr(AA_corr[-1], CA_corr[-1], n_aa, n_ca)
    z_score.append(z)
    p_val.append(p)

# Assemble the per-emotion comparison table.
corr_diff_df = pd.DataFrame(
    data={
        'emotion': emotions,
        'Asian American R Value': AA_corr,
        'Caucasian American R Value': CA_corr,
        'Z-value': z_score,
        'p': p_val,
    }
)

In [None]:
# Show the per-emotion table of group correlations, z statistics and p-values.
corr_diff_df