In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
import platform

In [None]:
## Paths Input Here
# Pick the hard-coded Dropbox locations that match the current operating
# system. Every value is a plain string so that later cells can hand it to
# pandas and os.path unchanged.
#   conn_path   : CSV holding the ROI-to-ROI correlation matrix
#   clin_path   : Excel workbook holding the clinical scores
#   out_dir     : directory that receives the permutation results
#   x_roi_names : CSV holding the ROI names for the correlation matrix
if platform.uname().system == 'Darwin':  # ---------------- Mac OS X ----------------
    conn_path = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/analyses/roi-roi_correl/matrix_corrMx_AvgR.csv'
    clin_path = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/patient_data/AD_Clinical_Data_CDR_ADAS_COG_13.xlsx'
    out_dir = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/analyses/AD_to_memory_net/permutations'
    x_roi_names = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/analyses/roi-roi_correl/matrix_corrMx_names.csv'
    print('I have set pathnames in the Mac style')
else:  # ---------------- Windows (also taken on any other non-Darwin system) ----------------
    conn_path = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\roi-roi_correl\matrix_corrMx_AvgR.csv'
    clin_path = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\patient_data\AD_Clinical_Data_CDR_ADAS_COG_13.xlsx'
    # The original branch printed pathlib.Path(os.path.join(base, conn_path)) here,
    # but neither `pathlib` nor `base` exists in this notebook, so the cell raised
    # NameError before out_dir / x_roi_names were assigned. The stray print is removed.
    out_dir = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\AD_to_memory_net\permutations'
    x_roi_names = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\roi-roi_correl\matrix_corrMx_names.csv'
    print('I have set pathnames in the Windows style')

## Import X Vals

In [None]:
def _short_roi_name(name):
    """Return the file name of `name`, cut at '.nii' and truncated to 8 characters."""
    return os.path.basename(name).split('.nii')[0][0:8]


# Preferred route: take the column names from the separate ROI-name file and
# read the header-less connectivity matrix with those names.
try:
    name_df = pd.read_csv(x_roi_names, names=['arb'], header=None)
    colnames = name_df.arb.values.tolist()
    newname = [_short_roi_name(name) for name in colnames]

    x_df = pd.read_csv(conn_path, names=newname, header=None)
    x_df.index = newname
    # Keep the rows from position 8 onward and only the first 8 columns.
    # NOTE(review): presumably the first 8 ROIs are the networks of interest and
    # the remaining rows are subjects - confirm against the matrix layout.
    x_df = x_df.iloc[8:, :8]
    x_df = x_df.reset_index(drop=True)
except Exception as err:
    # Fallback route: the connectivity CSV is read with its own header row.
    # `Exception` (rather than a bare except) lets Ctrl-C still interrupt the cell,
    # and the reason for falling back is now reported instead of being discarded.
    print('excepted')
    print(f'  reason: {type(err).__name__}: {err}')
    x_df = pd.read_csv(conn_path)
    colnames = x_df.columns.values
    newname = [_short_roi_name(name) for name in colnames]
    # Plain column assignment replaces set_axis(..., inplace=False): the `inplace`
    # keyword is rejected by pandas 2.x. x_df was just read, so nothing else sees it.
    x_df.columns = newname
    try:
        # A saved index column named 'Unnamed: 0' is shortened to 'Unnamed:' above.
        x_df.pop('Unnamed:')
    except KeyError:
        print('no x_df.pop(<name>) column to pop')

corr_df = x_df
display(corr_df)

In [None]:
# Summary statistics of the connectivity values, shown with one row per
# corr_df column so that wide frames stay readable.
corr_description = corr_df.describe().T
display(corr_description)

## Import Y Vals

In [None]:
# Read the selected columns of the clinical-scores sheet (first 50 data rows)
# and report how many cells are missing before showing the frame.
sheet_name = 'AD_Clinical_Scores'
alphab_cols = 'C, D, E, F, G, J, V'
excel_options = dict(sheet_name=sheet_name, usecols=alphab_cols, nrows=50)
clin_df = pd.read_excel(clin_path, **excel_options)

nan_total = clin_df.isna().sum().sum()
print('Num NaNs: ', nan_total)
display(clin_df)

In [None]:
## Organize the clinical dataframe
# clin_df = clin_df.sort_values(by=['Patient # CDR, ADAS'], kind='quicksort', axis=0, ascending=True, ignore_index=True)
# clin_df.tail(10)

In [None]:
## Binary-encode the randomization arm in place | sham=0 stim=1
# Both masks are built from the original labels before any value is written;
# rows carrying any other label are left untouched.
arm_col = 'Randomization Arm'
shams = clin_df[arm_col].eq('sham-stim')
stims = clin_df[arm_col].eq('stim-sham')
clin_df.loc[shams, arm_col] = 0
clin_df.loc[stims, arm_col] = 1
clin_df.tail(10)

In [None]:
## Develop Understanding of the Clinical Data
# Best-effort summary: a failure here must not stop the notebook, but the
# actual error is reported so that the cause is no longer unknown.
try:
    clin_description = clin_df.describe().transpose()
    display(clin_description)
except Exception as err:
    print('Failed to describe clinical dataframe, unknown cause')
    print(f'  reason: {type(err).__name__}: {err}')

## Merge Dataframes

In [None]:
# Place the clinical and connectivity frames side by side, matching rows by
# position. Both indexes are reset (dropping the old labels) so that concat
# aligns row i with row i regardless of what the inputs carried.
# The original cell built `corrd_df` but then concatenated `corr_df`, leaving
# the reset frame unused; the reset frame is now the one that is merged.
corrd_df = corr_df.reset_index(drop=True)
clin_rows = clin_df.reset_index(drop=True)

# A length mismatch makes concat pad the shorter frame with NaN rows, which the
# next cell forward-fills - flag it loudly rather than letting it pass unseen.
if len(clin_rows) != len(corrd_df):
    print(f'WARNING: clinical rows ({len(clin_rows)}) != connectivity rows ({len(corrd_df)}); '
          'unmatched rows will contain NaNs')

total_df = pd.concat([clin_rows, corrd_df], axis=1)
display(total_df)

In [None]:
#Handle NaNs
# Forward-fill: every missing cell takes the value from the row above it.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in current pandas. NaNs in the first row have nothing above them
# and therefore remain; the count printed below makes that visible.
total_df = total_df.ffill()
print('Num NaNs: ', total_df.isna().sum().sum())

#Generate metrics for understanding
try:
    total_desc = total_df.describe().transpose()
    # A bare expression inside `try` is not rendered by Jupyter (only the last
    # top-level expression of a cell is), so the table is displayed explicitly.
    display(total_desc)
except Exception as err:
    print('Failed to generate total metrics, unkown cause')
    print(f'  reason: {type(err).__name__}: {err}')

## Select Split Vals

In [None]:
# Subgroup the subjects by whether they fall above or at/below the mean of `metric`.
metric = '% Change from baseline (ADAS-Cog11)'
metric_val = np.mean(total_df[metric])
print(f'{metric} = {metric_val}')

## Choose variables of interest.
x_name = '00_memor'                              # predictor column
z_name = '% Change from baseline (ADAS-Cog11)'   # outcome column

#----------------------------------------------------------------User Inputs Above

# Boolean row masks for the two subgroups. They are computed before any value
# is modified below, so they reflect the data as loaded.
index_one = (total_df[metric] > metric_val)    # rows above the mean of `metric`
index_two = (total_df[metric] <= metric_val)   # rows at or below the mean of `metric`

##Example of how to use these indices to manipulate data:
#### clin_df.loc[index_one, '<names of relevant columns>'] = 0 ## can use iloc too.


#----------------------------------------------------------------User Inputs below
# set_pos: shift the outcome upward by |mean| and take |x| before correlating.
# Note that the shift does not by itself guarantee that every outcome value is
# positive, and re-running this cell applies the shift again.
set_pos = True
if set_pos:
    total_df[z_name] = total_df[z_name] + abs(np.mean(total_df[z_name]))
    total_df[x_name] = np.abs(total_df[x_name])
    pos_val = 'absval'
    print('I will set all z values positive')
else:
    pos_val = 'nonabsval'
    # The original message here claimed values would be set positive, which is
    # the opposite of what this branch does.
    print('I will leave all vals unchanged')

#permute x and y together (sngl_perm), or separately (dbl perm)?
dbl_perm = False
if dbl_perm:
    perm_no = 'dbl_perm'
    print('I will double perm')
else:
    perm_no = 'sngl_perm'
    print('I will single perm')

total_df.tail(3)

## Perform Analysis

In [None]:
from scipy.stats import pearsonr

# Observed (un-permuted) Pearson correlation between predictor and outcome,
# computed separately inside each of the two subgroups.
x_vals, z_vals = total_df[x_name], total_df[z_name]

over_result = pearsonr(x_vals[index_one], z_vals[index_one])
under_result = pearsonr(x_vals[index_two], z_vals[index_two])
r_over, p_over = over_result
r_under, p_under = under_result

# Single-row frame holding the four observed statistics plus an 'index' column.
outcomes_df = pd.DataFrame([{
    'r_over': r_over,
    'p_over': p_over,
    'r_under': r_under,
    'p_under': p_under,
    'index': 0,
}])

## Perform Permutation for Significance

In [None]:
# Build null distributions of the subgroup correlations by random re-splitting.
# Each iteration shuffles the row labels, treats the first `n_over` shuffled
# rows as the "over" group and the rest as the "under" group, and records the
# Pearson r / p of each group.
#   single perm: x and z share one shuffle, so each subject's (x, z) pair stays
#                intact and only group membership is randomized.
#   double perm: x and z are shuffled independently, which also breaks the pairing.
indices = total_df.index.tolist()
num_perms = 10000

# Set PERM_SEED to an integer for a reproducible run; None keeps the original
# behavior of a fresh, OS-seeded random stream on every run.
PERM_SEED = None
perm_rng = np.random.RandomState(PERM_SEED)

# Size of the permuted "over" group = size of the observed "over" group.
# This replaces a hard-coded 16 that was only right for one particular dataset.
n_over = int(index_one.sum())

# Column lookups do not change between iterations, so do them once.
x_all = total_df[x_name]
z_all = total_df[z_name]

r_over_perm = []; p_over_perm = []
r_under_perm = []; p_under_perm = []

if dbl_perm not in (True, False):
    # Unrecognized setting: report once (instead of once per iteration) and
    # leave the result lists empty.
    print('failed')
else:
    for _ in range(num_perms):
        perm_indices = perm_rng.permutation(indices)
        z_perm_indices = perm_rng.permutation(indices) if dbl_perm else perm_indices

        r_over_arb, p_over_arb = pearsonr(x_all.loc[perm_indices[:n_over]],
                                          z_all.loc[z_perm_indices[:n_over]])
        r_over_perm.append(r_over_arb); p_over_perm.append(p_over_arb)

        r_under_arb, p_under_arb = pearsonr(x_all.loc[perm_indices[n_over:]],
                                            z_all.loc[z_perm_indices[n_over:]])
        r_under_perm.append(r_under_arb); p_under_perm.append(p_under_arb)

In [None]:
# Collect the per-permutation statistics into one frame (one row per
# permutation) and add the over-minus-under difference of the r values.
perm_columns = {
    'r_over_perm': r_over_perm,
    'p_over_perm': p_over_perm,
    'r_under_perm': r_under_perm,
    'p_under_perm': p_under_perm,
}
perm_df = pd.DataFrame(perm_columns)
perm_df['delta_r'] = perm_df['r_over_perm'].sub(perm_df['r_under_perm'])

print(f'Running {num_perms} permutations over {len(indices)} incidences \n outcome: {z_name} \n based on: {x_name} \n split by: {metric}')
perm_df.tail(5)

## Check Data

In [None]:
print(f'There were {len(perm_df)} permutations identified')

# Number of permuted r values that fall strictly below the observed r.
# The comparison is done on the DataFrame columns and the True entries of the
# mask are summed. The original compared the Python lists to a scalar and then
# counted the *non-zero selected values*, which depends on the scalar being a
# numpy type and skips any permuted r that is exactly 0.
counts_over = int((perm_df['r_over_perm'] < r_over).sum())
counts_under = int((perm_df['r_under_perm'] < r_under).sum())

print(f'The were {counts_over} permuted observations >{metric} occuring below the experiminary value of {r_over}')
print(f'The were {counts_under} permuted observations <={metric} occuring below the experiminary threshold of {r_under}')

# Same counts expressed as a fraction of all permutations.
prop_over = counts_over/len(perm_df)
prop_under = counts_under/len(perm_df)
print('prop perm r under exptl r (>metric): ', prop_over)
print('prop perm r under exptl r (<=metric): ', prop_under)

# Side-by-side histograms of the permuted r values for the two subgroups.
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.set_title(f'Hist {metric}>{metric_val}')
ax1.set_xlabel('R Value'); ax1.set_ylabel('counts')

ax2.set_title(f'Hist {metric}<={metric_val}')
ax2.set_xlabel('R Value'); ax2.set_ylabel('counts')

ax1.hist(perm_df['r_over_perm']);
ax2.hist(perm_df['r_under_perm']);

In [None]:
# Three-panel figure: histogram + KDE of the permuted r values for the "over"
# group, the "under" group, and their difference. `fig2` is saved to disk later.
fig2 = plt.figure(figsize=(15, 5))
hist_style = {'linewidth': 15, 'alpha': 0.65}
panels = [
    (131, f'Hist+KDE {metric}>{metric_val}', perm_df.r_over_perm),
    (132, f'Hist+KDE {metric}<={metric_val}', perm_df.r_under_perm),
    (133, 'Hist+KDE Delta', perm_df.delta_r),
]

panel_axes = []
for position, title, values in panels:
    # add_subplot makes the new axes current, which is where distplot draws.
    panel_ax = fig2.add_subplot(position)
    panel_ax.set_title(title)
    # A fresh copy of the style dict is passed to every call.
    sns.distplot(values, kde=True, bins=100,
                 color='skyblue', hist_kws=dict(hist_style))
    panel_axes.append(panel_ax)

ax1, ax2, ax3 = panel_axes

## Calculate Z&P Score

In [None]:
import scipy.stats

# Observed difference between the two subgroup correlations.
r_delta = r_over - r_under

# Mean and standard deviation of each permutation (null) distribution.
ovr_avg, ovr_std = np.mean(perm_df.r_over_perm), np.std(perm_df.r_over_perm)
und_avg, und_std = np.mean(perm_df.r_under_perm), np.std(perm_df.r_under_perm)
dlt_avg, dlt_std = np.mean(perm_df.delta_r), np.std(perm_df.delta_r)

# Z score: distance of each observed statistic from its null mean, in null SDs.
ovr_z = (r_over - ovr_avg) / ovr_std
und_z = (r_under - und_avg) / und_std
dlt_z = (r_delta - dlt_avg) / dlt_std

# P value: upper-tail probability of |z| under a standard normal (one tail).
ovr_p, und_p, dlt_p = (scipy.stats.norm.sf(abs(z_score))
                       for z_score in (ovr_z, und_z, dlt_z))

rule = '#----------------------------------------------------------------'
print(rule)
print(f'p value {metric}>{metric_val}: {ovr_p}')
print(f'p value {metric}<={metric_val}: {und_p}')
print(f'p value delta metric: {dlt_p}')
print(rule)

## Sanity Check

In [None]:
# Show the permuted r values of the "over" group for a quick visual check.
print('Vals over metric')
display(perm_df['r_over_perm'])

In [None]:
# Assemble the results table: one column per subgroup plus one for the
# over-minus-under difference, one row per statistic.
# The fourth row holds the raw counts (counts_over / counts_under) and the
# fifth the matching proportions; the delta column has no value for rows that
# only make sense per subgroup.
row_labels = ['exptl_r', 'exptl_p', 'num_perms',
              'prop_r_obvs_under_r_exptl', 'prop_perm_occurs_under_exptl_r',
              'z_val', 'p_val']

over_values = [r_over, p_over, num_perms, counts_over, prop_over, ovr_z, ovr_p]
under_values = [r_under, p_under, num_perms, counts_under, prop_under, und_z, und_p]
delta_values = [r_delta, None, num_perms, None, None, dlt_z, dlt_p]

final_df = pd.DataFrame(
    {
        f'{metric}>{metric_val}': over_values,
        f'{metric}<={metric_val}': under_values,
        'delta_vals': delta_values,
    },
    index=row_labels,
)
display(final_df)

In [None]:
# Write the results table (.csv) and the KDE figure (.png) into out_dir.
# makedirs also creates missing parent directories and is a no-op when the
# directory already exists; os.mkdir failed if any parent was missing.
os.makedirs(out_dir, exist_ok=True)

# File stem encodes the outcome, predictor, split metric and run options.
analysis = f'abs_{z_name}_by_{x_name}_split_by_{metric}_{pos_val}_{perm_no}'

# Saving stays best-effort (one failed write must not block the other), but a
# failure is now reported instead of being silently discarded.
try:
    final_df.to_csv(os.path.join(out_dir, analysis+'.csv'))
    print(f'{analysis} saved to: \n {out_dir}')
except Exception as err:
    print(f'Could not save {analysis}.csv ({type(err).__name__}: {err})')

try:
    fig2.savefig(os.path.join(out_dir, analysis+'.png'))
    print(f'{analysis} saved to: \n {out_dir}')
except Exception as err:
    print(f'Could not save {analysis}.png ({type(err).__name__}: {err})')

In [None]:
#-----
#Final notes; in order to compare the difference of the r values, run this again, but with all Y values +np.min()