In [None]:
import os
import warnings
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_rel

warnings.filterwarnings("ignore")

In [None]:
# Project root. Defaults to the lab's mounted volume, but can be pointed at a
# different checkout/mount by setting DELTAABCD_PROJ_DIR before starting the
# kernel, so the notebook can run on machines that do not mount this volume.
PROJ_DIR = os.environ.get(
    "DELTAABCD_PROJ_DIR",
    "/Volumes/projects_herting/LABDOCS/Personnel/Katie/deltaABCD_clustering/",
)
# Sub-directories, joined onto PROJ_DIR wherever files are read or written.
DATA_DIR = "data/"
FIGS_DIR = "figures/"
OUTP_DIR = "output/"

In [None]:
# grab ppt ids for people who pass QC: rows of the QC'd dataset that have no
# missing value in any column matching rsfmri_c_*change_score.
# Only the index is kept; the loaded dataframe itself is not held on to.
ppts = (
    pd.read_pickle(join(PROJ_DIR, DATA_DIR, "data_qcd.pkl"))
    .filter(regex="rsfmri_c_.*change_score")
    .dropna()
    .index
)
# `df` ends up bound to None, exactly as before
df = None
# we need ids for the vectorized upper triangle!
network_names = [
    'dt', 'ca', 'smh', 'dla', 'ad', 'smm',
    'sa', 'fo', 'vs', 'cgc', 'vta', 'rspltp',
]

In [None]:
# reading in the dataframes for each type of change score
# (sign changes live in the data folder, graph measures in the output folder)
sign_change, graph_df, local_df = (
    pd.read_pickle(join(PROJ_DIR, folder, filename))
    for folder, filename in (
        (DATA_DIR, 'delta_rsFC-sign_changes.pkl'),
        (OUTP_DIR, 'delta_rsFC-graph_measures-global.pkl'),
        (OUTP_DIR, 'delta_rsFC-graph_measures-local.pkl'),
    )
)

In [None]:
# I'm just interested in ppts who pass the quality control process.
# Select them in graph_df's own row order: intersecting two Python sets gives
# an arbitrary ordering (and for string IDs one that changes between kernel
# sessions), which made the row order of everything downstream unreproducible.
passes_qc = graph_df.index.isin(ppts)
quality_ppts = graph_df.index[passes_qc].unique().tolist()

graph_df = graph_df.loc[quality_ppts]
# NOTE(review): this raises KeyError if a QC-passing participant in graph_df
# has no rows in local_df — same as before; confirm both files cover the same ppts.
local_df = local_df.loc[quality_ppts]

In [None]:
############### global measures ###################
# one per person per timepoint
# in graph_df
# measures are 'modularity' and 'global_efficiency'

# calculate descriptives for baseline and 2-year follow-up using .describe()

In [None]:
# global efficiency descriptives
graph_descr = pd.DataFrame()
graph_df_descr = graph_df.describe()
descriptives = pd.concat([graph_descr, graph_df_descr], axis=1)
descriptives.head()

In [None]:
descriptives.to_csv(join(PROJ_DIR, OUTP_DIR,'graph_df_descr.csv'))

In [None]:
# modularity descriptives
local_descr = pd.DataFrame()
local_df_descr = local_df.describe()
descriptive = pd.concat([local_descr, local_df_descr], axis=1)
descriptive.head()

In [None]:
descriptive.to_csv(join(PROJ_DIR, OUTP_DIR,'local_df_descr.csv'))

In [None]:
# now let's do a paired t-test to see if they increase or decrease

# Bring the measure name to the outer column level, take the global-efficiency
# columns, and keep only the rows that have a value in every one of them.
efficiency = graph_df.swaplevel(axis=1)['global_efficiency'].dropna()

In [None]:
change_eff = ttest_rel(efficiency['2_year_follow_up_y_arm_1'], efficiency['baseline_year_1_arm_1'])
# save out the ttest results (t, p) and the number of participants included in the test
# can find number of participants via len(efficiency.index)
num_ppts = len(efficiency.dropna().index)
print(num_ppts)

In [None]:
ttest = {
    't' : change_eff.statistic ,
    'p' : change_eff.pvalue,
    'num_ppts' : change_eff.df
}
ttest_change_eff = pd.DataFrame(ttest, index=[0])
print(ttest_change_eff)

In [None]:
ttest_change_eff.to_csv(join(PROJ_DIR, OUTP_DIR,'ttest_change_eff.csv'))

In [None]:
# now do the same for modularity

# Bring the measure name to the outer column level, take the modularity
# columns, and keep only the rows that have a value in every one of them.
modularity = graph_df.swaplevel(axis=1)['modularity'].dropna()

# Paired t-test of 2-year follow-up against baseline; with this argument order
# a positive t means modularity was higher at follow-up.
change_mod = ttest_rel(modularity['2_year_follow_up_y_arm_1'], modularity['baseline_year_1_arm_1'])
# number of participants included in the test
num_ppts = len(modularity.index)

# save out the ttest results (t, p) and the number of participants included
ttest = {
    't': change_mod.statistic,
    'p': change_mod.pvalue,
    # This column used to hold change_mod.df, i.e. the degrees of freedom
    # (n - 1), which under-reported the sample size by one.
    'num_ppts': num_ppts,
}
ttest_change_mod = pd.DataFrame(ttest, index=[0])

# and save out the results
ttest_change_mod.to_csv(join(PROJ_DIR, OUTP_DIR,'ttest_change_mod.csv'))

In [None]:
############# local measures ###################
# one per brain network per person per timepoint
# in local_df
# measures are 'clust_coeff' and 'btwn_cent'
local_df = local_df.dropna(how='all')

In [None]:
base_local = local_df.swaplevel(axis=0).loc['baseline_year_1_arm_1']
y2fu_local = local_df.swaplevel(axis=0).loc['2_year_follow_up_y_arm_1']

In [None]:
# use .describe() to get descriptives for each local measure and each network
# separately for baseline and 2-year follow-up

In [None]:
# baseline local descriptives
base_local_descr = pd.DataFrame()
base_descr = base_local.describe()
descriptives = pd.concat([base_local_descr, base_descr], axis=1)
descriptives.head()

In [None]:
descriptives.to_csv(join(PROJ_DIR, OUTP_DIR,'base_local_descr.csv'))

In [None]:
# # y2fu local descriptives
y2fu_local_descr = pd.DataFrame()
y2fu_descr = y2fu_local.describe()
descriptives = pd.concat([y2fu_local_descr, y2fu_descr], axis=1)
descriptives.head()

In [None]:
descriptives.to_csv(join(PROJ_DIR, OUTP_DIR,'y2fu_local_descr.csv'))

In [None]:
# now we'll assess change, again
# here's how you'd do it for one network:
# it's probably easiest to just make a temporary dataframe per network
# per measure to make sure the participants are aligned

In [None]:
# Katie's example:

# temp_cc_ad = pd.concat(
#     [base_local['clust_coeff']['ad'].rename('base'),
#       y2fu_local['clust_coeff']['ad'].rename('y2fu')], 
#       axis=1).dropna()


In [None]:
#CLUST_COEFF
change = pd.DataFrame(dtype=float)
ttest_df = pd.DataFrame(dtype=float)
for ntwk in base_local['clust_coeff'].columns:
    temp_base = base_local['clust_coeff'][ntwk]
    temp_base.name = f'{ntwk}-base'
    temp_base_df = pd.DataFrame(temp_base)
    #print(temp_base_df)
    temp_y2fu = y2fu_local['clust_coeff'][ntwk]
    temp_y2fu.name = f'{ntwk}-y2fu'
    temp_y2fu_df = pd.DataFrame(temp_y2fu)
    change = pd.concat((change, temp_base_df, temp_y2fu_df), axis=1).dropna()
    
    
    ttests = ttest_rel(change[f'{ntwk}-y2fu'], change[f'{ntwk}-base'])
    print(ntwk, ttests)
    ttest_df.at[ntwk, 't'] = ttests.statistic
    ttest_df.at[ntwk, 'p'] = ttests.pvalue
    ttest_df.at[ntwk, 'df'] = ttests.df
print(ttest_df)

In [None]:
ttest_df.to_csv(join(PROJ_DIR, OUTP_DIR,'clust_coeff_ttest.csv'))
change.to_csv(join(PROJ_DIR, OUTP_DIR,'clust_coeff_change.csv'))

In [None]:
#BTWN_CENT
change = pd.DataFrame(dtype=float)
ttest_df = pd.DataFrame(dtype=float)
for ntwk in base_local['btwn_cent'].columns:
    temp_base = base_local['btwn_cent'][ntwk]
    temp_base.name = f'{ntwk}-base'
    temp_base_df = pd.DataFrame(temp_base)
    #print(temp_base_df)
    temp_y2fu = y2fu_local['btwn_cent'][ntwk]
    temp_y2fu.name = f'{ntwk}-y2fu'
    temp_y2fu_df = pd.DataFrame(temp_y2fu)
    change = pd.concat((change, temp_base_df, temp_y2fu_df), axis=1).dropna()
    
    
    ttests = ttest_rel(change[f'{ntwk}-y2fu'], change[f'{ntwk}-base'])
    #print(ntwk, ttests)
    ttest_df.at[ntwk, 't'] = ttests.statistic
    ttest_df.at[ntwk, 'p'] = ttests.pvalue
    ttest_df.at[ntwk, 'df'] = ttests.df
print(change)

In [None]:
ttest_df.to_csv(join(PROJ_DIR, OUTP_DIR,'btwn_cent_ttest.csv'))
change.to_csv(join(PROJ_DIR, OUTP_DIR,'btwn_cent_change.csv'))