In [None]:
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import pandas as pd
import seaborn as sns

from os.path import join
import warnings

import matplotlib.pyplot as plt

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas/numpy deprecation and dtype warnings raised
# by the cells below; consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Project root plus the sub-directories that are joined onto it when reading
# inputs and writing outputs.
# NOTE(review): PROJ_DIR is an absolute path on a mounted volume, so the notebook
# only runs where that volume is available.
PROJ_DIR = "/Volumes/projects_herting/LABDOCS/Personnel/Katie/deltaABCD_clustering/"
DATA_DIR = "data/"
FIGS_DIR = "figures/"
OUTP_DIR = "output/"

In [None]:
# df = pd.read_pickle(join(PROJ_DIR, DATA_DIR, "data_qcd.pkl"))

# Read the network-to-network resting-state correlation table; the first two
# columns form the row index and the first line is the header.
rsfc = pd.read_csv(
    "/Volumes/projects_herting/LABDOCS/PROJECTS/ABCD/Data/release5.0/core/imaging/mri_y_rsfmr_cor_gp_gp.csv",
    index_col=[0, 1],
    header=0,
)
# Keep only rows without any missing value.
rsfc = rsfc.dropna()

# Index of the rows that are complete across the columns matching the
# change-score pattern.
change_score_cols = rsfc.filter(regex="rsfmri_c_ngd_.*change_score")
ppts = change_score_cols.dropna().index

# Event labels of the two time points.
tpts = ['baseline_year_1_arm_1', '2_year_follow_up_y_arm_1']

In [None]:
# Within-network connections are the columns whose two network labels (the 4th
# and 6th underscore-separated fields of the column name) are identical.
within_network = []
for col_name in rsfc.columns:
    fields = col_name.split('_')
    if fields[3] == fields[5]:
        within_network.append(col_name)
# The 'n'-to-'n' pairing is excluded from the list.
within_network.remove('rsfmri_c_ngd_n_ngd_n')

In [None]:
#base_df = rsfc.swaplevel(axis=0).loc['baseline_year_1_arm_1']
#y2fu_df = rsfc.swaplevel(axis=0).loc['2_year_follow_up_y_arm_1']

In [None]:
# we need ids for the vectorized upper triangle!
# Network abbreviations as they appear inside the 'rsfmri_c_ngd_<a>_ngd_<b>'
# column names; the position of each name in this list is used as its
# row/column position when the variable-name grid is built.
network_names = ['dt', 'ca', 'smh', 'dla', 'ad', 'smm', 'sa', 'fo', 'vs', 'cgc', 'vta', 'rspltp']

In [None]:
# Square grid of connection variable names: row r / column c holds the name that
# pairs network_names[r] with network_names[c].
# Built in one constructor call instead of enlarging an empty frame cell by cell
# with `.at` and a `list.index` lookup per cell; positions come straight from
# the iteration order, so the grid stays correct even if a name were repeated.
variable_df = pd.DataFrame(
    [
        [f'rsfmri_c_ngd_{ntwk1}_ngd_{ntwk2}' for ntwk2 in network_names]
        for ntwk1 in network_names
    ],
    dtype=object,
)


In [None]:
# Row/column positions of the upper triangle (diagonal included) of the
# variable-name grid. The size is read from the grid itself rather than
# hard-coded, so it follows the number of networks.
upper_tri = np.triu_indices(variable_df.shape[0])
# Variable names at those positions, as a flat list.
upper_tri_vars = list(variable_df.values[upper_tri])


In [None]:
# Load one pickled dataframe per type of change score; all of them live in the
# project's data directory.
delta_dir = join(PROJ_DIR, DATA_DIR)

sign_change = pd.read_pickle(join(delta_dir, 'delta_rsFC-sign_changes.pkl'))

rci = pd.read_pickle(join(delta_dir, 'delta_rsFC-rci.pkl'))
rci_abs = pd.read_pickle(join(delta_dir, 'delta_rsFC-rci_abs.pkl'))

change = pd.read_pickle(join(delta_dir, 'delta_rsFC-change.pkl'))
change_abs = pd.read_pickle(join(delta_dir, 'delta_rsFC-change_abs.pkl'))
change_plus1 = pd.read_pickle(join(delta_dir, 'delta_rsFC-change_plus1.pkl'))


In [None]:
# Distinct sign-change labels. They are collected across the whole table rather
# than from the first row only, so a label that the first participant happens
# not to show is still included. Values are scanned row by row, which keeps the
# first row's labels first and in their original order. Missing values are
# skipped because they are not a label.
sign_changes = [
    label
    for label in pd.unique(sign_change.to_numpy().ravel())
    if pd.notna(label)
]
sign_changes

In [None]:
# Label -> dataframe for every change measure summarised below. The labels end
# up inside the descriptive-statistics column names, which are later split on
# '-', so a label must not contain '-'.
# NOTE(review): 'apd' / '|apd|' map to the `change` / `change_abs` frames; the
# meaning of the abbreviation is not visible here - confirm.
measures = {
    'rci': rci,
    '|rci|': rci_abs,
    'apd': change,
    '|apd|': change_abs,
    'delta+1': change_plus1
}

In [None]:
# Restrict every measure to the upper-triangle connection columns.
# Assigning to the loop variable only rebinds that name and leaves the dict
# untouched, so the subset frames are stored back under their labels instead.
measures = {label: frame[upper_tri_vars] for label, frame in measures.items()}

In [None]:
# `btwn` is a second name for the same list object as `upper_tri_vars` (no copy).
# NOTE(review): despite the name, the list also contains the same-network
# (diagonal) pairs, because the upper triangle is taken with its diagonal.
btwn = upper_tri_vars

In [None]:
# make a list per network of all of that network's connections
# A connection belongs to a network when the network is either of its two
# labels. Each connection is listed once: testing both labels in a single pass
# avoids adding the within-network connection twice (it matches on both sides),
# which would double its weight in the pooled statistics.
network_wise = {}
for within_conn in within_network:
    ntwk_label = within_conn.split('_')[3]
    network_wise[ntwk_label] = [
        conn for conn in btwn
        if ntwk_label in (conn.split('_')[3], conn.split('_')[5])
    ]
network_wise

In [None]:
# Per network, keep only the connections whose two network labels differ,
# i.e. that network's connections without the within-network one.
between_network = {
    network: [
        conn for conn in conns
        if conn.split('_')[3] != conn.split('_')[5]
    ]
    for network, conns in network_wise.items()
}
print(between_network)

In [None]:
# compute the descriptive statistics for 
# 1. all connections of each network (see `network_wise`)
# 2. each within-network connection (see `within_network`)
# 3. all of the between-network connections, per network


In [None]:
# Start from an empty frame; the descriptive statistics computed below are
# stored under this name, one column per network/measure combination.
network_descriptives = pd.DataFrame()

In [None]:
# network wise descriptives
# The accumulator is reset inside this cell (as the other descriptive cells do),
# so re-running the cell rebuilds the table instead of appending a second copy
# of every column.
network_descriptives = pd.DataFrame()
for ntwk in network_wise.keys():
    temp_conns = network_wise[ntwk]
    for measure in measures.keys():
        # Pool all of the network's connections into one long column of values
        # and summarise it; describe() keeps only that numeric column.
        temp_df = measures[measure][temp_conns]
        all_temp = temp_df.melt(value_name=ntwk)
        temp_desc = all_temp.describe()
        # Label format '<network>-<measure>' is parsed again by later cells.
        temp_desc.columns = [f'{ntwk}-{measure}']
        network_descriptives = pd.concat([network_descriptives, temp_desc], axis=1)
print(network_descriptives)

In [None]:
# Re-shape the network-wise descriptives so the columns are (measure, network)
# pairs over the same statistic rows.

index = network_descriptives.index
columns = pd.MultiIndex.from_product([measures, network_names])
multi_index_dataframe = pd.DataFrame(index=index, columns=columns, dtype=float)
# Flat labels read '<network>-<measure>'; the parts are swapped into
# (measure, network) order for the two-level column key.
for flat_label in network_descriptives.columns:
    parts = flat_label.split('-')
    multi_index_dataframe[(parts[1], parts[0])] = network_descriptives[flat_label]
multi_index_dataframe

In [None]:
# Un-pivot the (measure, network) table into long form and pickle it.
# NOTE(review): melt() does not carry the row labels into the long table, so
# the statistic name of each value is not stored - confirm this is intended.
melt_ntwk_wise = pd.melt(multi_index_dataframe)
melt_ntwk_wise.to_pickle(join(PROJ_DIR, OUTP_DIR, 'melt_ntwk_wise.pkl'))

In [None]:
# Network-wise descriptives, computed separately for every sign-change label.
network_descriptives = pd.DataFrame()
for signs in sign_changes:
    for ntwk, temp_conns in network_wise.items():
        for measure, measure_df in measures.items():
            values_wide = measure_df[temp_conns]
            # Sign labels for the same rows and columns, stacked the same way
            # as the values so that row positions line up.
            signs_wide = sign_change.loc[values_wide.index][values_wide.columns]
            signs_long = signs_wide.melt(value_name=ntwk)
            values_long = values_wide.melt(value_name=ntwk)
            # Row positions whose sign label equals the current one.
            matching_rows = signs_long.index[signs_long[ntwk] == signs]
            sign_desc = values_long.loc[matching_rows].describe()
            sign_desc.columns = [f'{ntwk}-{measure}-({signs})']
            network_descriptives = pd.concat([network_descriptives, sign_desc], axis=1)

In [None]:
# Display only the columns whose label contains '(- to +)'.
network_descriptives.filter(like='(- to +)')

In [None]:
# Write the current descriptives table twice: as CSV and as a pickle.
out_dir = join(PROJ_DIR, OUTP_DIR)
network_descriptives.to_csv(join(out_dir, 'ntwk_wise_desc+sign_change.csv'))
network_descriptives.to_pickle(join(out_dir, 'ntwk_wise_desc+sign_change.pkl'))

In [None]:
# Descriptive statistics of each within-network connection, per measure.
network_descriptives = pd.DataFrame()
for measure, measure_df in measures.items():
    for conn in within_network:
        # The 4th underscore-separated field of the column name is the network.
        network_name = conn.split('_')[3]
        conn_descr = measure_df[conn].describe().rename(f'{network_name}-{measure}')
        network_descriptives = pd.concat([network_descriptives, conn_descr], axis=1)
network_descriptives

In [None]:
# Display only the columns whose label contains '|apd|'.
network_descriptives.filter(like='|apd|')

In [None]:
# Write the current descriptives table twice: as CSV and as a pickle.
out_dir = join(PROJ_DIR, OUTP_DIR)
network_descriptives.to_csv(join(out_dir, 'within_netwk_desc.csv'))
network_descriptives.to_pickle(join(out_dir, 'within_netwk_desc.pkl'))

In [None]:
#  within-network connectivity sign change descriptives 
# One column per (network, measure, sign label): the statistics of a
# within-network connection over the rows carrying that sign label.

network_descriptives = pd.DataFrame()
for signs in sign_changes:
    for measure in measures.keys():
        for conn in within_network:
            network_name = conn.split('_')[3]
            temp = measures[measure][conn]
            # Sign labels of this connection for the same rows; the comparison
            # is applied by position so values and labels stay paired.
            conn_signs = sign_change.loc[temp.index][conn]
            has_sign = (conn_signs == signs).to_numpy()
            temp_descr = temp[has_sign].describe()
            # Label with this connection's own network. The name `ntwk` is only
            # a leftover from an earlier cell's loop and does not follow `conn`.
            temp_descr.name = f'{network_name}-{measure}-({signs})'
            network_descriptives = pd.concat([network_descriptives, temp_descr], axis=1)
#network_descriptives

In [None]:
# Write the current descriptives table twice: as CSV and as a pickle.
out_dir = join(PROJ_DIR, OUTP_DIR)
network_descriptives.to_csv(join(out_dir, 'within_netwk_desc+sign_change.csv'))
network_descriptives.to_pickle(join(out_dir, 'within_netwk_desc+sign_change.pkl'))

In [None]:
# multi indexing the within network  descriptives
# At this point `network_descriptives` holds the sign-change table (its labels
# carry a '-(<sign>)' suffix), so the plain within-network statistics are taken
# straight from `measures` instead of being parsed out of those labels.
columns = pd.MultiIndex.from_product([measures, network_names])
index = network_descriptives.index
multi_index_dataframe = pd.DataFrame(dtype=float, columns=columns, index=index)
for measure in measures.keys():
    for conn in within_network:
        # Column key is (measure, network); the network is the 4th
        # underscore-separated field of the connection name.
        multi_index_dataframe[(measure, conn.split('_')[3])] = measures[measure][conn].describe()
multi_index_dataframe

In [None]:
# Un-pivot the (measure, network) table into long form and pickle it.
# NOTE(review): melt() does not carry the row labels into the long table, so
# the statistic name of each value is not stored - confirm this is intended.
melt_wthn_ntwk = pd.melt(multi_index_dataframe)
melt_wthn_ntwk.to_pickle(join(PROJ_DIR, OUTP_DIR, 'melt_wthn_ntwk.pkl'))

In [None]:
# and now a scaffold for 3! 
# between_network is a dictionary of between-network connections for each network
# like network_wise, but without the within-network connections

In [None]:
# Descriptive statistics of each network's between-network connections, pooled
# per network and measure.
network_descriptives = pd.DataFrame()
for ntwk, temp_conns in between_network.items():
    for measure, measure_df in measures.items():
        # Stack the selected connections into one long column and summarise it.
        stacked = measure_df[temp_conns].melt(value_name=ntwk)
        summary = stacked.describe()
        summary.columns = [f'{ntwk}-{measure}']
        network_descriptives = pd.concat([network_descriptives, summary], axis=1)

In [None]:
# Write the current descriptives table twice: as CSV and as a pickle.
# NOTE(review): the two file names differ in spelling ('netwk' for the CSV,
# 'netk' for the pickle); kept as is because other code may read these files.
network_descriptives.to_csv(join(PROJ_DIR, OUTP_DIR, 'between_netwk_desc.csv'))
network_descriptives.to_pickle(join(PROJ_DIR, OUTP_DIR, 'between_netk_desc.pkl'))

In [None]:
# between network sign change descriptives 
network_descriptives = pd.DataFrame()
for signs in sign_changes:
    for ntwk in between_network.keys():
        temp_conns = between_network[ntwk]
        for measure in measures.keys():
            temp_df = measures[measure][temp_conns]
            all_temp = temp_df.melt(value_name = ntwk)
            temp_desc = all_temp.describe()
            temp_desc.columns = [f'{ntwk}-{measure}-({signs})']
            network_descriptives = pd.concat([network_descriptives,temp_desc], axis=1)
network_descriptives

In [None]:
network_descriptives.to_csv(join(PROJ_DIR, OUTP_DIR, 'between_netwk_desc+sign_change.csv'))
network_descriptives.to_pickle(join(PROJ_DIR, OUTP_DIR, 'between_netk_desc+sign_change.pkl'))

In [None]:
# multi indexing the between network descriptives
columns = pd.MultiIndex.from_product([measures, network_names])
index = network_descriptives.index
multi_index_dataframe = pd.DataFrame(dtype=float, columns=columns, index=index)
for column in network_descriptives.columns:
    new_cols = column.split('-')
    multi_index_dataframe[(new_cols[1], new_cols[0])] = network_descriptives[column]
multi_index_dataframe

In [None]:
# Un-pivot the (measure, network) table into long form and pickle it.
# NOTE(review): melt() does not carry the row labels into the long table, so
# the statistic name of each value is not stored - confirm this is intended.
melt_btwn_ntwk = pd.melt(multi_index_dataframe)
melt_btwn_ntwk.to_pickle(join(PROJ_DIR, OUTP_DIR, 'melt_btwn_ntwk.pkl'))

In [None]:
# UNFINISHED EXPERIMENT
# multi indexing the between network SIGN CHANGES descriptives
columns = pd.MultiIndex.from_product([sign_changes, measures, network_names])
index = network_descriptives.index
multi_index_dataframe = pd.DataFrame(dtype=float, columns=columns, index=index)
# Flat labels read '<network>-<measure>-(<sign>)'. The sign part may itself
# contain '-', so the label is split at the first two dashes only, and the
# parenthesised suffix is mapped back to the original sign value so the key
# matches one of the prepared columns.
sign_by_suffix = {f'({sign_label})': sign_label for sign_label in sign_changes}
for column in network_descriptives.columns:
    new_cols = column.split('-', 2)
    sign_key = sign_by_suffix[new_cols[2]]
    multi_index_dataframe[(sign_key, new_cols[1], new_cols[0])] = network_descriptives[column]
multi_index_dataframe
#multi_index_dataframe[(new_cols[2])]
# we dont need measures!
# only need counts for stacked bar graphs