# Group Differences

In [1]:
%load_ext autoreload
%autoreload 2

# Standard imports
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import pathlib
import json
import sys
import os
import re

sys.path.append("src/")
from utils import compute_group_differences

# Custom imports
from ToolBox.neuro import stats, plot, wrangling

# Define paths
class PATH:
    """Namespace of filesystem locations used by this notebook.

    Every entry is built from ``STUDY``, the resolved current working
    directory, so the paths are only valid when the kernel's working
    directory is the study root.
    """
    # Study root: ``Path('.').parent`` is still ``Path('.')``, so resolving
    # '.' directly gives the same absolute path.
    STUDY = pathlib.Path('.').resolve()

    # Figures and results directories
    FIGURES = STUDY / 'figures'
    RESULTS = STUDY / 'results'

    # Demographics table and feature-name lookup
    DEMO = STUDY / 'data' / 'raw' / 'CHARM' / 'demographics.tsv'
    CLEAN_NAMES = STUDY / 'data' / 'clean_feature_names.json'

    # Feature directories, keyed by parcellation name
    FEATURES = {
        'desikan': STUDY / 'data' / 'cntm-features' / 'Desikan86_sift2' / 'features',
        'schaefer': STUDY / 'data' / 'cntm-features' / 'Schaefer220_sift2' / 'features',
    }

# Load dict for clean feature names.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default text encoding (which is locale-dependent).
with open(PATH.CLEAN_NAMES, encoding='utf-8') as file:
    clean_names = json.load(file)

# Load demographics information (tab-separated file)
demo = pd.read_csv(PATH.DEMO, sep='\t')

## Desikan Connectomes

In [9]:
# Global-scale Desikan features, joined to the demographics table.
# Both frames are matched on their row index, which reset_index() exposes
# as a temporary 'index' column that is dropped again after the merge.
fpath = os.fspath(PATH.FEATURES['desikan'])
features = wrangling.load_computed_measures(fpath, scale='global')
df = pd.merge(
    demo.reset_index(),
    features.reset_index(),
    on='index',
).drop(columns='index')

# Group comparison on the merged table
group_diff = compute_group_differences(df)
# NOTE(review): unlike the Schaefer cell, renaming the measures and exporting
# the table are disabled here — confirm this is intentional.
# group_diff['Measure'] = group_diff['Measure'].replace(clean_names)
# group_diff.to_csv(PATH.RESULTS.joinpath('Desikan86_group_differences.txt'), 
#                   index=False, sep='\t')

Rows: 313 Columns: 29
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): Subject, Sex, DX
dbl (26): Age, ADOS, SCQ, IQ, assortativity, characteristic_path_length, clu...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


## Schaefer Connectomes

In [13]:
# Global-scale Schaefer features, joined to the demographics table.
fpath = os.fspath(PATH.FEATURES['schaefer'])
features = wrangling.load_computed_measures(fpath, scale='global')

# Subject R0083_V0168 is excluded from the demographics before the join.
# NOTE(review): query() keeps the original row labels, so the 'index' merge
# key skips the removed row. If the feature table's index is purely
# positional, rows after that subject would pair with the wrong features —
# TODO confirm how load_computed_measures indexes its output.
df = pd.merge(
    demo.query("Subject != 'R0083_V0168'").reset_index(),
    features.reset_index(),
    on='index',
).drop(columns='index')

# Group comparison, then swap raw measure names for their display names
group_diff = compute_group_differences(df)
group_diff['Measure'] = group_diff['Measure'].replace(clean_names)

# Write the table as tab-separated text without the row index
group_diff.to_csv(
    PATH.RESULTS / 'Schaefer220_group_differences.txt',
    sep='\t',
    index=False,
)

Rows: 311 Columns: 29
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): Subject, Sex, DX
dbl (26): Age, ADOS, SCQ, IQ, assortativity, characteristic_path_length, clu...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
