In [1]:
"""
Generate descriptive SNBB figures that match the slide template colours.
--------------------------------------------------------------------------

Prereqs:
    pip install matplotlib pandas numpy seaborn
    (seaborn only used for its convenient styling helper)

Replace CSV_PATH if your file lives elsewhere.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from matplotlib import font_manager as fm

In [2]:
SOFTWARE = "mrtrix3"
ATLAS = "schaefer2018tian2020_400_7"
RECONSTRUCTION = "SDStream"
SCALE = "None"
WEIGHT = "SIFT2"


DISTRIBUTION_METRIC = "qfmean"
DIFFUSION_MEASURES = ["adc", "fa", "rd", "ad"]
STRUCTURAL_MEASURES = ["gm_vol", "wm_vol", "csf_vol"]
CONNECTOME_MEASURES = [
    "degree",
    "eigenvector_centrality",
    "betweenness_centrality",
    "degree_norm",
    "regional_efficiency",
]

In [3]:
source = Path("/home/galkepler/Projects/athletes_brain/data/interim")
destination = Path("/home/galkepler/Projects/athletes_brain/data/processed")

# unique_measures = DIFFUSION_MEASURES + STRUCTURAL_MEASURES + CONNECTOME_MEASURES
unique_measures = DIFFUSION_MEASURES + STRUCTURAL_MEASURES

MAX_NUMBER_OF_WORKOUTS = 0

# write a query to detect atheletes (study == "sport", condition != "learning")
ATHLETES_QUERY = "(study == 'Sport') & (condition != 'Learning')"

results = {m: {} for m in unique_measures}

for m in unique_measures:
    print(f"Processing {m}")

    data = pd.read_csv(source / f"{m}.csv", index_col=0)
    # tag athletes
    data["target"] = data.eval(ATHLETES_QUERY)
    # drop learners
    data = data.drop(
        data[
            (data["condition"].isin(["Learning", "Novice Control"])) & (data["study"] == "Sport")
        ].index
    )
    data = data.drop(
        data[(data["exercise_frequency"] > MAX_NUMBER_OF_WORKOUTS) & (~data["target"])].index
    )
    # data[data["exercise_frequency"] < MAX_NUMBER_OF_WORKOUTS, "target"] = False
    data.to_csv(destination / f"{m}.csv")
    print(f"Found {data['subject_code'].nunique()} subjects with {m} data")
    # break
    # break
    # df = collect_data(m_data, m)
    # break

Processing adc


  data = pd.read_csv(source / f"{m}.csv", index_col=0)


Found 1018 subjects with adc data
Processing fa


  data = pd.read_csv(source / f"{m}.csv", index_col=0)


Found 993 subjects with fa data
Processing rd


  data = pd.read_csv(source / f"{m}.csv", index_col=0)


Found 993 subjects with rd data
Processing ad


  data = pd.read_csv(source / f"{m}.csv", index_col=0)


Found 993 subjects with ad data
Processing gm_vol


  data = pd.read_csv(source / f"{m}.csv", index_col=0)


Found 997 subjects with gm_vol data
Processing wm_vol


  data = pd.read_csv(source / f"{m}.csv", index_col=0)


Found 997 subjects with wm_vol data
Processing csf_vol


  data = pd.read_csv(source / f"{m}.csv", index_col=0)


Found 997 subjects with csf_vol data


In [4]:
d = data.drop_duplicates(subset=["subject_code"])
d[d["target"]]

Unnamed: 0,subject_code,dob,age_at_scan,sex,session_id,study,group,condition,gender,sexual_orientation,...,qualityratings_IQR,tiv,strip_score,height,education_level,psychometric_score_total,psychometric_score_verbal,psychometric_score_quantitative,psychometric_score_english,target
93524,360,1976-11-20,44.11,M,202012291715,Sport,Bjj,Control,cisgender,heterosexual,...,1.659603,1568.04,1.851534,,,,,,,True
98518,374,1996-12-21,24.19,F,202103010944,Sport,Climbing,Professional,cisgender,heterosexual,...,1.652754,1409.10,1.344629,,,,,,,True
98972,375,2000-04-21,20.88,F,202103091650,Sport,Climbing,Professional,cisgender,heterosexual,...,1.654010,1263.87,1.522478,,,,,,,True
100334,379,1998-10-19,22.39,M,202103091521,Sport,Climbing,Control,cisgender,heterosexual,...,1.661998,1561.08,0.804320,,,,,,,True
101242,380,1994-10-07,26.43,M,202103110957,Sport,Climbing,Control,cisgender,heterosexual,...,1.658640,1386.68,1.089114,,,,,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1791484,FTNC05,1995-11-26,29.40,M,202504211533,Sport,Fitness,Control,,,...,1.653931,1535.25,1.602234,181.0,,,,,,True
1823264,CLMC41,1990-03-06,35.09,F,202504061231,Sport,Climbing,Control,,,...,1.651191,1188.05,1.594662,158.0,,,,,,True
1906800,BJJC21,1996-08-05,28.84,M,202506080916,Sport,Bjj,Control,M,heterosexual,...,1.738703,1534.31,1.636156,177.0,bachelor's degree,682.0,131.0,124.0,150.0,True
1914064,CLMC31,2002-01-21,23.35,M,202505291252,Sport,Climbing,Control,,,...,1.652718,1618.61,1.629763,181.5,,,,,,True
