In [33]:
import pandas as pd
from pathlib import Path

## Load the OASIS participant/session export

# `__file__` is not defined inside a notebook, so the data directory is anchored
# on the current working directory instead: <cwd>/../data/oasis.
root_p = Path.cwd().parent / "data" / "oasis"

df = pd.read_csv(root_p / "clarken_8_28_2023_13_54_33.csv")

In [34]:
# Preview the first five rows to check that the export loaded as expected
df.head()

Unnamed: 0,MR ID,Subject,Age,Scanner,Scans,M/F,NPIQINF,NPIQINFX,DEL,DELSEV,...,NEOPIF,COGOTH,COGOTHX,COGOTHIF,COGOTH2,COGOTH2X,COGOTH2F,COGOTH3,COGOTH3X,COGOTH3F
0,OAS30001_MR_d0129,OAS30001,65.0,3.0T,"bold(3), T1w(2), T2w(2)",F,2.0,,0.0,,...,,0.0,,,,,,,,
1,OAS30001_MR_d0757,OAS30001,67.0,3.0T,"bold(2), dwi(1), minIP(1), swi(1), T1w(2), T2s...",F,2.0,,0.0,,...,,0.0,,,,,,,,
2,OAS30001_MR_d2430,OAS30001,71.0,3.0T,"angio(1), asl(1), dwi(2), fieldmap(3), FLAIR(1...",F,2.0,,0.0,,...,,0.0,,,0.0,,,0.0,,
3,OAS30001_MR_d3132,OAS30001,73.0,3.0T,"asl(2), bold(2), dwi(2), fieldmap(3), T1w(1), ...",F,2.0,,0.0,,...,,0.0,,,0.0,,,0.0,,
4,OAS30001_MR_d3746,OAS30001,,,"angio(1), asl(1), bold(2), fieldmap(3), FLAIR(...",F,2.0,,0.0,,...,,0.0,,,,,,,,


In [35]:
# Standardise column names and encode sex numerically (M -> 0, F -> 1).
# `rename` returns a new frame and ignores columns that are already renamed, and
# the mapping lets existing 0/1 codes pass through unchanged, so re-running this
# cell does not wipe the `sex` column to NaN.
df = df.rename(
    columns={"M/F": "sex", "Age": "age", "Subject": "participant_id"},
)
sex_codes = {"M": 0, "F": 1, 0: 0, 1: 1}
df["sex"] = df["sex"].map(sex_codes)

In [36]:
# Controls are picked up through either of two routes, because of how missing
# data was coded:
#   (a) dx1 reads 'Cognitively healthy' and every flag listed below is blank, or
#   (b) NORMCOG is explicitly set to 1.
empty_columns = [
    "NORMCOG", "DEMENTED",
    "MCIAMEM", "MCIAPLUS", "MCIAPLAN", "MCIAPATT", "MCIAPEX", "MCIAPVIS",
    "MCINON1", "MCIN1LAN", "MCIN1ATT", "MCIN1EX", "MCIN1VIS",
    "MCINON2", "MCIN2LAN", "MCIN2ATT", "MCIN2EX", "MCIN2VIS",
]

# Build each criterion as its own boolean mask, then combine them
all_flags_blank = df[empty_columns].isna().all(axis=1)
healthy_by_dx1 = df["dx1"].eq('Cognitively healthy')
normcog_is_one = df['NORMCOG'].eq(1)
control_df = df.loc[(healthy_by_dx1 & all_flags_blank) | normcog_is_one]

In [37]:
# MCI group: keep every row where at least one MCI diagnosis flag is truthy
mci_flag_columns = [
    'MCIAMEM', 'MCIAPLUS', 'MCIAPLAN', 'MCIAPATT', 'MCIAPEX', 'MCIAPVIS',
    'MCINON1', 'MCIN1LAN', 'MCIN1ATT', 'MCIN1EX', 'MCIN1VIS',
    'MCINON2', 'MCIN2LAN', 'MCIN2ATT', 'MCIN2EX', 'MCIN2VIS',
]
has_mci_flag = df[mci_flag_columns].any(axis=1)
mci_df = df.loc[has_mci_flag]

In [38]:
# Dementia group (labelled "AD" in this notebook): rows whose DEMENTED flag equals 1
is_demented = df['DEMENTED'].eq(1)
ad_df = df.loc[is_demented]

In [40]:
# Label each cohort with its group code.
# The cohort frames are boolean-filtered selections of `df`; writing a column
# into them with `.loc[:, ...] = ...` can trigger SettingWithCopyWarning.
# `assign` returns a new frame carrying the extra column, which keeps the
# labelling explicit, leaves `df` untouched and is safe to re-run.
control_df = control_df.assign(group="CON")
mci_df = mci_df.assign(group="MCI")
ad_df = ad_df.assign(group="AD")

In [41]:
def _mbi_conversion(df):
    """Derive MBI domain scores, a total score and a binary MBI status.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns APA, DEPD, ANX, ELAT, AGIT, IRR, MOT, DISN,
        DEL and HALL.
        NOTE(review): presumably 0/1 symptom indicators with NaN for missing;
        confirm that no sentinel "unknown" codes remain in these columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with these columns added: ``decreased_motivation``,
        ``emotional_dysregulation``, ``impulse_dyscontrol``,
        ``social_inappropriateness``, ``abnormal_perception``,
        ``MBI_total_score`` and ``mbi_status``. The input frame is not
        modified.

        ``MBI_total_score`` is the sum of the available domain scores and is
        NaN when no domain score is available. ``mbi_status`` uses the
        nullable ``Int64`` dtype: 1 when the total is >= 1, 0 when it is
        below 1, and ``<NA>`` when the total is missing.
    """
    # Work on a copy so the caller's frame (often a filtered selection of a
    # larger table) is never mutated as a side effect.
    out = df.copy()

    # Calculate MBI domains; `+` propagates NaN, so a domain is missing as
    # soon as one of its items is missing.
    out["decreased_motivation"] = out["APA"]
    out["emotional_dysregulation"] = out["DEPD"] + out["ANX"] + out["ELAT"]
    out["impulse_dyscontrol"] = out["AGIT"] + out["IRR"] + out["MOT"]
    out["social_inappropriateness"] = out["DISN"]
    out["abnormal_perception"] = out["DEL"] + out["HALL"]

    # Calculate MBI total score
    mbi_domains = [
        "decreased_motivation",
        "emotional_dysregulation",
        "impulse_dyscontrol",
        "social_inappropriateness",
        "abnormal_perception",
    ]
    # min_count=1 keeps the total missing when every domain is missing; the
    # default would report 0 and make an unassessed row look symptom-free.
    total = out[mbi_domains].sum(axis=1, min_count=1)
    out["MBI_total_score"] = total

    # Nullable integer status so that "not assessed" stays distinguishable
    # from "assessed and negative".
    status = (total >= 1).astype("Int64")
    status[total.isna()] = pd.NA
    out["mbi_status"] = status

    return out

In [42]:
# Add MBI domain scores and status to the MCI and dementia cohorts.
# The dementia cohort is held in `ad_df` (no `dementia_df` is defined earlier in
# the notebook), so it is the input here; the result is bound to `dementia_df`.
# Copies are passed so the cohort frames themselves are not modified.
mci_df = _mbi_conversion(mci_df.copy())
dementia_df = _mbi_conversion(ad_df.copy())

In [None]:
# Next steps: match connectomes to the patient df to check which ones we have, and select the earliest one with an MBI status.
# Then separate into positive and negative, select all the correct columns, and merge with controls.



