In [222]:
import pandas as pd
import numpy as np

In [223]:
# Load the clinical export and keep only the identifier columns, the age
# column (A1) and the BRIEF item columns that are scored further down.
bleh = pd.read_csv('tajik_np_full_clinical_copy.csv')[
    ['Wave', 'Clinic', 'Site', 'IDX', 'ID', 'A1']
    + [
        f'Behavior{item_no}'
        for item_no in (
            1, 5, 8, 12, 13, 16, 19, 23, 28, 29,
            32, 33, 36, 37, 42, 43, 44, 50, 51, 55,
            57, 58, 61, 64, 67, 69, 70, 71, 72, 73,
        )
    ]
]

  bleh = pd.read_csv('tajik_np_full_clinical_copy.csv')[['Wave','Clinic','Site','IDX','ID','A1','Behavior1','Behavior5','Behavior8','Behavior12',


In [224]:
def get_age_category(age: float) -> "str | float":
    """
    Map an age in years to the label of its age bracket.

    : age : Age in years. Fractional ages are truncated with int(); missing
            values (NaN, None, pd.NA) are accepted.

    Returns 'age_18' for ages 18-29 and 'age_<decade>' ('age_30' ... 'age_80')
    for ages 30-89. Returns NaN when the age is missing, below 18 or 90 and
    above; the two out-of-range cases also print a message.
    """
    # pd.isna also recognises None and pd.NA, which np.isnan rejects with a TypeError
    if pd.isna(age):
        return np.nan

    age = int(age)

    if age < 18:
        print('Warning: age < 18')
        return np.nan
    if age >= 90:
        print('that cat is too old')
        return np.nan
    if age < 30:
        # the youngest bracket spans 18-29 rather than a single decade
        return 'age_18'

    # 30-89: the label is the decade the age falls in
    return f'age_{age // 10 * 10}'


def _lookup_norms(raw_scores, age_categories, table):
    """
    Look up one norm value per respondent in a single reference table.

    : raw_scores : Raw scores, one per respondent (NaN when not scorable)
    : age_categories : Age-bracket labels aligned with raw_scores (NaN when unknown)
    : table : Reference table with a 'raw_score' column and one column per age bracket

    Returns a list holding, for each respondent, the table value of the first
    row whose 'raw_score' equals the respondent's raw score, or NaN when the
    score or age bracket is missing or has no entry in the table.
    """
    table_scores = table['raw_score'].to_numpy()
    found = []
    for score, category in zip(raw_scores, age_categories):
        # pd.isna is required: NaN is truthy, so a plain `not value` test
        # never detects a missing score or age bracket
        if pd.isna(score) or pd.isna(category) or category not in table.columns:
            found.append(np.nan)
            continue
        hits = np.flatnonzero(table_scores == score)
        # a raw score outside the range of the table has no norm
        found.append(table[category].iloc[hits[0]] if hits.size else np.nan)
    return found


def score_brief(df: pd.DataFrame, id_cols: list, age_col: str,
                tables: dict = None, tables_path: str = 'BRIEF_tables.xls') -> pd.DataFrame:
    """
    Function to score responses to the BRIEF

    : df : Dataframe containing BRIEF item responses with the respective item number in the variable name
    : id_cols : Column names containing unique identifiers for the responder
    : age_col : Column name containing age of the responder
    : tables : Optional dict of reference tables keyed '<scale>_t' / '<scale>_pr', each with a
               'raw_score' column and one column per age category. Read from tables_path when omitted.
    : tables_path : Excel workbook holding one sheet per reference table (used only when tables is None)

    Returns a new dataframe (the input is not modified) with the item columns renamed to
    'b<item number>', invalid responses set to NaN, and '<scale>_raw', '<scale>_t' and
    '<scale>_pr' columns for:
        Inhibit
        Shift
        Emotional Control
        Self-Monitor
        Initiate
        Working Memory
        Plan/ Organize
        Task Monitor
        Org. of Materials
        Behavioral Regulation
        Metacognition
        Global Executive

    Raises Exception if an id/age column is missing, the age column is not numeric, or two
    columns carry the same item number. Raises KeyError if a reference table is missing.
    """

    # check df
    protected = [*id_cols, age_col]
    for col in protected:
        if col not in df.columns:
            raise Exception(f'{col} not in Dataframe')

    try:
        ages = pd.to_numeric(df[age_col])
    except (ValueError, TypeError) as err:
        raise Exception(f'Could not convert age column {age_col} to integer') from err

    # work on a copy so the caller's dataframe is left untouched
    df = df.copy()

    # rename BRIEF item columns; columns without an item number are left as they are
    renames = {}
    for col in df.columns:
        if col in protected:
            continue
        digits = ''.join(ch for ch in str(col) if ch.isdigit())
        if digits:
            renames[col] = f'b{int(digits)}'
    if len(set(renames.values())) != len(renames):
        raise Exception('Several columns map to the same BRIEF item number')
    df = df.rename(columns=renames)

    # replace invalid values with np.nan; numeric strings such as '2' are
    # converted first so that mixed-type CSV columns do not lose valid responses
    for name in renames.values():
        responses = pd.to_numeric(df[name], errors='coerce')
        df[name] = responses.where(responses.isin([1, 2, 3]))

    items = {
        'inhibit':['b5','b16','b29','b36','b43','b55','b58','b73'],
        'shift':['b8','b22','b32','b44','b61','b67'],
        'emotcontrol':['b1','b12','b19','b28','b33','b42','b51','b57','b69','b72'],
        'selfmonitor':['b13','b23','b37','b50','b64','b70'],
        'initiate':['b6','b14','b20','b25','b45','b49','b53','b62'],
        'workingmemory':['b4','b11','b17','b26','b35','b46','b56','b68'],
        'plan':['b9','b15','b21','b34','b39','b47','b54','b63','b66','b71'],
        'taskmonitor':['b2','b18','b24','b41','b52','b75'],
        'organization':['b3','b7','b30','b31','b40','b60','b65','b74'],
        'behavereg':['inhibit_raw','shift_raw','emotcontrol_raw','selfmonitor_raw'],
        'metacog':['initiate_raw','workingmemory_raw','plan_raw','taskmonitor_raw','organization_raw'],
        'globalexec':['behavereg_raw','metacog_raw']
    }

    # number of unanswered items tolerated per scale; the composite scores are
    # not listed and therefore need every component raw score
    # NOTE(review): the composite rule is an assumption - confirm against the scoring manual
    max_missing = {
        'shift': 1, 'selfmonitor': 1, 'taskmonitor': 1,
        'inhibit': 2, 'emotcontrol': 2, 'initiate': 2,
        'workingmemory': 2, 'plan': 2, 'organization': 2,
    }

    # age category per respondent (NaN when the age is missing or out of range)
    age_categories = ages.apply(get_age_category).to_numpy()

    # get reference tables
    if tables is None:
        tables = pd.read_excel(tables_path, sheet_name=None)

    # loop thru scales; composites come last so their component columns already exist
    for key, values in items.items():
        present = [col for col in values if col in df.columns]

        # raw (summed) score, only if sufficient values are available
        if present:
            answered = df[present].notna().sum(axis='columns')
            required = len(values) - max_missing.get(key, 0)
            df[key + '_raw'] = df[present].sum(axis='columns').where(answered >= required)
        else:
            df[key + '_raw'] = np.nan

        # convert raw scores to t-scores and percents
        raw_scores = df[key + '_raw'].to_numpy()
        df[key + '_t'] = _lookup_norms(raw_scores, age_categories, tables[key + '_t'])
        df[key + '_pr'] = _lookup_norms(raw_scores, age_categories, tables[key + '_pr'])

    return df


# Score the loaded responses, using A1 as the age column, and export the
# result to CSV without the dataframe index.
test_output = score_brief(
    df=bleh,
    id_cols=['Wave', 'Clinic', 'Site', 'IDX', 'ID'],
    age_col='A1',
)
test_output.to_csv('test.csv', index=False)