# Convert raw ADOS scores to calibrated severity scores (CSS)

1. Encode the conversion (lookup) tables
2. Convert the raw total, social-affect (SA), and restricted and repetitive behavior (RRB) scores into calibrated severity estimates

In [1]:
import numpy as np
import pandas as pd
import pathlib as pal

from scipy.stats import pearsonr

In [2]:
# Base path: the kernel's current working directory, resolved to an absolute path
# (this is the notebook's folder only when the kernel is started from there)
root_p = pal.Path().resolve()

# Input locations
data_p = root_p.joinpath('../../source_data/Data')
pheno_1_p = data_p.joinpath('ABIDE1_Pheno_PSM_matched.tsv')
pheno_2_p = data_p.joinpath('ABIDE2_Pheno_PSM_matched.tsv')
sev_css_p = data_p.joinpath('Severity_LUT.txt')
sa_css_p = data_p.joinpath('SA_CSS_LUT.txt')
rbb_css_p = data_p.joinpath('RRB_CSS_LUT.txt')

# Output locations (the output folder is created if it does not exist yet)
fig_p = root_p.joinpath('../../output_data/Supplemental/convert_ados')
fig_p.mkdir(parents=True, exist_ok=True)
pheno_1_out_p = fig_p.joinpath('ABIDE_1_Pheno_PSM_matched_ados.tsv')
pheno_2_out_p = fig_p.joinpath('ABIDE_2_Pheno_PSM_matched_ados.tsv')

In [3]:
def load_css_table(lut_path):
    """Read one severity lookup table (LUT) into a tidy DataFrame.

    The file is read as tab-separated text without a header row. Column 0 is
    kept as ``css`` and column 1 as ``module``. Columns 2 and 3 each hold a
    ``low:high`` range and are split once on ':' into ``age_min``/``age_max``
    and ``val_min``/``val_max``. The split bounds are left as strings; the
    ``lookup`` function casts them to float when it compares them.

    Parameters
    ----------
    lut_path : path-like
        Location of the lookup-table file.

    Returns
    -------
    pandas.DataFrame
        Columns ``css``, ``module``, ``age_min``, ``age_max``, ``val_min``
        and ``val_max``, one row per table entry.
    """
    raw = pd.read_csv(lut_path, sep='\t', header=None).rename(columns={0: 'css', 1: 'module'})
    raw[['age_min', 'age_max']] = raw[2].str.split(':', n=1, expand=True)
    raw[['val_min', 'val_max']] = raw[3].str.split(':', n=1, expand=True)
    return raw[['css', 'module', 'age_min', 'age_max', 'val_min', 'val_max']]


# Phenotype tables of the two matched ABIDE samples
pheno_1 = pd.read_csv(pheno_1_p, sep='\t')
pheno_2 = pd.read_csv(pheno_2_p, sep='\t')

# Conversion tables, one per score type (total severity, SA, RRB);
# all three files share the same layout, so they go through the same loader
sev = load_css_table(sev_css_p)
sa = load_css_table(sa_css_p)
rbb = load_css_table(rbb_css_p)

In [4]:
# Approximate the social-affect (SA) raw score as the sum of the social and
# communication scores; a missing value in either input gives a missing sum
for frame, social_col, comm_col in (
    (pheno_1, 'ADOS_SOCIAL', 'ADOS_COMM'),
    (pheno_2, 'ADOS_G_SOCIAL', 'ADOS_G_COMM'),
):
    frame['ADOS_SA_approximated'] = frame[social_col] + frame[comm_col]

In [5]:
def lookup(pheno, table, col='ADOS_TOTAL', col_alt=None):
    """Map raw ADOS scores to calibrated severity scores (CSS) via a lookup table.

    For every participant the table row is selected whose module matches and
    whose inclusive age and raw-score ranges contain the participant's
    (rounded) age and raw score.

    Parameters
    ----------
    pheno : pandas.DataFrame
        One row per participant. Must contain ``col`` (and ``col_alt`` when
        given) as well as the ``ADOS_MODULE`` and ``AGE_AT_SCAN`` columns.
    table : pandas.DataFrame
        Conversion table with the columns ``css``, ``module``, ``age_min``,
        ``age_max``, ``val_min`` and ``val_max``. The bounds may be stored as
        strings; they are cast to float here.
    col : str
        Column holding the raw score to convert.
    col_alt : str, optional
        Fallback column. It is used for a row only when ``col`` is missing
        and ``col_alt`` is not.

    Returns
    -------
    list
        One entry per row of ``pheno``, in row order: the first matching
        ``css`` value, or ``None`` when the score or module is missing, the
        module does not occur in ``table``, or no table row matches.
    """
    # Cast the table bounds once instead of once per participant.
    module_col = table['module']
    age_lo = table['age_min'].astype(float).values
    age_hi = table['age_max'].astype(float).values
    val_lo = table['val_min'].astype(float).values
    val_hi = table['val_max'].astype(float).values
    css_col = table['css'].values

    css = []
    for _, row in pheno.iterrows():
        # Fall back to the alternative column only when the primary one is missing.
        use_col = col
        if col_alt is not None and pd.isna(row[col]) and not pd.isna(row[col_alt]):
            use_col = col_alt

        value = row[use_col]
        module = row['ADOS_MODULE']
        if pd.isna(value) or pd.isna(module):
            css.append(None)
            continue

        in_module = (module_col == module).values
        if not in_module.any():
            # The table has no entry for this module, so there is nothing to
            # look up (taking max/min of an empty selection would raise).
            css.append(None)
            continue

        age = np.round(row['AGE_AT_SCAN'])
        age_max = age_hi[in_module].max()
        age_min = age_lo[in_module].min()
        if age > age_max:
            # Older than the module's age range: module 4 is always clamped to
            # the oldest bracket, other modules only when the rounded age is
            # less than 3 years above it. Anyone else stays unmatched.
            if module == 4 or age - age_max < 3:
                age = age_max
        elif age < age_min:
            # Younger than the module's age range: reported, left unmatched.
            print('!!!!!oh oh!!!!')

        hit = (in_module
               & (age_lo <= age) & (age_hi >= age)
               & (val_lo <= value) & (val_hi >= value))
        matches = css_col[hit]
        css.append(matches[0] if matches.size else None)
    return css

## Availability

In [6]:
# ABIDE 1: participants with at least one of the two raw totals, per diagnostic group
pheno_1.dropna(subset=['ADOS_TOTAL', 'ADOS_GOTHAM_TOTAL'], how='all')['DX_GROUP'].value_counts()

DX_GROUP
Autism     196
Control     32
Name: count, dtype: int64

In [7]:
# ABIDE 1: participants with a provided Gotham severity score, per diagnostic group
pheno_1.dropna(subset=['ADOS_GOTHAM_SEVERITY'])['DX_GROUP'].value_counts()

DX_GROUP
Autism     91
Control    16
Name: count, dtype: int64

In [8]:
# ABIDE 2: participants with at least one of the two raw totals, per diagnostic group
pheno_2.dropna(subset=['ADOS_G_TOTAL', 'ADOS_2_TOTAL'], how='all')['DX_GROUP'].value_counts()

DX_GROUP
Autism     209
Control     17
Name: count, dtype: int64

In [9]:
# ABIDE 2: participants with a provided ADOS-2 total severity score, per diagnostic group
pheno_2.dropna(subset=['ADOS_2_SEVERITY_TOTAL'])['DX_GROUP'].value_counts()

DX_GROUP
Autism    115
Name: count, dtype: int64

## Total scores

In [10]:
# Total CSS from the raw totals: once from the primary column alone, and once
# with the alternative total filling in where the primary one is missing
for frame, primary_col, fallback_col in (
    (pheno_1, 'ADOS_TOTAL', 'ADOS_GOTHAM_TOTAL'),
    (pheno_2, 'ADOS_G_TOTAL', 'ADOS_2_TOTAL'),
):
    frame['ADOS_CSS_proxy'] = lookup(frame, sev, col=primary_col)
    frame['ADOS_CSS_proxy_fully'] = lookup(frame, sev, col=primary_col, col_alt=fallback_col)

In [11]:
# ABIDE 1: participants for whom a total CSS proxy could be derived, per diagnostic group
pheno_1.dropna(subset=['ADOS_CSS_proxy_fully'])['DX_GROUP'].value_counts()

DX_GROUP
Autism     190
Control     31
Name: count, dtype: int64

In [12]:
# ABIDE 2: participants for whom a total CSS proxy could be derived, per diagnostic group
pheno_2.dropna(subset=['ADOS_CSS_proxy_fully'])['DX_GROUP'].value_counts()

DX_GROUP
Autism     207
Control     16
Name: count, dtype: int64

In [13]:
# ABIDE 1: agreement between the two raw totals where both are available
pheno_1[['ADOS_TOTAL', 'ADOS_GOTHAM_TOTAL']].dropna().corr()

Unnamed: 0,ADOS_TOTAL,ADOS_GOTHAM_TOTAL
ADOS_TOTAL,1.0,0.919886
ADOS_GOTHAM_TOTAL,0.919886,1.0


In [14]:
# ABIDE 2: agreement between the two raw totals where both are available
pheno_2[['ADOS_G_TOTAL', 'ADOS_2_TOTAL']].dropna().corr()

Unnamed: 0,ADOS_G_TOTAL,ADOS_2_TOTAL
ADOS_G_TOTAL,1.0,0.890681
ADOS_2_TOTAL,0.890681,1.0


In [15]:
# ABIDE 1: derived CSS proxy versus the provided Gotham severity score
pheno_1[['ADOS_CSS_proxy_fully', 'ADOS_GOTHAM_SEVERITY']].dropna().corr()

Unnamed: 0,ADOS_CSS_proxy_fully,ADOS_GOTHAM_SEVERITY
ADOS_CSS_proxy_fully,1.0,0.90092
ADOS_GOTHAM_SEVERITY,0.90092,1.0


In [16]:
# ABIDE 1: p-value of the Pearson correlation between the derived CSS proxy
# and the provided Gotham severity score (rows with both values only)
# NOTE(review): with three decimals a very small p-value prints as 0.000;
# report such a result as p < 0.001.
df = pheno_1.dropna(subset=['ADOS_CSS_proxy_fully', 'ADOS_GOTHAM_SEVERITY'])
corr, p_value = pearsonr(df['ADOS_CSS_proxy_fully'], df['ADOS_GOTHAM_SEVERITY'])
print(f"P-value: {p_value:.3f}")

P-value: 0.000


In [17]:
# ABIDE 2: derived CSS proxy versus the provided ADOS-2 total severity score
pheno_2[['ADOS_CSS_proxy_fully', 'ADOS_2_SEVERITY_TOTAL']].dropna().corr()

Unnamed: 0,ADOS_CSS_proxy_fully,ADOS_2_SEVERITY_TOTAL
ADOS_CSS_proxy_fully,1.0,0.939199
ADOS_2_SEVERITY_TOTAL,0.939199,1.0


In [18]:
# ABIDE 2: p-value of the Pearson correlation between the derived CSS proxy
# and the provided ADOS-2 total severity score (rows with both values only)
# NOTE(review): with three decimals a very small p-value prints as 0.000;
# report such a result as p < 0.001.
df = pheno_2.dropna(subset=['ADOS_CSS_proxy_fully', 'ADOS_2_SEVERITY_TOTAL'])
corr, p_value = pearsonr(df['ADOS_CSS_proxy_fully'], df['ADOS_2_SEVERITY_TOTAL'])
print(f"P-value: {p_value:.3f}")

P-value: 0.000


In [19]:
# Combine the raw totals that were used to compute the CSS
pheno_1['ADOS_RAW_TOTAL_combined'] = pheno_1['ADOS_TOTAL']
pheno_1['ADOS_RAW_TOTAL_combined'] = pheno_1['ADOS_TOTAL'].fillna(pheno_1['ADOS_GOTHAM_TOTAL'])

pheno_2['ADOS_RAW_TOTAL_combined'] = pheno_2['ADOS_G_TOTAL']
pheno_2['ADOS_RAW_TOTAL_combined'] = pheno_2['ADOS_G_TOTAL'].fillna(pheno_2['ADOS_2_TOTAL'])

## Social scores

In [20]:
# ABIDE 1: approximated SA score versus the provided Gotham social-affect score
pheno_1[['ADOS_SA_approximated', 'ADOS_GOTHAM_SOCAFFECT']].dropna().corr()

Unnamed: 0,ADOS_SA_approximated,ADOS_GOTHAM_SOCAFFECT
ADOS_SA_approximated,1.0,0.931774
ADOS_GOTHAM_SOCAFFECT,0.931774,1.0


In [21]:
# ABIDE 2: approximated SA score versus the provided ADOS-2 social-affect score
pheno_2[['ADOS_SA_approximated', 'ADOS_2_SOCAFFECT']].dropna().corr()

Unnamed: 0,ADOS_SA_approximated,ADOS_2_SOCAFFECT
ADOS_SA_approximated,1.0,0.949779
ADOS_2_SOCAFFECT,0.949779,1.0


In [22]:
# Convert the social-affect raw scores with the SA-specific table (`sa`, read
# from SA_CSS_LUT.txt). The total-severity table (`sev`) must not be used here:
# its score ranges belong to the raw totals, not to the SA domain score.
pheno_1['ADOS_CSS_SA_proxy'] = lookup(pheno_1, sa, col='ADOS_SA_approximated')
pheno_1['ADOS_CSS_SA_proxy_fully'] = lookup(pheno_1, sa, col='ADOS_SA_approximated', col_alt='ADOS_GOTHAM_SOCAFFECT')
pheno_2['ADOS_CSS_SA_proxy'] = lookup(pheno_2, sa, col='ADOS_SA_approximated')
pheno_2['ADOS_CSS_SA_proxy_fully'] = lookup(pheno_2, sa, col='ADOS_SA_approximated', col_alt='ADOS_2_SOCAFFECT')

In [23]:
# Combine the raw totals that were used to compute the CSS SA
pheno_1['ADOS_RAW_SA_combined'] = pheno_1['ADOS_SA_approximated']
pheno_1['ADOS_RAW_SA_combined'] = pheno_1['ADOS_RAW_SA_combined'].fillna(pheno_1['ADOS_GOTHAM_SOCAFFECT'])

pheno_2['ADOS_RAW_SA_combined'] = pheno_2['ADOS_SA_approximated']
pheno_2['ADOS_RAW_SA_combined'] = pheno_2['ADOS_RAW_SA_combined'].fillna(pheno_2['ADOS_2_SOCAFFECT'])

## RRB scores

In [24]:
# ABIDE 1: stereotyped-behavior score versus the provided Gotham RRB score
pheno_1[['ADOS_STEREO_BEHAV', 'ADOS_GOTHAM_RRB']].dropna().corr()

Unnamed: 0,ADOS_STEREO_BEHAV,ADOS_GOTHAM_RRB
ADOS_STEREO_BEHAV,1.0,0.775084
ADOS_GOTHAM_RRB,0.775084,1.0


In [25]:
# ABIDE 2: stereotyped-behavior score versus the provided ADOS-2 RRB score
pheno_2[['ADOS_G_STEREO_BEHAV', 'ADOS_2_RRB']].dropna().corr()

Unnamed: 0,ADOS_G_STEREO_BEHAV,ADOS_2_RRB
ADOS_G_STEREO_BEHAV,1.0,0.870761
ADOS_2_RRB,0.870761,1.0


In [26]:
# Convert the repetitive-behavior raw scores with the RRB-specific table
# (`rbb`, read from RRB_CSS_LUT.txt). The total-severity table (`sev`) must not
# be used here: its score ranges belong to the raw totals, not to the RRB score.
pheno_1['ADOS_CSS_RRB_proxy'] = lookup(pheno_1, rbb, col='ADOS_STEREO_BEHAV')
pheno_1['ADOS_CSS_RRB_proxy_fully'] = lookup(pheno_1, rbb, col='ADOS_STEREO_BEHAV', col_alt='ADOS_GOTHAM_RRB')
pheno_2['ADOS_CSS_RRB_proxy'] = lookup(pheno_2, rbb, col='ADOS_G_STEREO_BEHAV')
pheno_2['ADOS_CSS_RRB_proxy_fully'] = lookup(pheno_2, rbb, col='ADOS_G_STEREO_BEHAV', col_alt='ADOS_2_RRB')

In [27]:
# Combine the raw totals that were used to compute the CSS RRB
pheno_1['ADOS_RAW_RRB_combined'] = pheno_1['ADOS_STEREO_BEHAV']
pheno_1['ADOS_RAW_RRB_combined'] = pheno_1['ADOS_RAW_RRB_combined'].fillna(pheno_1['ADOS_GOTHAM_RRB'])

pheno_2['ADOS_RAW_RRB_combined'] = pheno_2['ADOS_G_STEREO_BEHAV']
pheno_2['ADOS_RAW_RRB_combined'] = pheno_2['ADOS_RAW_RRB_combined'].fillna(pheno_2['ADOS_2_RRB'])

In [28]:
# NOTE(review): this looks like leftover scaffolding. The message implies the
# export step that follows is disabled until converted to a code cell, yet the
# following cell is already a code cell. Confirm and remove this cell.
print('Next wont run unless converted to code')

Next wont run unless converted to code


In [29]:
# Write both phenotype tables, including the columns added above, as
# tab-separated files without the row index
for frame, out_path in ((pheno_1, pheno_1_out_p), (pheno_2, pheno_2_out_p)):
    frame.to_csv(out_path, sep='\t', index=False)