In [9]:
import numpy as np
import pandas as pd

def print_details(das_type, treatment = 'etanercept', data_dir = './data'):
    """Print a descriptive summary of one outcomes CSV.

    Loads ``{data_dir}/das28_BIOP_{das_type}_{treatment}_outcomes.csv`` and
    prints, in order:

    * ``N`` - the number of rows in the file;
    * the distinct values of ``eular_bin`` together with their counts;
    * ``Cases`` - the percentage of rows where ``class_poor == 1``;
    * one line per continuous column:
      ``<column> - <mean> - <population SD> - <% missing>``;
    * one line per binary column:
      ``<column> - <% of rows equal to 1> - <% missing>``.

    Parameters
    ----------
    das_type : str
        Selects the input file and the ``f'{das_type}.0'`` column
        (the notebook calls this with ``'crp'`` and ``'esr'``).
    treatment : str, default 'etanercept'
        Treatment name embedded in the file name.
    data_dir : str, default './data'
        Directory containing the outcomes CSV files.

    Returns
    -------
    None
        Everything is written to stdout.

    Notes
    -----
    The mean and SD are computed over non-missing values only, whereas the
    binary percentages use *all* rows as the denominator, so rows with a
    missing flag count as "not 1".
    """
    # NOTE(review): newer pandas releases appear to treat the literal string
    # 'None' as a missing-value marker in read_csv by default. If so, a 'None'
    # value in eular_bin would load as NaN and the np.unique call below could
    # fail while sorting mixed str/float values - confirm against the
    # installed pandas version before re-running.
    df = pd.read_csv(f'{data_dir}/das28_BIOP_{das_type}_{treatment}_outcomes.csv')
    n_rows = df.shape[0]

    if n_rows == 0:
        # Every percentage below divides by the row count, so an empty file
        # would otherwise end in a ZeroDivisionError.
        print('N: ', n_rows)
        print('No rows found; nothing to summarise.')
        return

    bins, counts = np.unique(df['eular_bin'], return_counts = True)
    n_cases = int((df['class_poor'] == 1).sum())
    cases = (n_cases / n_rows) * 100

    print('N: ', n_rows)
    print(bins, counts)
    print(f'Cases: {cases:0.2f}%')

    continuous_cols = [
        'das_swol.0', 'das_tend.0', f'{das_type}.0', 'das_vas.0',
        'HEIGHT', 'WEIGHT', 'AGEONSET', 'DISDUR', 'HAQ', 'HAD_A', 'HAD_D',
    ]
    for col in continuous_cols:
        observed = df[col].dropna()

        missing = ((n_rows - len(observed)) / n_rows) * 100

        mean = observed.mean()
        # ddof = 0 keeps the population SD that np.std reports by default.
        std = observed.std(ddof = 0)

        print(f'{col} - {mean:0.4f} - {std:0.2f} - {missing:0.2f}%')

    for bin_col in ['CONCURRENT_DMARD', 'SEX', 'SERO', 'FIRSTBIO']:
        flags = df[bin_col]

        missing = (int(flags.isna().sum()) / n_rows) * 100

        # NaN == 1 is False, so missing flags are never counted as positive,
        # but they do stay in the denominator (all rows).
        nnz = (int((flags == 1).sum()) / n_rows) * 100

        print(f'{bin_col} - {nnz:0.2f}% - {missing:0.2f}%')

In [10]:
# Summary of the 'crp' outcomes file, using the default treatment ('etanercept').
print_details(das_type = 'crp')

N:  778
['Good' 'Moderate' 'None'] [310 320 148]
Cases: 19.02%
das_swol.0 - 8.8445 - 5.20 - 0.00%
das_tend.0 - 14.6877 - 6.74 - 0.00%
crp.0 - 19.0739 - 25.07 - 0.00%
das_vas.0 - 74.7429 - 17.79 - 0.00%
HEIGHT - 164.3806 - 12.00 - 16.58%
WEIGHT - 78.1844 - 20.01 - 5.91%
AGEONSET - 47.3330 - 13.86 - 1.29%
DISDUR - 9.9401 - 10.35 - 1.29%
HAQ - 1.6085 - 0.65 - 12.85%
HAD_A - 8.0868 - 4.54 - 15.55%
HAD_D - 7.3841 - 4.02 - 15.68%
CONCURRENT_DMARD - 81.49% - 1.54%
SEX - 78.66% - 0.00%
SERO - 77.89% - 7.20%
FIRSTBIO - 90.62% - 0.90%


In [12]:
# Summary of the 'esr' outcomes file, using the default treatment ('etanercept').
print_details(das_type = 'esr')

N:  693
['Good' 'Moderate' 'None'] [228 322 143]
Cases: 20.63%
das_swol.0 - 8.6479 - 5.15 - 0.00%
das_tend.0 - 14.5137 - 6.81 - 0.00%
esr.0 - 30.2554 - 24.46 - 0.00%
das_vas.0 - 74.7633 - 17.54 - 0.00%
HEIGHT - 164.4378 - 12.27 - 15.30%
WEIGHT - 78.0837 - 18.91 - 5.19%
AGEONSET - 47.5377 - 13.77 - 1.01%
DISDUR - 9.9188 - 10.42 - 1.01%
HAQ - 1.6305 - 0.66 - 15.44%
HAD_A - 7.9947 - 4.49 - 18.04%
HAD_D - 7.3298 - 4.11 - 18.18%
CONCURRENT_DMARD - 81.53% - 1.88%
SEX - 78.21% - 0.00%
SERO - 78.07% - 5.63%
FIRSTBIO - 90.76% - 1.15%
