# Notebook for creating the supplementary feature metric table (Table S24)

In [1]:
# Configuration part #

# Input directories, one per feature-selection method. Each is expected to
# contain one '<drug>.csv' file per drug (see the table-building cell below).

# Output of ABESS analysis.ipynb
abess_dir = './abess/final_output_abess'
# Output of HHS analysis.ipynb
hhs_dir = './hhs/final_output_hhs'
# Outputs of Pycasso analysis.ipynb (one directory each for scad, mcp and l1)
scad_dir = './lr/final_output_scad'
mcp_dir = './lr/final_output_mcp'
l1_dir = './lr/final_output_l1'
# Output of Elastic net analysis.ipynb
elastic_net_dir = './elastic_net/final_output_elastic_net'

######################

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Drugs to summarise. Each name is used as the stem of the per-method input
# file ('<drug>.csv') and as a row label of the output table, in this order.
drugs = [
    'Rifampicin',
    'Isoniazid',
    'Pyrazinamide',
    'Ethambutol',
    'Streptomycin',
    'Kanamycin',
    'Amikacin',
    'Capreomycin',
    'Ofloxacin',
    'Moxifloxacin',
    'Ciprofloxacin',
    'Ethionamide',
    'Prothionamide',
]

In [24]:
def get_metric(data, n_runs=5, maj_threshold=3):
    """Summarise the ``count`` column of a feature-selection result table.

    The input frame is only read; it is never modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a numeric ``count`` column, one row per feature.
        NOTE(review): ``count`` is presumably the number of runs in which
        the feature was selected -- confirm against the notebooks that
        write these files.
    n_runs : int, default 5
        Divisor applied to the summed counts. NOTE(review): presumably the
        number of repeated runs/folds behind each ``count`` -- confirm.
    maj_threshold : int, default 3
        Minimum ``count`` for a row to be included in the "majority" subset.

    Returns
    -------
    tuple
        ``(N_sel, perc_maj, mean_maj_sel)`` where

        * ``N_sel`` is ``sum(count) / n_runs`` over all rows,
        * ``perc_maj`` is the share of that sum contributed by rows with
          ``count >= maj_threshold`` (NaN when ``N_sel`` is zero),
        * ``mean_maj_sel`` is the mean ``count`` of those rows (NaN when
          there are none).
    """
    counts = data['count']
    maj_counts = counts[counts >= maj_threshold]

    N_sel = counts.sum() / n_runs
    N_sel_maj = maj_counts.sum() / n_runs
    # Guard the 0/0 case explicitly: depending on the scalar type it would
    # otherwise emit a RuntimeWarning or raise ZeroDivisionError.
    perc_maj = N_sel_maj / N_sel if N_sel else np.nan
    mean_maj_sel = maj_counts.mean()
    return N_sel, perc_maj, mean_maj_sel

In [28]:
# (column suffix, input directory) for every feature-selection method.
# The order here fixes both the file read order and the column order
# inside each metric group of the output table.
method_dirs = (
    ('abess', abess_dir),
    ('hhs', hhs_dir),
    ('scad', scad_dir),
    ('mcp', mcp_dir),
    ('l1', l1_dir),
    ('en', elastic_net_dir),
)
# Metric names in the order get_metric returns them.
metric_names = ('N_sel', 'perc_maj', 'mean_maj_sel')

# One row per drug; columns are grouped by metric first, then by method.
result = pd.DataFrame(
    index=drugs,
    columns=[f'{metric}_{suffix}'
             for metric in metric_names
             for suffix, _ in method_dirs],
)

for drug in drugs:
    # One (N_sel, perc_maj, mean_maj_sel) tuple per method.
    per_method = [
        get_metric(pd.read_csv(f'{directory}/{drug}.csv', sep='\t',
                               index_col=None, header=0))
        for _, directory in method_dirs
    ]
    # zip(*...) transposes method-major tuples into the metric-major
    # layout of the columns.
    result.loc[drug] = [value
                        for metric_values in zip(*per_method)
                        for value in metric_values]

result.to_csv('TableS24.csv', sep='\t', index=True, header=True)