# Notebook for creating feature metric table (Table 3)

In [1]:
# Configuration part #

# SCAD feature metrics: output of Pycasso analysis.ipynb
scad_file = './lr/final_output_scad/scad_feature_metrics.csv'
# MCP feature metrics: output of Pycasso analysis.ipynb
mcp_file = './lr/final_output_mcp/mcp_feature_metrics.csv'
# L1 feature metrics: output of Pycasso analysis.ipynb
l1_file = './lr/final_output_l1/l1_feature_metrics.csv'
# ABESS feature metrics: output of ABESS analysis.ipynb
abess_file = './abess/final_output_abess/abess_feature_metrics.csv'
# HHS feature metrics: output of HHS analysis.ipynb
hhs_file = './hhs/final_output_hhs/hhs_feature_metrics.csv'
# Elastic net feature metrics: output of Elastic net analysis.ipynb
elastic_net_file = './elastic_net/final_output_elastic_net/elastic_net_feature_metrics.csv'
# List of resistance genes with their corresponding drugs
resistance_gene_file = './db/resistance_genes.csv'

######################

In [2]:
import numpy as np
import pandas as pd

In [15]:
# Drugs that form the rows of the output table, in display order.
drugs = [
    'Rifampicin', 'Isoniazid', 'Pyrazinamide', 'Ethambutol',
    'Streptomycin', 'Kanamycin', 'Amikacin', 'Capreomycin',
    'Ofloxacin', 'Moxifloxacin', 'Ciprofloxacin',
    'Ethionamide', 'Prothionamide',
]


# Read the resistance-gene table (tab-separated, first row is the header,
# no index column), then collect the sorted distinct values of its 'gene' column.
res_data = pd.read_csv(
    resistance_gene_file,
    sep='\t',
    header=0,
    index_col=None,
)
gene_column = res_data['gene']
res_genes = np.unique(gene_column)

In [16]:
def _read_feature_metrics(path):
    """Read one tab-separated feature-metrics file.

    The first row is used as the header and the first column as the index.
    """
    return pd.read_csv(path, sep='\t', index_col=0, header=0)


# One metrics table per feature-selection method.
abess_result = _read_feature_metrics(abess_file)
hhs_result = _read_feature_metrics(hhs_file)
scad_result = _read_feature_metrics(scad_file)
mcp_result = _read_feature_metrics(mcp_file)
l1_result = _read_feature_metrics(l1_file)
elastic_result = _read_feature_metrics(elastic_net_file)

In [17]:
# Column label -> metrics table. Insertion order fixes the column order of
# every frame built below: l1, scad, mcp, elastic, abess, hhs.
method_results = {
    'l1': l1_result,
    'scad': scad_result,
    'mcp': mcp_result,
    'elastic': elastic_result,
    'abess': abess_result,
    'hhs': hhs_result,
}

# Denominators shared by all methods.
# NOTE(review): every method's ratio is normalised by the L1 table's `res_gen`,
# not by the method's own `res_gen` column. This is harmless only if `res_gen`
# is identical across the method files - confirm that this is intended.
res_gen_reference = l1_result['res_gen']
non_res_gen_reference = len(res_genes) - res_gen_reference

# One row per drug; columns are filled method by method (aligned on the index).
jacard = pd.DataFrame(index=drugs, dtype=float)
true_gene = pd.DataFrame(index=drugs, dtype=float)
false_gene = pd.DataFrame(index=drugs)

for method, table in method_results.items():
    # 'jacard' column of the method's table, copied as is.
    jacard[method] = table['jacard']
    # 'sel_true_gene' relative to the reference `res_gen` count.
    true_gene[method] = table['sel_true_gene'] / res_gen_reference
    # 'sel_false_gene' relative to len(res_genes) minus the reference `res_gen`.
    false_gene[method] = table['sel_false_gene'] / non_res_gen_reference

# Force every column to float.
jacard = jacard.astype({col: float for col in jacard.columns})
true_gene = true_gene.astype({col: float for col in true_gene.columns})
false_gene = false_gene.astype({col: float for col in false_gene.columns})

In [18]:
# Tag every column with the metric it belongs to, e.g. 'l1' -> 'l1_jacard',
# so the three frames can be joined without column-name clashes.
for frame, metric in (
    (jacard, 'jacard'),
    (true_gene, 'true_gene'),
    (false_gene, 'false_gene'),
):
    frame.columns = [f"{label}_{metric}" for label in frame.columns]

In [19]:
# Put the three metric frames side by side (column-wise concatenation).
metric_frames = [jacard, true_gene, false_gene]
general = pd.concat(metric_frames, axis='columns')

In [21]:
# Write the combined table as tab-separated text, keeping the index
# and the header row.
general.to_csv(
    path_or_buf='Table3.csv',
    sep='\t',
    header=True,
    index=True,
)