# Back-test for analysing results from main.py

In [1]:
import os
import json
import numpy as np
import pandas as pd
import src.data.dataloader as dl
import src.utils.metric as customMetric

## Configure paths

In [2]:
# Identifiers of the four sub-tasks analysed in this notebook.
tasks = ["Task_11", "Task_12", "Task_21", "Task_22"]
# Model version folder; keeps its leading slash so it can be spliced straight
# into the path template below.
version = '/V5'
# One out.json per sub-task, stored under models/<version>/<task id>/.
out_path_11, out_path_12, out_path_21, out_path_22 = (
    f"models{version}/{task_id}/out.json" for task_id in ("11", "12", "21", "22")
)

In [3]:
def _load_out_series(path):
    """Read one out.json file into a pandas Series."""
    return pd.read_json(path, typ='series')


# One Series per sub-task, read from the paths configured above.
out_series_11 = _load_out_series(out_path_11)
out_series_12 = _load_out_series(out_path_12)
out_series_21 = _load_out_series(out_path_21)
out_series_22 = _load_out_series(out_path_22)

In [4]:
def _series_to_frame(series):
    """Turn a Series into a two-column frame: its index as 'audio', its values as 'label'."""
    return pd.DataFrame({'audio': series.index, 'label': series.values})


# Flatten each loaded Series into an (audio, label) table so it can be joined
# against the ground-truth labels on the 'audio' column.
out_df_11 = _series_to_frame(out_series_11)
out_df_12 = _series_to_frame(out_series_12)
out_df_21 = _series_to_frame(out_series_21)
out_df_22 = _series_to_frame(out_series_22)

In [5]:
# Both annotation folders sit under the same SPRSound test_json root.
_test_json_root = "SPRSound/test_json"
inter_json_dir = f"{_test_json_root}/inter_test_json"
intra_json_dir = f"{_test_json_root}/intra_test_json"

In [6]:
def genLabels(json_dir, task):
    """Build a ground-truth label table from a directory of annotation JSON files.

    Parameters
    ----------
    json_dir : str
        Directory holding one ``<recording>.json`` annotation file per
        recording. Entries without a ``.json`` extension are ignored.
    task : int
        ``1`` produces event-level labels: one row per entry of the file's
        ``"event_annotation"`` list, with ``audio`` set to
        ``<recording>_<event index>.wav``.
        ``2`` produces record-level labels: one row per file, taken from
        ``"record_annotation"``, with ``audio`` set to ``<recording>.wav``.
        Any other value yields an empty table.

    Returns
    -------
    pandas.DataFrame
        Columns ``audio``, ``Task_<task>1`` and ``Task_<task>2`` with a fresh
        0..n-1 index. Rows are ordered by file name, then by event index.

    Raises
    ------
    KeyError
        If a JSON file lacks the annotation key required for ``task``.
    """
    columns = ["audio", "Task_{}1".format(task), "Task_{}2".format(task)]
    # Collect plain dicts and build the frame once at the end; concatenating
    # a DataFrame per row copies the whole table on every iteration.
    rows = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for recording in sorted(os.listdir(json_dir)):
        name, ext = os.path.splitext(recording)
        if ext.lower() != ".json":
            # Stray files (README, .DS_Store, checkpoints, ...) are not annotations.
            continue
        with open(os.path.join(json_dir, recording), encoding="utf-8") as f:
            rec_json = json.load(f)
        if task == 1:
            for count, event in enumerate(rec_json["event_annotation"]):
                # Annotations join combined event types with "+", while the
                # label table uses "&".
                label_12 = event["type"].replace("+", "&")
                # Binary task: everything that is not "Normal" is "Adventitious".
                label_11 = "Adventitious" if label_12 != "Normal" else label_12
                rows.append(
                    {
                        "audio": name + "_{}.wav".format(count),
                        "Task_11": label_11,
                        "Task_12": label_12,
                    }
                )
        elif task == 2:
            label_22 = rec_json["record_annotation"]
            # Ternary task: "Normal" and "Poor Quality" are kept as-is, every
            # other record annotation collapses to "Adventitious".
            label_21 = (
                "Adventitious"
                if label_22 not in ("Normal", "Poor Quality")
                else label_22
            )
            rows.append(
                {"audio": name + ".wav", "Task_21": label_21, "Task_22": label_22}
            )
    return pd.DataFrame(rows, columns=columns)

In [7]:
# Event-level ground truth (Task 1x) from the inter-test annotations.
label_intertest_1 = genLabels(json_dir=inter_json_dir, task=1)
# Record-level ground truth (Task 2x).
# NOTE(review): despite the "intertest" name this table is built from
# intra_json_dir -- confirm which test split the Task 2 out.json files cover.
label_intertest_2 = genLabels(json_dir=intra_json_dir, task=2)

## Generate analysis for Task 1

In [8]:
def _to_idx_11(label_name):
    """Map a label through dl.label2idx for Task_11 and convert the result to NumPy."""
    return dl.label2idx(label_name, "Task_11").cpu().detach().numpy()


# Attach the Task 11 output to each ground-truth row by audio name; the
# Task_12 ground-truth column is not needed in this table.
_pred_by_audio_11 = out_df_11.set_index('audio')
task_11_df = label_intertest_1.join(_pred_by_audio_11, on='audio').drop(columns="Task_12")

# Encode both the ground truth ("Task_11") and the model output ("label").
task_11_df["Task_11"] = task_11_df["Task_11"].apply(_to_idx_11)
task_11_df["label"] = task_11_df["label"].apply(_to_idx_11)

# Row-wise flag: does the model output equal the ground truth?
_same_11 = task_11_df["Task_11"] == task_11_df["label"]
task_11_df["match"] = np.where(_same_11, True, False)

In [9]:
def _to_idx_12(label_name):
    """Map a label through dl.label2idx for Task_12 and convert the result to NumPy."""
    return dl.label2idx(label_name, "Task_12").cpu().detach().numpy()


# Attach the Task 12 output to each ground-truth row by audio name; the
# Task_11 ground-truth column is not needed in this table.
_pred_by_audio_12 = out_df_12.set_index('audio')
task_12_df = label_intertest_1.join(_pred_by_audio_12, on='audio').drop(columns="Task_11")

# Encode both the ground truth ("Task_12") and the model output ("label").
task_12_df["Task_12"] = task_12_df["Task_12"].apply(_to_idx_12)
task_12_df["label"] = task_12_df["label"].apply(_to_idx_12)

# Row-wise flag: does the model output equal the ground truth?
_same_12 = task_12_df["Task_12"] == task_12_df["label"]
task_12_df["match"] = np.where(_same_12, True, False)

In [10]:
# Score Task 11 and Task 12: the first argument is the encoded ground-truth
# column, the second the encoded model-output column.
# NOTE(review): the trailing positional arguments (True, 1) are defined by
# src.utils.metric.calc_score -- presumably a print/verbose flag and the task
# group number; confirm against that module. The cell output below shows
# SE / SP / AS / HS / Score lines for each of the two calls.
score_11 = customMetric.calc_score(task_11_df["Task_11"], task_11_df["label"], True, 1)
score_12 = customMetric.calc_score(task_12_df["Task_12"], task_12_df["label"], True, 1)

Sensitivity (SE): 0.9203
Specificity (SP): 0.8923
Average Score (AS): 0.9063
Harmonic Score (HS): 0.9061
Score: 0.9062
Sensitivity (SE): 0.7481
Specificity (SP): 0.8856
Average Score (AS): 0.8168
Harmonic Score (HS): 0.8110
Score: 0.8139


## Generate analysis for Task 2

In [11]:
def _to_idx_21(label_name):
    """Map a label through dl.label2idx for Task_21 and convert the result to NumPy."""
    return dl.label2idx(label_name, "Task_21").cpu().detach().numpy()


# Attach the Task 21 output to each ground-truth row by audio name; the
# Task_22 ground-truth column is not needed in this table.
_pred_by_audio_21 = out_df_21.set_index('audio')
task_21_df = label_intertest_2.join(_pred_by_audio_21, on='audio').drop(columns="Task_22")

# Encode both the ground truth ("Task_21") and the model output ("label").
task_21_df["Task_21"] = task_21_df["Task_21"].apply(_to_idx_21)
task_21_df["label"] = task_21_df["label"].apply(_to_idx_21)

# Row-wise flag: does the model output equal the ground truth?
_same_21 = task_21_df["Task_21"] == task_21_df["label"]
task_21_df["match"] = np.where(_same_21, True, False)

In [12]:
def _to_idx_22(label_name):
    """Map a label through dl.label2idx for Task_22 and convert the result to NumPy."""
    return dl.label2idx(label_name, "Task_22").cpu().detach().numpy()


# Attach the Task 22 output to each ground-truth row by audio name; the
# Task_21 ground-truth column is not needed in this table.
_pred_by_audio_22 = out_df_22.set_index('audio')
task_22_df = label_intertest_2.join(_pred_by_audio_22, on='audio').drop(columns="Task_21")

# Encode both the ground truth ("Task_22") and the model output ("label").
task_22_df["Task_22"] = task_22_df["Task_22"].apply(_to_idx_22)
task_22_df["label"] = task_22_df["label"].apply(_to_idx_22)

# Row-wise flag: does the model output equal the ground truth?
_same_22 = task_22_df["Task_22"] == task_22_df["label"]
task_22_df["match"] = np.where(_same_22, True, False)

In [13]:
# Score Task 21 and Task 22: the first argument is the encoded ground-truth
# column, the second the encoded model-output column.
# NOTE(review): the trailing positional arguments (True, 2) are defined by
# src.utils.metric.calc_score -- presumably a print/verbose flag and the task
# group number; confirm against that module. The cell output below shows
# SE / SP / AS / HS / Score lines for each of the two calls.
score_21 = customMetric.calc_score(task_21_df["Task_21"], task_21_df["label"], True, 2)
score_22 = customMetric.calc_score(task_22_df["Task_22"], task_22_df["label"], True, 2)

Sensitivity (SE): 0.9338
Specificity (SP): 0.9419
Average Score (AS): 0.9379
Harmonic Score (HS): 0.9378
Score: 0.9379
Sensitivity (SE): 0.8676
Specificity (SP): 0.9212
Average Score (AS): 0.8944
Harmonic Score (HS): 0.8936
Score: 0.8940
