In [1]:
from classification.cnn_preprocess import CNNPreprocess
from classification.cnn import CNN
import numpy as np
import json
from statistics import mean

def get_train_data(file_sets, preprocessed_data_path=None):
    """Return the training data, from a JSON cache or by preprocessing.

    Args:
        file_sets: passed to ``CNNPreprocess(file_sets=...)`` when no cache
            path is supplied; ignored otherwise.
        preprocessed_data_path: optional path to a JSON file. When truthy,
            the file is parsed and its contents returned as-is.

    Returns:
        The parsed JSON object, or the result of
        ``CNNPreprocess.preprocess_data()``. Callers in this notebook read
        the ``"values"`` and ``"labels"`` entries.
    """
    # No cache supplied: run the preprocessing pipeline.
    if not preprocessed_data_path:
        return CNNPreprocess(file_sets=file_sets).preprocess_data()

    with open(preprocessed_data_path) as cached:
        return json.load(cached)


In [None]:
# Evaluate CNN Prototype

def evaluate_cnn(file_sets, preprocessed_data_path=None):
    """Run k-fold validation of the CNN on the training data.

    Args:
        file_sets: forwarded to ``get_train_data``, which uses it to
            preprocess the recordings when no cached file is given.
        preprocessed_data_path: optional path to a JSON file of already
            preprocessed data, forwarded to ``get_train_data``.

    Returns:
        Whatever ``CNN.validate_model_kfold`` returns. The calling cell
        iterates over it and reads the ``'0'``/``'1'`` recall and the
        ``'accuracy'`` entry of each item.
    """
    data = get_train_data(file_sets, preprocessed_data_path)

    X = np.array(data["values"])
    y = np.array(data["labels"])

    return CNN().validate_model_kfold(X, y)

# NOTE(review): machine-specific absolute path; anyone else running this
# notebook has to edit it.
challenge_data_folder = "/Users/austinatmaja/Documents/4B/SmartScope/murmur-classifier/challenge_data"

# One (record paths, set directory) tuple per training set. Each spec row is
# (set letter, zero-pad width of the record number, last record number).
file_sets = [
    (
        [
            f"{challenge_data_folder}/training-{letter}/{letter}" + str(i).zfill(width)
            for i in range(1, last + 1)
        ],
        f"{challenge_data_folder}/training-{letter}/",
    )
    for letter, width, last in (
        ("a", 4, 409),
        ("b", 4, 490),
        ("c", 4, 31),
        ("d", 4, 55),
        ("e", 5, 2141),
        ("f", 4, 114),
    )
]

classification_results = evaluate_cnn(file_sets)

# Per-fold recall of class '1' (sensitivity) and class '0' (specificity).
# A fold's report is skipped for a class it has no entry for.
sensitivities, specificities = (
    [fold[label]['recall'] for fold in classification_results if label in fold]
    for label in ('1', '0')
)
accuracies = [fold['accuracy'] for fold in classification_results]

print(sensitivities, specificities, accuracies)

# Averages across folds, printed in the order sensitivity, specificity, accuracy.
for caption, per_fold in (
    ("Average sensitivity ", sensitivities),
    ("Average specificity ", specificities),
    ("Average accuracy ", accuracies),
):
    print(caption, mean(per_fold))

In [2]:
# NOTE(review): machine-specific absolute path; anyone else running this
# notebook has to edit it.
challenge_data_folder = "/Users/austinatmaja/Documents/4B/SmartScope/murmur-classifier/challenge_data"

# One (record paths, set directory) tuple per training set. Each spec row is
# (set letter, zero-pad width of the record number, last record number).
file_sets = [
    (
        [
            f"{challenge_data_folder}/training-{letter}/{letter}" + str(i).zfill(width)
            for i in range(1, last + 1)
        ],
        f"{challenge_data_folder}/training-{letter}/",
    )
    for letter, width, last in (
        ("a", 4, 409),
        ("b", 4, 490),
        ("c", 4, 31),
        ("d", 4, 55),
        ("e", 5, 2141),
        ("f", 4, 114),
    )
]

def save_cnn(file_sets, preprocessed_data_path=None):
    """Load the training data and pass it to ``CNN.save_model``.

    Args:
        file_sets: forwarded to ``get_train_data``, which uses it to
            preprocess the recordings when no cached file is given.
        preprocessed_data_path: optional path to a JSON file of already
            preprocessed data, forwarded to ``get_train_data``.

    Returns:
        None. Any persistence is done inside ``CNN.save_model``.
    """
    data = get_train_data(file_sets, preprocessed_data_path)

    X = np.array(data["values"])
    y = np.array(data["labels"])

    CNN().save_model(X, y)

# No cache path is given, so the recordings in file_sets are preprocessed
# before the data is handed to CNN.save_model.
save_cnn(file_sets)

Processed a file set
Processed a file set
Processed a file set
Processed a file set
Processed a file set
Processed a file set
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 00047: early stopping
INFO:tensorflow:Assets written to: ./cnn_model/assets


In [8]:
from classification.features import FeaturesProcessor
from classification.cnn_preprocess import CNNPreprocess
from classification.cnn import CNN
from classification.classifier import Classifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold

import numpy as np

# Evaluate Combined Classifier

# c has range(1,32)
# NOTE(review): machine-specific absolute path; anyone else running this
# notebook has to edit it.
challenge_data_folder = "/Users/austinatmaja/Documents/4B/SmartScope/murmur-classifier/challenge_data"

# One (record paths, set directory) tuple per training set. Each spec row is
# (set letter, zero-pad width of the record number, last record number).
file_sets = [
    (
        [
            f"{challenge_data_folder}/training-{letter}/{letter}" + str(i).zfill(width)
            for i in range(1, last + 1)
        ],
        f"{challenge_data_folder}/training-{letter}/",
    )
    for letter, width, last in (
        ("a", 4, 409),
        ("b", 4, 490),
        ("c", 4, 31),
        ("d", 4, 55),
        ("e", 5, 2141),
        ("f", 4, 114),
    )
]

def get_combined_data(file_sets):
    """Collect CNN features, AdaBoost features and a label per recording.

    Args:
        file_sets: sequence of ``(filenames, prefix)`` entries. ``prefix`` is
            prepended to ``RECORDS-abnormal`` to locate the list of abnormal
            record names, which is read one name per line.

    Returns:
        A dictionary with the following structure::

            {
                filename: {
                    "adaboost_features": <data>,
                    "cnn_features": <data>,
                    "label": <label>
                }
            }

        ``label`` is 1 when ``CNNPreprocess.remove_prefix(filename, prefix)``
        appears in any of the ``RECORDS-abnormal`` files, otherwise 0.
    """
    # Pool the abnormal record names of every set before labelling anything.
    abnormal_records = set()
    for entry in file_sets:
        with open(f"{entry[1]}RECORDS-abnormal") as records_file:
            abnormal_records.update(row.rstrip("\n") for row in records_file)

    cnn_preprocess = CNNPreprocess()
    data = {}

    for entry in file_sets:
        set_prefix = entry[1]
        for fname in entry[0]:
            adaboost_features = FeaturesProcessor(fname).get_all_features()
            cnn_features = cnn_preprocess.process(fname)
            is_abnormal = cnn_preprocess.remove_prefix(fname, set_prefix) in abnormal_records
            data[fname] = {
                "adaboost_features": adaboost_features,
                "cnn_features": cnn_features,
                "label": 1 if is_abnormal else 0,
            }
        print("Processed a file set")

    return data
    
combined_data = get_combined_data(file_sets)
fnames = sorted(combined_data)
y = np.array([combined_data[name]["label"] for name in fnames])

# perform 80-20 train-test split
# NOTE(review): no random_state is passed, so the split is not reproducible
# unless the global NumPy RNG is seeded elsewhere.
fnames_train, fnames_test, y_train, y_test = train_test_split(fnames, y, test_size=0.2)

# Look the features up again by filename so both models see the same split.
X_cnn_train, X_cnn_test = (
    np.array([combined_data[name]["cnn_features"].tolist() for name in subset])
    for subset in (fnames_train, fnames_test)
)
X_adaboost_train, X_adaboost_test = (
    [combined_data[name]["adaboost_features"] for name in subset]
    for subset in (fnames_train, fnames_test)
)

# train CNN
cnn = CNN()
cnn_model = cnn.train_model(X_cnn_train, y_train)

# train Adaboost
adaboost = Classifier()
adaboost_model = adaboost.train_model(adaboost.get_model(), X_adaboost_train, y_train)

# invoke combined classifier
# Per-sample scores from each model; index 1 of each row is what the combined
# decision rule thresholds.
y_pred_adaboost = adaboost_model.predict_proba(X_adaboost_test)
y_pred_cnn = cnn_model.predict(X_cnn_test)
def get_prediction(pred_cnn, pred_adaboost, threshold=0.6):
    """Combine the two classifiers' scores for one sample into a 0/1 label.

    Args:
        pred_cnn: indexable scores from the CNN; element 1 is compared
            against ``threshold``.
        pred_adaboost: indexable scores from the AdaBoost model; element 1
            is compared against ``threshold``.
        threshold: score that element 1 must strictly exceed. Defaults to
            0.6.

    Returns:
        1 if either model's element 1 is strictly greater than
        ``threshold``, otherwise 0.
    """
    return int(pred_cnn[1] > threshold or pred_adaboost[1] > threshold)
# Combined decision per test sample; at this point y_pred_cnn and
# y_pred_adaboost still hold the per-class scores, not class labels.
y_pred = [
    get_prediction(cnn_scores, adaboost_scores)
    for cnn_scores, adaboost_scores in zip(y_pred_cnn, y_pred_adaboost)
]

# get prediction metrics for each of the classifiers in isolation
# The CNN scores for the test set are already in y_pred_cnn, so take the
# argmax of those instead of running the model over the test set again.
y_pred_cnn = np.argmax(y_pred_cnn, axis=1)
y_pred_adaboost = adaboost_model.predict(X_adaboost_test)

# evaluate results
# Same report layout for the combined rule and for each model on its own.
for heading, predictions in (
    ("COMBINED RESULTS", y_pred),
    ("ADABOOST RESULTS", y_pred_adaboost),
    ("CNN RESULTS", y_pred_cnn),
):
    print(heading)
    report = classification_report(y_test, predictions, output_dict=True)
    print(report)
    print("Sensitivity ", report['1']['recall'])
    print("Specificity ", report['0']['recall'])
    print("Accuracy ", report['accuracy'])

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]


Processed a file set
Processed a file set
Processed a file set
Processed a file set
Processed a file set
Processed a file set
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 00041: early stopping
COMBINED RESULTS
{'0': {'precision': 0.9291845493562232, 'recall': 0.8608349900596421, 'f1-score': 0.8937048503611971, 'support': 503}, '1': {'precision': 0.6153846153846154, 'recall': 0.7724137931034483, 'f1-score': 0.6850152905198776, 'support': 145}, 'accuracy': 0.8410493827160493, 'macro avg': {'precision': 0.7722845