In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from datetime import datetime, timedelta
import time
import warnings

# Suppress TensorFlow warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
from preprocess import do_all
from models import compile_model

In [4]:
# # Define a dictionary with code ranges and corresponding categories
# category_mapping = {
#     (1, 9): 'Intestinal Infectious Diseases',
#     (10, 18): 'Tuberculosis',
#     (20, 27): 'Zoonotic Bacterial Diseases',
#     (30, 41): 'Other Bacterial Diseases',
#     (42, 42): 'Human Immunodeficiency Virus',
#     (45, 49): 'Poliomyelitis And Other Non-Arthropod-Borne Viral Diseases Of Central Nervous System',
#     (50, 59): 'Viral Diseases Accompanied By Exanthem',
#     (60, 66): 'Arthropod-Borne Viral Diseases',
#     (70, 79): 'Other Diseases Due To Viruses And Chlamydiae',
#     (80, 88): 'Rickettsioses And Other Arthropod-Borne Diseases',
#     (90, 99): 'Syphilis And Other Venereal Diseases',
#     (100, 104): 'Other Spirochetal Diseases',
#     (110, 118): 'Mycoses',
#     (120, 129): 'Helminthiases',
#     (130, 136): 'Other Infectious And Parasitic Diseases',
#     (137, 139): 'Late Effects Of Infectious And Parasitic Diseases',

#     (140, 149): 'Malignant Neoplasm Of Lip, Oral Cavity, And Pharynx',
#     (150, 159): 'Malignant Neoplasm Of Digestive Organs And Peritoneum',
#     (160, 165): 'Malignant Neoplasm Of Respiratory And Intrathoracic Organs',
#     (170, 176): 'Malignant Neoplasm Of Bone, Connective Tissue, Skin, And Breast',
#     (179, 189): 'Malignant Neoplasm Of Genitourinary Organs',
#     (190, 199): 'Malignant Neoplasm Of Other And Unspecified Sites',
#     (200, 209): 'Malignant Neoplasm Of Lymphatic And Hematopoietic Tissue',
#     (210, 229): 'Benign Neoplasms',
#     (230, 234): 'Carcinoma In Situ',
#     (235, 238): 'Neoplasms Of Uncertain Behavior',
#     (239, 239): 'Neoplasms Of Unspecified Nature',

#     (240, 246): 'Disorders Of Thyroid Gland',
#     (249, 259): 'Diseases Of Other Endocrine Glands',
#     (260, 269): 'Nutritional Deficiencies',
#     (270, 279): 'Other Metabolic Disorders And Immunity Disorders',

#     (280, 289): 'Diseases Of Blood And Blood-Forming Organs',

#     (290, 294): 'Organic Psychotic Conditions',
#     (295, 299): 'Other Psychoses',
#     (300, 316): 'Neurotic Disorders, Personality Disorders, And Other Nonpsychotic Mental Disorders',
#     (317, 319): 'Intellectual Disabilities',

#     (320, 327): 'Inflammatory Diseases Of The Central Nervous System',
#     (330, 337): 'Hereditary And Degenerative Diseases Of The Central Nervous System',
#     (338, 338): 'Pain',
#     (339, 339): 'Other Headache Syndromes',
#     (340, 349): 'Other Disorders Of The Central Nervous System',
#     (350, 359): 'Disorders Of The Peripheral Nervous System',
#     (360, 379): 'Disorders Of The Eye And Adnexa',
#     (380, 389): 'Diseases Of The Ear And Mastoid Process',

#     (390, 392): 'Acute Rheumatic Fever',
#     (393, 398): 'Chronic Rheumatic Heart Disease',
#     (401, 405): 'Hypertensive Disease',
#     (410, 414): 'Ischemic Heart Disease',
#     (415, 417): 'Diseases Of Pulmonary Circulation',
#     (420, 429): 'Other Forms Of Heart Disease',
#     (430, 438): 'Cerebrovascular Disease',
#     (440, 449): 'Diseases Of Arteries, Arterioles, And Capillaries',
#     (451, 459): 'Diseases Of Veins And Lymphatics, And Other Diseases Of Circulatory System',

#     (460, 466): 'Acute Respiratory Infections',
#     (470, 478): 'Other Diseases Of Upper Respiratory Tract',
#     (480, 488): 'Pneumonia And Influenza',
#     (490, 496): 'Chronic Obstructive Pulmonary Disease And Allied Conditions',
#     (500, 508): 'Pneumoconioses And Other Lung Diseases Due To External Agents',
#     (510, 519): 'Other Diseases Of Respiratory System',

#     (520, 529): 'Diseases Of Oral Cavity, Salivary Glands, And Jaws',
#     (530, 539): 'Diseases Of Esophagus, Stomach, And Duodenum',
#     (540, 543): 'Appendicitis',
#     (550, 553): 'Hernia Of Abdominal Cavity',
#     (555, 558): 'Noninfective Enteritis And Colitis',
#     (560, 569): 'Other Diseases Of Intestines And Peritoneum',
#     (570, 579): 'Other Diseases Of Digestive System',

#     (580, 589): 'Nephritis, Nephrotic Syndrome, And Nephrosis',
#     (590, 599): 'Other Diseases Of Urinary System',
#     (600, 608): 'Diseases Of Male Genital Organs',
#     (610, 612): 'Disorders Of Breast',
#     (614, 616): 'Inflammatory Disease Of Female Pelvic Organs',
#     (617, 629): 'Other Disorders Of Female Genital Tract',

#     (630, 639): 'Ectopic And Molar Pregnancy And Other Pregnancy With Abortive Outcome',
#     (640, 649): 'Complications Mainly Related To Pregnancy',
#     (650, 659): 'Normal Delivery, And Other Indications For Care In Pregnancy, Labor, And Delivery',
#     (660, 669): 'Complications Occurring Mainly In The Course Of Labor And Delivery',
#     (670, 677): 'Complications Of The Puerperium',
#     (678, 679): 'Other Maternal And Fetal Complications',

#     (680, 686): 'Infections Of Skin And Subcutaneous Tissue',
#     (690, 698): 'Other Inflammatory Conditions Of Skin And Subcutaneous Tissue',
#     (700, 709): 'Other Diseases Of Skin And Subcutaneous Tissue',

#     (710, 719): 'Arthropathies And Related Disorders',
#     (720, 724): 'Dorsopathies',
#     (725, 729): 'Rheumatism, Excluding The Back',
#     (730, 739): 'Osteopathies, Chondropathies, And Acquired Musculoskeletal Deformities',

#     (740, 759): 'Congenital Anomalies',

#     (760, 763): 'Maternal Causes Of Perinatal Morbidity And Mortality',
#     (764, 779): 'Other Conditions Originating In The Perinatal Period',

#     (780, 789): 'Symptoms',
#     (790, 796): 'Nonspecific Abnormal Findings',
#     (797, 799): 'Ill-Defined And Unknown Causes Of Morbidity And Mortality',

#     (800, 804): 'Fracture Of Skull',
#     (805, 809): 'Fracture Of Spine And Trunk',
#     (810, 819): 'Fracture Of Upper Limb',
#     (820, 829): 'Fracture Of Lower Limb',
#     (830, 839): 'Dislocation',
#     (840, 848): 'Sprains And Strains Of Joints And Adjacent Muscles',
#     (850, 854): 'Intracranial Injury, Excluding Those With Skull Fracture',
#     (860, 869): 'Internal Injury Of Chest, Abdomen, And Pelvis',
#     (870, 879): 'Open Wound Of Head, Neck, And Trunk',
#     (880, 887): 'Open Wound Of Upper Limb',
#     (890, 897): 'Open Wound Of Lower Limb',
#     (900, 904): 'Injury To Blood Vessels',
#     (905, 909): 'Late Effects Of Injuries, Poisonings, Toxic Effects, And Other External Causes',
#     (910, 919): 'Superficial Injury',
#     (920, 924): 'Contusion With Intact Skin Surface',
#     (925, 929): 'Crushing Injury',
#     (930, 939): 'Effects Of Foreign Body Entering Through Orifice',
#     (940, 949): 'Burns',
#     (950, 957): 'Injury To Nerves And Spinal Cord',
#     (958, 959): 'Certain Traumatic Complications And Unspecified Injuries',
#     (960, 979): 'Poisoning By Drugs, Medicinals And Biological Substances',
#     (980, 989): 'Toxic Effects Of Substances Chiefly Nonmedicinal As To Source',
#     (990, 995): 'Other And Unspecified Effects Of External Causes',
#     (996, 999): 'Complications Of Surgical And Medical Care, Not Elsewhere Classified',
# }

In [5]:
# Alternatively to use categories instead of sub-categories

category_mapping = {
    (1, 139): 'Infectious And Parasitic Diseases',
    (140, 239): 'Neoplasms',
    (240, 279): 'Endocrine, Nutritional And Metabolic Diseases, And Immunity Disorders',
    (280, 289): 'Diseases Of The Blood And Blood-Forming Organs',
    (290, 319): 'Mental Disorders',
    (320, 389): 'Diseases Of The Nervous System And Sense Organs',
    (390, 459): 'Diseases Of The Circulatory System',
    (460, 519): 'Diseases Of The Respiratory System',
    (520, 579): 'Diseases Of The Digestive System',
    (580, 629): 'Diseases Of The Genitourinary System',
    (630, 679): 'Complications Of Pregnancy, Childbirth, And The Puerperium',
    (680, 709): 'Diseases Of The Skin And Subcutaneous Tissue',
    (710, 739): 'Diseases Of The Musculoskeletal System And Connective Tissue',
    (740, 759): 'Congenital Anomalies',
    (760, 779): 'Certain Conditions Originating In The Perinatal Period',
    (780, 799): 'Symptoms, Signs, And Ill-Defined Conditions',
    (800, 999): 'Injury And Poisoning',
}

In [6]:
# import pickle

# with open("category_mapping.pkl", "wb") as f:
#     pickle.dump(category_mapping, f)



# print(len(category_mapping))
# print(category_mapping)


# # Load the category_mapping dictionary from the saved file
# with open("category_mapping.pkl", "rb") as f:
#     loaded_category_mapping = pickle.load(f)

# # Now loaded_category_mapping contains the loaded dictionary
# print(loaded_category_mapping)

In [7]:
ad = pd.read_csv("admissions.csv")
diagnoses_icd = pd.read_csv("diagnoses_icd.csv")

df_matrix, binar = do_all(ad, diagnoses_icd, category_mapping, 5, 5)
    
df_matrix.head(10)

Unnamed: 0,subject_id,admission_matrix
0,10043050,"[[0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,..."
1,10045318,"[[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
2,10054464,"[[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,..."
3,10056223,"[[1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,..."
4,10073847,"[[0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,..."
5,10095888,"[[1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,..."
6,10099032,"[[0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,..."
7,10108435,"[[0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,..."
8,10113857,"[[0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,..."
9,10118315,"[[0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,..."


In [8]:
diseases = binar.classes_
print(diseases)

[1 140 240 280 290 320 390 460 520 580 630 680 710 740 760 780 800 'V' 'E']


In [9]:
from sklearn.model_selection import train_test_split

unique_patients = df_matrix['subject_id'].unique()
train_patients, test_patients = train_test_split(unique_patients, test_size=0.2, random_state=42)

# Create training and testing sets based on patient IDs
train_data = df_matrix[df_matrix['subject_id'].isin(train_patients)]
test_data = df_matrix[df_matrix['subject_id'].isin(test_patients)]
train_data = np.array(train_data['admission_matrix'].tolist())
test_data = np.array(test_data['admission_matrix'].tolist())

print(train_data.shape)
print(test_data.shape)

num_admissions = train_data.shape[1]
print(num_admissions)

(720, 5, 19)
(180, 5, 19)
5


In [10]:
train_data_pats = df_matrix[df_matrix['subject_id'].isin(train_patients)]
test_data_pats = df_matrix[df_matrix['subject_id'].isin(test_patients)]
test_data_pats

Unnamed: 0,subject_id,admission_matrix
5,10095888,"[[1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,..."
23,10261471,"[[0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,..."
25,10295020,"[[1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,..."
30,10342737,"[[1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,..."
31,10357102,"[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
...,...,...
885,19776354,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
891,19830515,"[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,..."
894,19890786,"[[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,..."
895,19905254,"[[1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,..."


In [11]:
def get_target(data, disease):
    train_admissions = data.shape[1] -1
    # print(train_admissions)
    a = data[:, :train_admissions, :] # first (num_admissions -1)
    b = []
    # print(data.shape[0])
    for i in range(data.shape[0]):  # number of patients
        # a.append(data[i][:num])  # first (num_admissions -1)
        b.append(data[i][-1][disease])   # final admission
        # print(disease)

    return np.array(a), np.array(b)

In [12]:
def train_models(iterations):
    losses = []
    accuracies = []
    models = []
    runtimes = []

    for i in range(0,iterations):

        name = "Disease_"+ str(i)
        # print(name)

        print(f"========================== Disease {i+1}(code {diseases[i]}) ==========================")

        X_train , y_train = get_target(train_data, i)
        X_test , y_test = get_target(test_data, i)

        # print(X_train.shape)
        # print(y_train.shape)
        # print(X_test.shape)
        # print(y_test.shape)

        X_train = X_train.astype('float32')
        y_train = y_train.astype('float32')
        X_test = X_test.astype('float32')
        y_test = y_test.astype('float32')

        model = compile_model(name, X_train)


        start_time = time.time()
        history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)
        finish_time = time.time() - start_time

        runtimes.append(finish_time)

        models.append(model)

        loss, accuracy = model.evaluate(X_test, y_test)
        losses.append(loss)
        print("Test Loss:", loss)

        accuracies.append(accuracy)
        print("Test Accuracy:", accuracy)
        print()

        # # Plot Accuracy and Loss
        # plt.figure(figsize=(12, 4))

        # plt.subplot(1, 2, 1)
        # plt.plot(history.history['loss'], label='Training Loss')
        # plt.plot(history.history['val_loss'], label='Validation Loss')
        # plt.title(f'Disease {i} - Training and Validation Loss')
        # plt.xlabel('Epoch')
        # plt.ylabel('Loss')
        # plt.legend()

        # plt.subplot(1, 2, 2)
        # plt.plot(history.history['accuracy'], label='Training Accuracy')
        # plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        # plt.title(f'Disease {i} - Training and Validation Accuracy')
        # plt.xlabel('Epoch')
        # plt.ylabel('Accuracy')
        # plt.legend()

        # plt.tight_layout()

        # # Save the individual disease plots
        # plot_filename = f'./plots/Disease_{i}_Training_Validation.png'
        # plt.savefig(plot_filename)
        # plt.close()

        model_filename = f'models/Disease_{diseases[i]}_model.keras'
        model.save(model_filename)


    print("Average loss:", np.average(loss))
    print("Average accuracy:", np.average(accuracies))
    print(f"Total Training time: {np.sum(runtimes)} and Average training time = {np.mean(runtimes)}")


    return models


iterations = train_data.shape[2]
print(iterations)

models = train_models(iterations)


19
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.25587478280067444
Test Accuracy: 0.9055555462837219

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.050877343863248825
Test Accuracy: 0.9888888597488403

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.07889313995838165
Test Accuracy: 0.9777777791023254

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.12308411300182343
Test Accuracy: 0.9611111283302307

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.15142692625522614
Test Accuracy: 0.949999988079071

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.15480975806713104
Test Accuracy: 0.949999988079071

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.10169635713100433
Test Accuracy: 0.9666666388511658

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.2484268844127655
Test Accuracy: 0.9166666865348816

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.12809666

In [13]:
X_test , y_test = get_target(test_data, 1)
print(test_data.shape)
ground_truth =  test_data[:, -1:, :]
print(X_test.shape)
print(ground_truth.shape)


(180, 5, 19)
(180, 4, 19)
(180, 1, 19)


In [14]:
# print(test_data.shape)
# print(X_test.shape)
import sys

def predict_diseases(models, data):

    if len(data.shape) == 2:
        print("Reshaping data into compatible format.")
        data = np.reshape(data, (1, data.shape[0], data.shape[1]))
        print(data.shape)
    
    all_predictions = [[] for _ in range(data.shape[0])]   
    # print(np.array(all_predictions).shape) 


    iterations = data.shape[-1]

    for i in range(iterations):
        # Use the first n-1 admissions to make prediction
        X_test , _ = get_target(data, i)
        X_test = X_test.astype('float32')
        # print(X_test.shape)

        model = models[i]

        # print(diseases[i])
        predictions = model.predict(X_test)
        rounded_predictions = np.round(predictions).astype(int)
        # print(rounded_predictions.shape)

        all_predictions = np.concatenate((all_predictions, rounded_predictions), axis=1).astype(int)

    return all_predictions


In [15]:
print(test_data.shape)

all_predictions = predict_diseases(models, test_data)

print(all_predictions.shape)

# print(all_predictions)
# for pred, true_label in zip(rounded_predictions, y_test):
#     print("Prediction: {pred}, True Label: {true_label}")

(180, 5, 19)
(180, 19)


In [16]:
all_predictions_reshaped = all_predictions.reshape(test_data.shape[0], 1, test_data.shape[2])

print(all_predictions_reshaped.shape)

print()
print("predicted diseases")
print()

predicted_labels = binar.inverse_transform(all_predictions)
for i in predicted_labels:
    print(i)

(180, 1, 19)

predicted diseases

(1, 240, 290, 320, 390, 710, 800, 'V', 'E')
(240, 290, 390, 580, 710, 780)
(1, 240, 280, 320, 390, 460, 520, 580, 680, 780, 800, 'V')
(1, 140, 240, 280, 290, 320, 460, 520, 680, 780, 'V', 'E')
(140, 280, 320, 390, 580, 710, 780, 'V', 'E')
(290, 520, 710, 'V')
(280, 390, 520, 680, 710, 'V', 'E')
(1, 140, 240, 280, 390, 460, 520, 710, 780, 'V', 'E')
(1, 290, 320, 520, 780, 800, 'V', 'E')
(240, 280, 320, 390, 520, 580, 680, 780, 800, 'V')
(1, 140, 240, 280, 320, 390, 520, 580, 680, 800, 'V', 'E')
(1, 240, 280, 290, 780, 800, 'V', 'E')
(1, 140, 240, 280, 390, 520, 580, 780, 'V', 'E')
(240, 290, 320, 390, 710, 780, 'V', 'E')
(1, 140, 240, 280, 390, 460, 520, 580, 710, 780, 800, 'V', 'E')
(240, 280, 290, 320, 390, 520, 580, 780, 'V')
(1, 140, 240, 280, 290, 320, 580, 780, 'V', 'E')
(240, 390, 460, 520, 580, 740, 780, 'V')
(1, 140, 240, 280, 290, 320, 390, 460, 520, 580, 680, 780, 800, 'V', 'E')
(140, 240, 280, 290, 520, 680, 710, 780, 'V', 'E')
(1, 140, 240,

In [17]:
predicted_str = []
for i in predicted_labels:
    # Convert each element to a string
    str_tuple = [str(item) for item in i]
    # Join the elements together into a single string
    result = ' '.join(str_tuple)
    predicted_str.append(result)


test_data_pats["predicted"] = all_predictions.tolist()
print(test_data_pats.shape)
print(all_predictions.shape)
test_data_pats.to_csv('test_data_pats.csv', index=False)
test_data_pats["labels"] = predicted_str

# Selecting only the 'subject_id' and 'predicted' columns
test_data_pats_save = test_data_pats[['subject_id', 'labels']]
test_data_pats_save.to_csv('test_data_pats_save.csv', index=False)

test_data_pats

(180, 3)
(180, 19)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data_pats["predicted"] = all_predictions.tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data_pats["labels"] = predicted_str


Unnamed: 0,subject_id,admission_matrix,predicted,labels
5,10095888,"[[1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,...","[1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, ...",1 240 290 320 390 710 800 V E
23,10261471,"[[0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...","[0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, ...",240 290 390 580 710 780
25,10295020,"[[1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,...","[1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, ...",1 240 280 320 390 460 520 580 680 780 800 V
30,10342737,"[[1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,...","[1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, ...",1 140 240 280 290 320 460 520 680 780 V E
31,10357102,"[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, ...",140 280 320 390 580 710 780 V E
...,...,...,...,...
885,19776354,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, ...",1 140 240 280 290 390 520 580 780 V
891,19830515,"[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...","[1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, ...",1 240 280 290 390 520 580 680 780 V E
894,19890786,"[[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,...","[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, ...",140 240 280 290 320 390 460 520 680 710 780 V E
895,19905254,"[[1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,...","[1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, ...",1 240 280 290 390 520 580 710 780 V


In [18]:
def calculate_accuracies(y_true, y_pred):
    # Check if the shapes of y_true and y_pred match
    if y_true.shape != y_pred.shape:
        print(f"Shape of y_true is {y_true.shape} and shape of y_pred is {y_pred.shape}")
        raise ValueError("Shapes of y_true and y_pred must match.")

    # Calculate accuracy for each prediction
    accuracies = []
    for i in range(y_true.shape[0]):
        correct_predictions = np.sum(y_true[i] == y_pred[i])
        total_predictions = y_true[i].size
        accuracy = correct_predictions / total_predictions
        accuracies.append(accuracy)


    return accuracies


ground_truth =  test_data[:, -1:, :]

accuracies = calculate_accuracies(ground_truth, all_predictions_reshaped)
print(f"Average accuracy: {np.mean(accuracies):.4f}")
print(f"Best accuracy: {max(accuracies):.4f} for patient no. {np.argmax(accuracies)+1}")
print(f"Worst accuracy: {min(accuracies):.4f} for patient no. {np.argmin(accuracies)+1}")

Average accuracy: 0.9608
Best accuracy: 1.0000 for patient no. 2
Worst accuracy: 0.7368 for patient no. 74


In [19]:
def calculate_metrics(y_true, y_pred):
    # Calculate True Positives, False Positives, True Negatives, False Negatives
    TP = np.sum(np.logical_and(y_true == 1, y_pred == 1))
    FP = np.sum(np.logical_and(y_true == 0, y_pred == 1))
    TN = np.sum(np.logical_and(y_true == 0, y_pred == 0))
    FN = np.sum(np.logical_and(y_true == 1, y_pred == 0))

    # Calculate Precision
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0

    # Calculate Recall
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0

    # Calculate F1-score
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    # Calculate Accuracy
    accuracy = (TP + TN) / (TP + FP + TN + FN)

    # Calculate R2 score
    mean_true = np.mean(y_true)
    r2 = 1 - (np.sum((y_true - y_pred) ** 2) / np.sum((y_true - mean_true) ** 2))

    return precision, recall, f1, accuracy, r2


ground_truth =  test_data[:, -1:, :]

precision, recall, f1, accuracy, r2 = calculate_metrics(ground_truth, all_predictions_reshaped)
print("Average metrics")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"R2 score: {r2:.4f}")


Average metrics
Precision: 0.9948
Recall: 0.9329
F1-score: 0.9629
Accuracy: 0.9608
R2 score: 0.8420


In [20]:
def calculate_metrics_per_patient(y_true, y_pred):
    metrics_per_patient = []
    for i in range(y_true.shape[0]):
        # Calculate True Positives, False Positives, True Negatives, False Negatives
        TP = np.sum(np.logical_and(y_true[i] == 1, y_pred[i] == 1))
        FP = np.sum(np.logical_and(y_true[i] == 0, y_pred[i] == 1))
        TN = np.sum(np.logical_and(y_true[i] == 0, y_pred[i] == 0))
        FN = np.sum(np.logical_and(y_true[i] == 1, y_pred[i] == 0))

        # Calculate Precision
        precision = TP / (TP + FP) if (TP + FP) > 0 else 0

        # Calculate Recall
        recall = TP / (TP + FN) if (TP + FN) > 0 else 0

        # Calculate F1-score
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        # Calculate Accuracy
        accuracy = (TP + TN) / (TP + FP + TN + FN)

        # Calculate R2 score
        mean_true = np.mean(y_true[i])
        r2 = 1 - (np.sum((y_true[i] - y_pred[i]) ** 2) / np.sum((y_true[i] - mean_true) ** 2))

        metrics_per_patient.append((precision, recall, f1, accuracy, r2, y_true[i], y_pred[i]))

    return metrics_per_patient

ground_truth =  test_data[:, -1:, :]
metrics_per_patient = calculate_metrics_per_patient(ground_truth, all_predictions_reshaped)

# Find the best and worst metrics
best_idx = np.argmax([metric[2] for metric in metrics_per_patient])  # F1-score index
worst_idx = np.argmin([metric[2] for metric in metrics_per_patient])  # F1-score index

best_patient_metrics = metrics_per_patient[best_idx]
worst_patient_metrics = metrics_per_patient[worst_idx]

best_patient_num = best_idx + 1
worst_patient_num = worst_idx + 1

print("Best Patient Metrics:")
print(f"Patient No.: {best_patient_num}")
print(f"Precision: {best_patient_metrics[0]:.4f}")
print(f"Recall: {best_patient_metrics[1]:.4f}")
print(f"F1-score: {best_patient_metrics[2]:.4f}")
print(f"Accuracy: {best_patient_metrics[3]:.4f}")
print(f"R2 score: {best_patient_metrics[4]:.4f}")
print("truth:", best_patient_metrics[5])  # Print ground truth values
print("preds:", best_patient_metrics[6])  # Print predicted values

print("\nWorst Patient Metrics:")
print(f"Patient No.: {worst_patient_num}")
print(f"Precision: {worst_patient_metrics[0]:.4f}")
print(f"Recall: {worst_patient_metrics[1]:.4f}")
print(f"F1-score: {worst_patient_metrics[2]:.4f}")
print(f"Accuracy: {worst_patient_metrics[3]:.4f}")
print(f"R2 score: {worst_patient_metrics[4]:.4f}")
print("truth:", worst_patient_metrics[5])  # Print ground truth values
print("preds:", worst_patient_metrics[6])  # Print predicted values


Best Patient Metrics:
Patient No.: 2
Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000
Accuracy: 1.0000
R2 score: 1.0000
truth: [[0 0 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0]]
preds: [[0 0 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0]]

Worst Patient Metrics:
Patient No.: 127
Precision: 0.8889
Recall: 0.6667
F1-score: 0.7619
Accuracy: 0.7368
R2 score: -0.1310
truth: [[1 0 1 1 0 1 1 1 0 1 0 0 1 0 0 1 1 1 1]]
preds: [[0 0 1 1 0 1 1 1 1 1 0 0 0 0 0 1 0 1 0]]


In [21]:
print(ground_truth[7])
print(all_predictions_reshaped[7])

[[1 1 1 1 0 0 1 1 1 0 0 0 1 0 0 1 0 1 1]]
[[1 1 1 1 0 0 1 1 1 0 0 0 1 0 0 1 0 1 1]]


In [22]:
single = predict_diseases(models, test_data[7])

Reshaping data into compatible format.
(1, 5, 19)


In [23]:
# print(single)
print("predicted diseases")
for i in binar.inverse_transform(single):
    print(i,"\t")

gr_th = ground_truth[7]
print("ground truth")
for i in binar.inverse_transform(gr_th):
    print(i,"\t")


predicted diseases
(1, 140, 240, 280, 390, 460, 520, 710, 780, 'V', 'E') 	
ground truth
(1, 140, 240, 280, 390, 460, 520, 710, 780, 'V', 'E') 	
