In [2]:
import pandas as pd
import numpy as np
import pickle
from matplotlib import pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
pd.options.display.max_seq_items = 2000

In [3]:
## Mount Google Drive at /content/drive (Colab-only helper) so that the dataset
## file stored under '/content/drive/My Drive/' can be opened by the next cell.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
## Load the pickled dataset from the mounted Drive.
## NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source.
## The `with` block closes the file even if unpickling raises.
with open('/content/drive/My Drive/dataset_OSAS.pickle', 'rb') as f:
    data = pickle.load(f)

961357


In [12]:
  ## helper functions for the logistic sigmoid function
  def logistic(x):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-x)).

    Accepts a scalar or an array-like and works element-wise. A scalar input
    yields a NumPy float scalar; an array input yields an array of the same shape.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way exp(-x)
    # does for large negative x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves n-d arrays as they are.
    return result[()]

  def logistic_deriv(x):
    """Derivative of the logistic sigmoid at x, computed as s * (1 - s) with s = logistic(x)."""
    activation = logistic(x)
    complement = 1 - activation
    return activation * complement

  ## Hyper-parameters: learning rate, input/hidden layer sizes and number of epochs
  LR = 1

  I_dim = 4
  H_dim = 4

  epoch_count = 20

  ## Random weight initialization, uniform in [-1, 1).
  ## A seeded generator is used so that re-running the notebook from a fresh
  ## kernel starts training from the same weights every time.
  WEIGHT_SEED = 42
  weight_rng = np.random.default_rng(WEIGHT_SEED)
  weights_ItoH = weight_rng.uniform(-1, 1, (I_dim, H_dim))
  weights_HtoO = weight_rng.uniform(-1, 1, H_dim)

  ## Scratch buffers for the hidden layer's pre- and post-activation values
  preActivation_H = np.zeros(H_dim)
  postActivation_H = np.zeros(H_dim)


In [13]:
  #helping function to check the number of true values

  def count_true(list):
    """Return how many entries of `list` compare equal to 1.

    The parameter name shadows the built-in `list`; it is kept so existing
    keyword calls continue to work.
    """
    return sum(1 for entry in list if entry == 1)
    


In [None]:
  ## This cell builds the training and testing sets from `data`.
  ##  * patients are split 80/20 by position in the sorted list of unique patient ids
  ##  * rows with a NaN in any feature are dropped
  ##  * features are standardized with statistics fitted on the training rows only
  ##  * the training set is balanced with SMOTE and then shuffled
  FEATURE_COLUMNS = ['RR(rpm)', 'HR(bpm)', 'SpO2(%)', 'PVCs(/min)']
  RESAMPLE_SEED = 42   # makes SMOTE and the shuffle repeatable

  def _collect_samples(frame, pids):
    """Gather feature rows and 0/1 apnea labels for the given patients.

    For each id in `pids` (in order) the rows of `frame` whose 'patient'
    column equals that id are taken; rows with a NaN in any of
    FEATURE_COLUMNS are skipped. A row's label is 1 when its 'event' string
    contains 'APNEA', otherwise 0.

    Returns a (features, labels) pair of NumPy arrays.
    """
    features = []
    targets = []
    for pid in pids:
      patient = frame[frame['patient'] == pid]
      values = patient[FEATURE_COLUMNS].to_numpy(dtype=float)
      flags = np.array(
        [1 if 'APNEA' in event else 0 for event in patient['event'].to_list()],
        dtype=int)
      complete = ~np.isnan(values).any(axis=1)
      features.append(values[complete])
      targets.append(flags[complete])
    if not features:
      return np.empty((0, len(FEATURE_COLUMNS))), np.empty(0, dtype=int)
    return np.vstack(features), np.concatenate(targets)

  patient_ids = np.unique(data['patient'])
  patient_count = len(patient_ids)

  training_count = int(patient_count * .8)
  ## The test split is the remainder; truncating both 80% and 20% separately
  ## loses a patient whenever patient_count is not a multiple of 5.
  testing_count = patient_count - training_count

  training_patients = patient_ids[:training_count]
  testing_patients = patient_ids[training_count:]

  assert len(training_patients) + len(testing_patients) == patient_count

  ## Not used in this cell; kept in case other cells reference them.
  training_data_set = np.array([])
  cnt = 0

  ## Preparing training data
  X, y = _collect_samples(data, training_patients)

  ## Standardize, then handle the class imbalance of the raw data
  scaler = StandardScaler().fit(X)
  X = scaler.transform(X)
  oversample = SMOTE(random_state=RESAMPLE_SEED)
  X, y = oversample.fit_resample(X, y)

  ## SMOTE puts its synthetic minority rows after the original rows; shuffle so
  ## per-sample gradient descent does not end every epoch on a single class.
  order = np.random.default_rng(RESAMPLE_SEED).permutation(len(X))
  X, y = X[order], y[order]

  ## Taken AFTER scaling and resampling so that training really uses the
  ## standardized, balanced samples, aligned index-for-index with `y`.
  training_data = np.asarray(X)
  training_count = len(training_data)   # from here on: number of training samples

  ## Preparing testing data, scaled with the statistics learned from the
  ## training set so both sets share one feature scale.
  X_t, y_t = _collect_samples(data, testing_patients)
  X_t = scaler.transform(X_t)
  testing_data = np.asarray(X_t)
  testing_count = len(testing_data)     # from here on: number of testing samples



In [16]:
  ## Train the MLP with per-sample gradient descent.
  ## For each epoch and each training sample: run the feed-forward pass, compute
  ## the output error, then backpropagate it and update both weight matrices.
  ## The hidden layer is handled with vector operations instead of per-node and
  ## per-weight Python loops; the update rule is the same.

  for epoch in range(epoch_count):
    for sample in range(training_count):
      inputs = training_data[sample, :]

      ## Feed-forward: input -> hidden -> output (buffers are filled in place)
      preActivation_H[:] = np.dot(inputs, weights_ItoH)
      postActivation_H[:] = logistic(preActivation_H)

      preActivation_O = np.dot(postActivation_H, weights_HtoO)
      postActivation_O = logistic(preActivation_O)

      FE = postActivation_O - y[sample]

      ## Backpropagation: the output error signal does not depend on the hidden
      ## node, so it is computed once per sample.
      S_error = FE * logistic_deriv(preActivation_O)
      gradient_HtoO = S_error * postActivation_H

      ## Both gradients are formed from the pre-update hidden-to-output weights;
      ## entry [i, h] is S_error * weights_HtoO[h] * logistic'(preActivation_H[h]) * inputs[i].
      hidden_delta = S_error * weights_HtoO * logistic_deriv(preActivation_H)
      gradient_ItoH = np.outer(inputs, hidden_delta)

      weights_ItoH -= LR * gradient_ItoH
      weights_HtoO -= LR * gradient_HtoO


In [None]:
## Testing phase: run the feed-forward pass on every testing sample, threshold
## the output at 0.5 and tally the outcome against the label in y_t.
## Counter names: tp = true positives, fp = false positives,
## tf = true negatives, ff = false negatives.
correct_classification_count = 0
Y_pred = []
tp = 0
fp = 0
tf = 0
ff = 0

for sample in range(testing_count):
    # Hidden layer, one node at a time
    for node in range(H_dim):
        preActivation_H[node] = np.dot(testing_data[sample,:], weights_ItoH[:, node])
        postActivation_H[node] = logistic(preActivation_H[node])

    # Output node
    preActivation_O = np.dot(postActivation_H, weights_HtoO)
    postActivation_O = logistic(preActivation_O)

    output = 1 if postActivation_O > 0.5 else 0

    if output == y_t[sample]:
        correct_classification_count += 1
        if y_t[sample]:
            tp += 1
        else:
            tf += 1
    elif y_t[sample]:
        ff += 1
    else:
        fp += 1

    Y_pred.append(output)


print('number of correct classifications', correct_classification_count)
print('Percentage of correct classifications:')
print(100 * correct_classification_count / testing_count)

In [None]:
## Calculating the performance metrics to check the performance of the MLP.
## Counter names: tp = true positives, fp = false positives,
## tf = true negatives, ff = false negatives.

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0

# A plain division raises ZeroDivisionError when, for example, the model never
# predicts the positive class (tp + fp == 0); such metrics are reported as 0.0.
accuracy = _safe_ratio(tf + tp, tf + tp + fp + ff)
precision = _safe_ratio(tp, tp + fp)
recall = _safe_ratio(tp, tp + ff)
f1_score = _safe_ratio(2 * (precision * recall), precision + recall)

print("accuracy:", accuracy)
print("precision:", precision)
print("recall:", recall)
print("f1_score:", f1_score)

print(tp,tf,fp,ff)

