# set up

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare the numeric (major, minor) pair. Comparing the version *strings*
# is lexicographic, so e.g. "0.9" >= "0.20" would wrongly evaluate to True.
_sklearn_major_minor = tuple(
    int(part) for part in re.match(r"(\d+)\.(\d+)", sklearn.__version__).groups()
)
assert _sklearn_major_minor >= (0, 20)

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
# Figures go under <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>.
# NOTE(review): CHAPTER_ID is "decision_trees" although this notebook trains
# an MLP — looks like a template leftover; confirm the intended folder name.
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "decision_trees"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)  # create the folder now; no error if it already exists

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure into IMAGES_PATH.

    Args:
        fig_id: File name without extension; also echoed in the progress message.
        tight_layout: When true, plt.tight_layout() is called before saving.
        fig_extension: File extension, also passed to savefig as the format.
        resolution: DPI passed to savefig.
    """
    file_name = "".join((fig_id, ".", fig_extension))
    path = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, dpi=resolution, format=fig_extension)

# import dataset

## read data from excel

In [2]:
import os
import pandas as pd
# The workbook location can be overridden with the AHI_DATA_PATH environment
# variable so the notebook can run on other machines. The default is the
# original hard-coded path, so existing setups keep working unchanged.
AHI_DATA_PATH = os.environ.get(
    "AHI_DATA_PATH",
    r'C:\Users\ASUS\Desktop\machine learning\MLfinalReport\AHIdata.xlsx',
)
data = pd.read_excel(AHI_DATA_PATH)

## extract y from excel

In [3]:
# Keep only the 'rescored AHI' column, which is the target the labels are derived from.
df = pd.DataFrame(data, columns=['rescored AHI'])
print(type(df))  # still a pandas DataFrame at this point, not a NumPy array
df_2D = df.to_numpy()  # convert to a 2-D NumPy array (single column)
#print(df_2D)#now it is numpy 2d array

<class 'pandas.core.frame.DataFrame'>


In [4]:
# flatten() returns a 1-D copy, so df_2D still holds the raw AHI values afterwards.
y_stack = df_2D.flatten()
print(y_stack)

[ 15.2  12.   27.8  71.2  34.3  86.9  17.5  78.3  40.5  21.2  94.7  40.2
  41.1  11.5  61.4 108.8  59.6  24.1   9.5  33.2  11.9  76.7  29.7  23.7
  38.   79.9  13.8  61.9   8.8   8.5  37.1  38.7  33.9  46.4  34.5  25.8
  82.3  13.4  85.4  42.9  24.3  37.8  16.9  47.6  50.   21.1   1.6   2.1
  12.3  66.3  27.9  17.    2.   11.   53.   20.8  50.9  15.6  66.2  56.1
  49.8  10.2  77.7   7.2   3.   47.3  78.9  15.6   9.   47.2  12.    7.6
  42.3  26.3   7.5  31.5   1.9  15.8  11.6   8.5  20.5  52.4  20.2  16.6
  19.8  37.9  60.2  36.1  24.5  24.6  22.4  30.   16.9   7.7   8.8   7.1
  39.1   4.6   1.8  97.3  60.5   6.9  28.3  41.   75.   13.7  89.2  76.5
  85.   77.   51.2   3.3  78.1   7.9  26.9  80.5  30.9  66.9   2.3]


## encoding y_stack

In [5]:
# Encode AHI into 4 ordinal severity classes (same thresholds as before):
#   AHI >= 30 -> 3,  15 <= AHI < 30 -> 2,  5 <= AHI < 15 -> 1,  AHI < 5 -> 0
# The classes are computed from the raw values in df_2D instead of overwriting
# y_stack element by element. Re-running this cell therefore gives the same
# result (the old in-place loop turned every label into 0 on a second run),
# and the sample count (previously hard-coded as 119) no longer matters.
_ahi_raw = df_2D.flatten()
y_stack = np.select(
    [_ahi_raw >= 30, _ahi_raw >= 15, _ahi_raw >= 5],  # first matching condition wins
    [3, 2, 1],
    default=0,
).astype(_ahi_raw.dtype)  # keep the dtype y_stack had before encoding

## extract inputs from excel

In [6]:
def _extract_feature(frame, column):
    """Return one column of ``frame`` in the three forms this notebook uses.

    Args:
        frame: DataFrame holding the spreadsheet data.
        column: Name of the column to extract.

    Returns:
        Tuple ``(sub_frame, array_2d, array_1d)``: the single-column DataFrame,
        its 2-D NumPy array, and a flattened 1-D copy of that array.

    Raises:
        KeyError: If ``column`` is not in ``frame``. Without this check,
            ``pd.DataFrame(frame, columns=[...])`` silently yields an all-NaN
            column for a misspelled name.
    """
    if column not in frame.columns:
        raise KeyError("column {!r} not found in the input data".format(column))
    sub_frame = pd.DataFrame(frame, columns=[column])
    array_2d = sub_frame.to_numpy()  # convert into numpy array
    return sub_frame, array_2d, array_2d.flatten()  # flatten into 1d array


# BMI
x1, x1_2D, x_bmi = _extract_feature(data, 'BMI')
# Age
x2, x2_2D, x_age = _extract_feature(data, 'Age')
# neck
x3, x3_2D, x_neck = _extract_feature(data, 'neck')
# desaturation index
x4, x4_2D, x_dsi = _extract_feature(data, 'desaturation index')
# CVHR-OR-CEI
x5, x5_2D, x_cvhrorcei = _extract_feature(data, 'Aligned CVHR-OR-CEI')
# CVHRI
x6, x6_2D, x_cvhri = _extract_feature(data, 'CVHRI')
# CEI
x7, x7_2D, x_cei = _extract_feature(data, 'CEI')

# Data preprocessing

## decide your inputs

In [7]:
# Feature matrix: the selected 1-D input arrays become the columns
# (BMI, age, neck, desaturation index, aligned CVHR-OR-CEI).
X_stack = np.stack(
    [x_bmi, x_age, x_neck, x_dsi, x_cvhrorcei],
    axis=-1,
)

In [8]:
# Show the assembled feature matrix (one column per selected input).
print(X_stack)

[[23.2 32.  37.   0.2 24.9]
 [23.9 50.  35.   5.8 16.9]
 [28.  63.  42.  15.8 11.5]
 [30.4 43.  40.  56.1 53.7]
 [26.4 27.  37.   6.7 13.5]
 [39.  29.  43.  60.  25.2]
 [23.6 57.  32.  10.9  6.4]
 [30.7 45.  42.5 81.2 75.4]
 [27.1 46.  41.  26.1 33.7]
 [25.7 32.  36.5  3.7  8.2]
 [33.2 38.  42.  92.3 79.4]
 [24.4 49.  37.5 23.9 23.1]
 [23.  49.  35.  12.5 27.4]
 [23.3 35.  39.   0.2 11. ]
 [28.4 62.  42.  58.3 41.3]
 [33.6 27.  40.  95.7 88.5]
 [38.2 51.  46.  56.9 59. ]
 [27.8 21.  36.   1.8 22.8]
 [22.9 40.  38.   0.4  2.4]
 [34.  42.  39.  22.1 10.6]
 [30.4 48.  40.   3.5 10. ]
 [33.6 37.  45.  69.8 86.7]
 [20.2 44.  35.   1.6 22. ]
 [27.2 29.  42.  10.7 20.6]
 [38.1 39.  41.  27.3 31.3]
 [29.4 37.  39.  68.  75.7]
 [29.  66.  39.   4.9 20.6]
 [27.7 56.  34.  22.2 42.7]
 [24.7 34.  36.   0.3  4.2]
 [21.8 27.  35.   0.  14.7]
 [22.9 31.  38.  26.  22.5]
 [25.6 44.  39.  22.3 39.3]
 [28.1 38.  40.  15.  13. ]
 [36.4 65.  42.  16.3 14.5]
 [22.8 67.  35.   7.4 34.3]
 [26.3 41.  33.   4.

## train test split

In [9]:
from sklearn.model_selection import train_test_split 
# Hold out 20% of the rows for testing. shuffle=False keeps the spreadsheet
# order, so the split is deterministic and the test rows are the last ones.
# NOTE(review): no shuffling/stratification — if the rows are ordered in any
# way the test set may not be representative of all classes; confirm.
X_train, X_test, y_train, y_test = train_test_split(X_stack, y_stack, test_size=0.2, shuffle=False)

# Training MLP model

## create a MLP model

In [10]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(3)

In [11]:
# model=tf.keras.models.Sequential()
# model.add(tf.keras.layers.Dense(units=len(X_stack.T), activation=tf.nn.relu, input_dim=5, kernel_initializer='normal'))
# model.add(tf.keras.layers.Dense(units=3, activation=tf.nn.softmax, kernel_initializer='normal'))
# model.add(tf.keras.layers.Dense(units=3, activation=tf.nn.softmax, kernel_initializer='normal'))
# model.add(tf.keras.layers.Dense(units=3, activation=tf.nn.softmax, kernel_initializer='normal'))
# model.add(tf.keras.layers.Dense(units=1, activation=tf.nn.softmax, kernel_initializer='normal'))
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model.summary()

In [20]:
# Small MLP classifier:
#   inputs -> Dense(n_features, relu) -> Dense(3, relu) -> Dense(NUM_CLASSES, softmax)
NUM_CLASSES = 4  # AHI severity classes 0..3 produced by the encoding step
n_features = X_train.shape[1]  # derive the input width from the data instead of hard-coding 5

model=tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(units=n_features,
                               activation=tf.nn.relu,
                               input_dim=n_features))
model.add(tf.keras.layers.Dense(units=3,
                               activation=tf.nn.relu))
# One output unit per class. A softmax over a single unit (the previous
# setup) always outputs exactly 1.0, so the network could not separate classes.
model.add(tf.keras.layers.Dense(units=NUM_CLASSES,
                               activation=tf.nn.softmax))

learning_rate=0.01
# `learning_rate=` is the supported keyword; `lr=` is a deprecated alias.
opt1=tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
model.compile(
    optimizer=opt1,
    # y_train holds class ids (0..3), not one-hot vectors, so the sparse
    # variant of categorical cross-entropy is the matching loss.
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'])
history=model.fit(X_train, y_train,
         epochs=20,
         batch_size=10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Fit `model` for 20 more epochs, holding out 15% of the training rows for
# validation; the returned History object provides the val_* curves.
train_history = model.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    batch_size=10,
    validation_split=0.15,
    verbose=2,
)

# testing data evaluation

In [None]:
# Evaluate on the held-out test split; `score` is whatever evaluate() returns
# for the compiled loss and metrics.
score=model.evaluate(X_test, y_test, batch_size=12)
print("score=", score)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Predict from the test *features*; the previous code passed y_test (the
# labels) to predict(). predict() returns one score per output unit for each
# sample, so take the arg-max along the class axis to obtain predicted class
# ids that confusion_matrix can compare with y_test.
y_pred = np.argmax(model.predict(X_test), axis=1)
confusion_matrix(y_test, y_pred)

In [None]:
import seaborn as sns
# Display names for class ids 0..3, in that order.
class_names = ['No apnea', 'low AHI', 'medium AHI', 'high AHI']
tick_marks = np.arange(len(class_names))

# Get the confusion matrix with a fixed label set so it is always
# len(class_names) x len(class_names) and its rows/columns line up with
# class_names even when a class does not occur in y_test / y_pred.
matrix = confusion_matrix(y_test, y_pred, labels=tick_marks)
# Row-normalise. Rows of classes absent from y_test sum to 0; leave them at 0
# instead of producing NaN from 0/0.
row_totals = matrix.sum(axis=1, keepdims=True)
matrix = np.divide(matrix, row_totals,
                   out=np.zeros(matrix.shape, dtype=float),
                   where=row_totals != 0)

# Build the plot
plt.figure(figsize=(16,7))
sns.set(font_scale=1.4)
sns.heatmap(matrix, annot=True, annot_kws={'size':10},
            cmap=plt.cm.Greens, linewidths=0.2)

# Add labels to the plot
# Heatmap cell i spans [i, i + 1] on both axes, so both tick sets go at
# i + 0.5 (previously only the y ticks were centred on the cells).
tick_marks2 = tick_marks + 0.5
plt.xticks(tick_marks2, class_names, rotation=0)
plt.yticks(tick_marks2, class_names, rotation=0)
plt.xlabel('Predicted label')
plt.ylabel('True label')
# The model evaluated here is the MLP, not a random forest.
plt.title('Confusion Matrix for MLP Model')
plt.show()

## plot accuracy and loss curve

In [None]:
import matplotlib.pyplot as plt
# Overlay the per-epoch accuracy and loss stored in `history` on one axis.
for _curve in ('accuracy', 'loss'):
    plt.plot(history.history[_curve])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('acc&loss')
plt.legend(['acc', 'loss'], loc='upper right')
plt.show()

## plot training and validation history

In [None]:
def show_train_history(train_history, train, validation):
    """Plot a training curve and its validation counterpart against the epoch.

    Args:
        train_history: Object whose ``history`` mapping holds the per-epoch
            values (e.g. the value returned by ``model.fit``).
        train: Key of the training curve; also used as the y-axis label.
        validation: Key of the validation curve.
    """
    recorded = train_history.history
    for key in (train, validation):
        plt.plot(recorded[key])
    plt.title('Train History')
    plt.xlabel('epoch')
    plt.ylabel(train)
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
# Training vs. validation accuracy per epoch, taken from train_history.
show_train_history(train_history, 'accuracy', 'val_accuracy')

In [None]:
# Training vs. validation loss per epoch, taken from train_history.
show_train_history(train_history, 'loss', 'val_loss')