In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import matthews_corrcoef, confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
import tensorflow as tf
from tensorflow import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense

In [None]:
path = './ctg_data_full.xls'

# Spreadsheet column that holds the target.
# For the 3-class NSP column use 'AT', for the 10-class FHR use 'AR'.
# Change the number of output nodes in the model accordingly.
label_col = 'AR'

# Read the feature columns (K:AE) and the label column in a single pass.
# Dropping incomplete rows on the combined frame removes the same rows from
# the features and from the labels, so X and y can never get out of step
# (two separate reads with independent dropna() calls could).
data = pd.read_excel(path, sheet_name='Data', usecols=f'K:AE,{label_col}',
                     skiprows=0, header=1, nrows=2126)
data = data.dropna()  # removing missing values

# feature matrix: every column except the last (label) one
df = data.iloc[:, :-1]
X = df.to_numpy()
print(df.head(5))

# results column, kept as a single-column frame / (n, 1) array
y_df = data.iloc[:, [-1]]
y = y_df.to_numpy()
print(y_df.head(5))


### Scale the dataset

# Standardizing. MinMaxScaler() is a drop-in alternative here, though better
# results were achieved with standardizing.
# NOTE(review): the scaler is fitted on every row, including the rows that are
# later used for validation and testing; fit it on the training rows only if a
# leakage-free evaluation is required.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# create 0-indexed categories of the result classes
# (the encoder expects a 1-D array, hence ravel())
encoder = LabelEncoder()
y_enc = encoder.fit_transform(y.ravel())
# one-hot encode the class indices for the softmax output layer
y_bin = np_utils.to_categorical(y_enc)

In [None]:
# Sequential (unshuffled) split of the rows into
# training: 60%, validation: 10% and testing: 30% (the remainder).
len_data = X.shape[0]
train_size = int(len_data * .6)
val_size = int(len_data * .1)
test_size = len_data - (train_size + val_size)

for caption, amount in (('Training size:', train_size),
                        ('\nValidation size:', val_size),
                        ('\nTesting size:', test_size)):
    print(caption, amount)

# row indices at which the three partitions are cut
cut_points = [train_size, train_size + val_size]

# scaled features
x_train, x_val, x_test = np.split(X, cut_points)

# one-hot encoded targets
y_train, y_val, y_test = np.split(y_bin, cut_points)

# original (un-encoded) labels
y_train_true, y_val_true, y_test_true = np.split(y, cut_points)

In [None]:
# Fix the random seeds so that repeated runs of this cell start from the same
# random state (weight initialisation etc.).
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Derive the layer sizes from the data instead of hard-coding them, so that
# switching the label column (3-class vs. 10-class) needs no edit here.
n_features = x_train.shape[1]  # number of input columns
n_classes = y_train.shape[1]   # width of the one-hot encoded target

# define the model
model = Sequential()

# add layers: input, 2 hidden and an output
model.add(Dense(units=50, input_dim=n_features, activation='relu'))
model.add(Dense(units=30, activation='relu'))
model.add(Dense(units=30, activation='relu'))
model.add(Dense(units=n_classes, activation='softmax'))
# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

# compile the model (metrics is passed as a list, the documented form)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train the model; batch_size=1 means one weight update per sample
history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=30, batch_size=1, verbose=1)

In [None]:
### Accuracy calculation

def predict_labels(features):
    """Return the predicted class label for every row of ``features``.

    The network outputs one probability per class; ``np.argmax`` picks the
    most likely 0-based class index and the label encoder maps that index
    back to the original label values, so the result is directly comparable
    with the ``y_*_true`` arrays.
    """
    probabilities = model.predict(features, batch_size=1)
    return encoder.inverse_transform(np.argmax(probabilities, axis=1))


def accuracy_percent(y_true, y_pred):
    """Return the percentage of matching labels, rounded to two decimals.

    Both inputs are flattened first: comparing an (n, 1) column with an (n,)
    vector would otherwise broadcast to an (n, n) matrix and yield a
    meaningless figure.
    """
    matches = np.ravel(y_true) == np.ravel(y_pred)
    return round(100 * float(np.mean(matches)), 2)


# evaluate the training
score = model.evaluate(x_train, y_train, batch_size=1, verbose=0)
print(f'Train { model.metrics_names[1] }: { round(score[1]*100, 2) }%')

# train, validation and test predictions (in the original label space) and their accuracy
y_train_pred = predict_labels(x_train)
train_acc = accuracy_percent(y_train_true, y_train_pred)
print(f'\nTrain Accuracy by model.predict: { train_acc }%')

y_val_pred = predict_labels(x_val)
val_acc = accuracy_percent(y_val_true, y_val_pred)
print(f'\nValidation Accuracy by model.predict: { val_acc }%')

y_test_pred = predict_labels(x_test)
test_acc = accuracy_percent(y_test_true, y_test_pred)
print(f'\nTest Accuracy by model.predict: { test_acc }%')

In [None]:
### Metrics

# One entry per data split:
# (MCC caption, confusion-matrix banner, true labels, predicted labels)
evaluation_sets = [
    ('Train MCC:', '\n====================TRAIN====================', y_train_true, y_train_pred),
    ('\nVal MCC:', '\n====================VAL====================', y_val_true, y_val_pred),
    ('\nTest MCC:', '\n====================TEST====================', y_test_true, y_test_pred),
]

# Matthews correlation coefficient for each split
for mcc_caption, banner, actual, predicted in evaluation_sets:
    print(mcc_caption, matthews_corrcoef(actual, predicted))

# confusion matrix for each split, each preceded by its banner
for mcc_caption, banner, actual, predicted in evaluation_sets:
    print(banner)
    print(confusion_matrix(actual, predicted))

In [None]:
### history loss function

# Two stacked panels built from the per-epoch values recorded by model.fit():
# loss on top, accuracy below, each with a training and a validation curve.
fig, (ax_loss, ax_acc) = plt.subplots(2, 1, figsize=(20, 10))

# Plot training and validation loss
ax_loss.set_title('Model loss')
ax_loss.set_xlabel('epochs')
ax_loss.set_ylabel('loss')
ax_loss.plot(history.history['loss'], c='r')
ax_loss.plot(history.history['val_loss'], c='g')
ax_loss.legend(['train', 'validation'], loc='upper right')

# Plot training and validation accuracy
ax_acc.set_title('Model accuracy')
ax_acc.set_xlabel('epochs')
ax_acc.set_ylabel('accuracy')
ax_acc.plot(history.history['accuracy'], c='r')
ax_acc.plot(history.history['val_accuracy'], c='g')
ax_acc.legend(['train', 'validation'], loc='upper left')

# keep the upper panel's x-label from colliding with the lower panel's title
fig.tight_layout()
plt.show()