# RNN results

# Without Cross validation and Without considering missing values

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jay\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes 0-1.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for RNN input (samples, time steps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Define the RNN model
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on training data
train_accuracy = model.evaluate(X_train, y_train, verbose=0)[1]

# Evaluate the model on test data
test_accuracy = model.evaluate(X_test, y_test, verbose=0)[1]

print(f"Accuracy on training data: {train_accuracy * 100:.2f}%")
print(f"Accuracy on test data: {test_accuracy * 100:.2f}%")


Epoch 1/10


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 167ms/step - accuracy: 0.9470 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step - accuracy: 0.9136 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step - accuracy: 0.9334 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step - accuracy: 0.9241 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step - accuracy: 0.9077 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step - accuracy: 0.9327 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the dataset
data_path = r"C:\Users\Jay\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes 0-1.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for RNN input (samples, time steps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Define the RNN model
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on training data
train_accuracy = model.evaluate(X_train, y_train, verbose=0)[1]

# Evaluate the model on test data
test_accuracy = model.evaluate(X_test, y_test, verbose=0)[1]

print(f"Accuracy on training data: {train_accuracy * 100:.2f}%")
print(f"Accuracy on test data: {test_accuracy * 100:.2f}%")

# Make predictions
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Compute confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)


Epoch 1/10


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 158ms/step - accuracy: 0.9125 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - accuracy: 0.9371 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 90ms/step - accuracy: 0.9160 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 109ms/step - accuracy: 0.9233 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step - accuracy: 0.9273 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 90ms/step - accuracy: 0.9277 - loss: nan - val_accuracy: 0.9359 - val_loss: nan
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms

# + Cross validation - considering missing values

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Reshape data for RNN input (samples, time steps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Define the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create and train the model
model = create_model()
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# Evaluate the model on test data
y_test_pred = (model.predict(X_test) > 0.5).astype(int)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Evaluate the model on training data
y_train_pred = (model.predict(X_train) > 0.5).astype(int)
train_accuracy = accuracy_score(y_train, y_train_pred)

print(f"Accuracy on train data: {train_accuracy * 100:.2f}%")
print(f"Accuracy on test data: {test_accuracy * 100:.2f}%")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on train data: 100.00%
Accuracy on test data: 100.00%


# The optimal code with Cross-validation (without considering missing values).

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")


Mean Accuracy: 91.67%


In [2]:
df

Unnamed: 0,Patient,1,2,3,4,5,6,7,8,9,...,82,83,84,85,86,87,88,89,90,label
0,1001,145.118750,142.607292,136.781250,138.810417,138.488542,149.541667,151.360417,145.588542,136.220833,...,,,,,,,,,,0
1,1002,150.323958,138.341667,139.404167,148.260417,139.529167,161.725000,161.821875,151.493750,151.245833,...,,,,,,,,,,0
2,1003,143.712500,148.243750,146.097917,135.853125,141.971875,134.325000,135.838542,134.287500,134.361458,...,,,,,,,,,,0
3,1004,157.179167,156.259375,154.177083,143.554167,157.220833,131.486458,156.216667,155.942708,156.213542,...,,,,,,,,,,0
4,1005,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547,2042,47.537500,141.019792,142.528125,142.020833,143.883333,102.576042,138.321875,122.770833,134.607292,...,,,,,,,,,,1
548,2043,55.040625,119.585417,111.237500,126.494792,69.259375,122.856250,99.140625,126.675000,111.025000,...,0.0,0.0,0.0,0.0,,,,,,1
549,2044,146.455208,156.293750,123.094792,150.250000,157.241667,165.626042,165.131250,166.731250,163.304167,...,,,,,,,,,,1
550,2045,124.087500,116.191667,121.347917,112.247917,122.673958,123.755208,124.393750,126.363542,116.915625,...,,,,,,,,,,1


# Considering missing values with mean strategy (+cross validation)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")


Mean Accuracy: 91.49%


# With CV + Missing values with mean strategy

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using median imputation (positive values)
imputer = SimpleImputer(strategy='median')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")


Mean Accuracy: 92.03%


In [3]:
df

Unnamed: 0,Patient,1,2,3,4,5,6,7,8,9,...,82,83,84,85,86,87,88,89,90,label
0,1001.0,145.118750,142.607292,136.781250,138.810417,138.488542,149.541667,151.360417,145.588542,136.220833,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,0
1,1002.0,150.323958,138.341667,139.404167,148.260417,139.529167,161.725000,161.821875,151.493750,151.245833,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,0
2,1003.0,143.712500,148.243750,146.097917,135.853125,141.971875,134.325000,135.838542,134.287500,134.361458,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,0
3,1004.0,157.179167,156.259375,154.177083,143.554167,157.220833,131.486458,156.216667,155.942708,156.213542,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,0
4,1005.0,134.291146,134.615625,135.044792,135.094792,133.101042,134.262500,133.883854,134.187500,134.628646,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547,2042.0,47.537500,141.019792,142.528125,142.020833,143.883333,102.576042,138.321875,122.770833,134.607292,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,1
548,2043.0,55.040625,119.585417,111.237500,126.494792,69.259375,122.856250,99.140625,126.675000,111.025000,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,1
549,2044.0,146.455208,156.293750,123.094792,150.250000,157.241667,165.626042,165.131250,166.731250,163.304167,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,1
550,2045.0,124.087500,116.191667,121.347917,112.247917,122.673958,123.755208,124.393750,126.363542,116.915625,...,90.268229,90.580208,85.51875,75.407292,78.180208,72.880208,75.021875,62.307292,62.077083,1


# Considering missing values as meadian strategy ( without cross validation)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using median imputation (positive values)
imputer = SimpleImputer(strategy='median')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Reshape data for RNN input (samples, time steps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Define the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create and train the model
model = create_model()
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# Evaluate the model on test data
y_test_pred = (model.predict(X_test) > 0.5).astype(int)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Evaluate the model on training data
y_train_pred = (model.predict(X_train) > 0.5).astype(int)
train_accuracy = accuracy_score(y_train, y_train_pred)

print(f"Accuracy on train data: {train_accuracy * 100:.2f}%")
print(f"Accuracy on test data: {test_accuracy * 100:.2f}%")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on train data: 93.01%
Accuracy on test data: 88.55%


# Ecxperiments with NO-INFORMATION-CELLS as 0 values.

# with cv and no info

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Exclude the "label" column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")


KeyboardInterrupt: 

# Withot CV and No info

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 or 2 to 0 or 1)
df['label'] = df['label'].apply(lambda x: 0 if x == 1 else 1)

# Exclude the "label" column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Reshape data for RNN input (samples, time steps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Define the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create and train the model
model = create_model()
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# Evaluate the model on test data
y_test_pred = (model.predict(X_test) > 0.5).astype(int)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Evaluate the model on training data
y_train_pred = (model.predict(X_train) > 0.5).astype(int)
train_accuracy = accuracy_score(y_train, y_train_pred)

print(f"Accuracy on train data: {train_accuracy * 100:.2f}%")
print(f"Accuracy on test data: {test_accuracy * 100:.2f}%")


# With apgr1 as categorical labels

# +CV + Missing value (mean strategy)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation (positive values)
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert softmax output to class labels
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")




Mean Accuracy: 49.10%


**Results:** apgar1 outcome has nothing to do with FHR treands. 50% accuracy means having or not having FHR data doesn't make any difference. It is like you toss a coin.

# +CV + No Missing value treatment (let 0 values be).

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert softmax output to class labels
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")




Mean Accuracy: 0.36%


# +CV + No Missing value treatment (0 values mean NO INFO).

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Replace 0 values with NaN to exclude them from calculations
df[df != 0] = df[df != 0].replace(0, np.nan)

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert softmax output to class labels
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.nanmean(accuracies)  # Exclude NaN values
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")




Mean Accuracy: 0.36%


# With apgr5 as categorical labels

# +CV + Missing value (mean strategy)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation (positive values)
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert softmax output to class labels
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")




Mean Accuracy: 43.40%


**Results:** apgar5 outcome has nothing to do with FHR treands. The results is almost the same as apgar1. Achieving almost 50% accuracy means having or not having FHR data doesn't make any difference. It is like you toss a coin.

In [4]:
df

Unnamed: 0,Patient,1,2,3,4,5,6,7,8,9,...,82,83,84,85,86,87,88,89,90,label
0,1001.000000,145.118750,142.607292,136.781250,138.810417,138.488542,149.541667,151.360417,145.588542,136.220833,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,7
1,1002.000000,150.323958,138.341667,139.404167,148.260417,139.529167,161.725000,161.821875,151.493750,151.245833,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,7
2,1003.000000,143.712500,148.243750,146.097917,135.853125,141.971875,134.325000,135.838542,134.287500,134.361458,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,8
3,1004.000000,157.179167,156.259375,154.177083,143.554167,157.220833,131.486458,156.216667,155.942708,156.213542,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,8
4,1005.000000,128.218641,130.512258,128.993490,129.726250,128.734919,128.729547,128.182656,129.147454,129.817015,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548,2043.000000,55.040625,119.585417,111.237500,126.494792,69.259375,122.856250,99.140625,126.675000,111.025000,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,8
549,2044.000000,146.455208,156.293750,123.094792,150.250000,157.241667,165.626042,165.131250,166.731250,163.304167,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,7
550,2045.000000,124.087500,116.191667,121.347917,112.247917,122.673958,123.755208,124.393750,126.363542,116.915625,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,8
551,2046.000000,74.005208,78.928125,99.091667,66.653125,42.480208,108.951042,102.046875,115.465625,68.359375,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,6


# - CV + Missing value (mean strategy)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation (positive values)
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Split the data into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

model = create_model()
model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

# Training accuracy
y_train_pred = np.argmax(model.predict(X_train), axis=-1)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Test accuracy
y_test_pred = np.argmax(model.predict(X_test), axis=-1)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 36.43%
Test Accuracy: 38.74%


# +CV + Missing value (median strategy)

In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using median imputation (positive values)
imputer = SimpleImputer(strategy='median')  # Changed to median strategy
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert softmax output to class labels
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")




Mean Accuracy: 40.69%


# +CV + No Missing value treatment (let 0 values be).

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert softmax output to class labels
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")




Mean Accuracy: 0.00%


# +CV + No Missing value treatment (0 values mean NO INFO).

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Replace 0 values with NaN to exclude them from calculations
df[df != 0] = df[df != 0].replace(0, np.nan)

# Convert labels to binary (1 to 10 to 0 to 9)
df['label'] = df['label'].apply(lambda x: x - 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # Adjust output units for 10 classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

# Perform cross-validation
for train_index, test_index in kfold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = create_model()
    model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert softmax output to class labels
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Calculate and print the mean accuracy over all folds
mean_accuracy = np.nanmean(accuracies)  # Exclude NaN values
print(f"Mean Accuracy: {mean_accuracy * 100:.2f}%")




Mean Accuracy: 0.00%


## Prediction accuracy on APGAR1 outcome (Binary encoding: 0-6 as 0 and 7-10 as 1)

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation (positive values)
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Convert labels to binary (1 to 6 to 0 and 7 to 10 to 1)
df['label'] = df['label'].apply(lambda x: 0 if x <= 6 else 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Output a single binary value
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

model = create_model()
model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

# Test accuracy
y_test_pred = (model.predict(X_test) > 0.5).astype(int)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Test Accuracy: 81.98%


## Prediction dccuracy on APGAR5 outcome (Binary encoding: 0-6 as 0 and 7-10 as 1)

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the dataset
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\Research\MLFHRT\FHR-dataset-CTUUHB\combined_FHR_data_resampled_with_minutes.csv"
df = pd.read_csv(data_path)

# Exclude the last column from zero value treatment
cols_to_impute = df.columns[:-1]
df[cols_to_impute] = df[cols_to_impute].replace(0, np.nan)

# Impute missing values using mean imputation (positive values)
imputer = SimpleImputer(strategy='mean')
df[cols_to_impute] = imputer.fit_transform(df[cols_to_impute])

# Convert labels to binary (1 to 6 to 0 and 7 to 10 to 1)
df['label'] = df['label'].apply(lambda x: 0 if x <= 6 else 1)

# Extract features (FHR time series) and labels
X = df.iloc[:, 1:-1].values
y = df['label'].values

# Standardize the features (mean=0, std=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define a function to create the RNN model with improved architecture
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X.shape[1], 1)),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Output a single binary value
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

model = create_model()
model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)  # Increase epochs and batch size

# Test accuracy
y_test_pred = (model.predict(X_test) > 0.5).astype(int)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Test Accuracy: 99.10%


In [2]:
df

Unnamed: 0,Patient,1,2,3,4,5,6,7,8,9,...,82,83,84,85,86,87,88,89,90,label
0,1001.000000,145.118750,142.607292,136.781250,138.810417,138.488542,149.541667,151.360417,145.588542,136.220833,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
1,1002.000000,150.323958,138.341667,139.404167,148.260417,139.529167,161.725000,161.821875,151.493750,151.245833,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
2,1003.000000,143.712500,148.243750,146.097917,135.853125,141.971875,134.325000,135.838542,134.287500,134.361458,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
3,1004.000000,157.179167,156.259375,154.177083,143.554167,157.220833,131.486458,156.216667,155.942708,156.213542,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
4,1005.000000,128.218641,130.512258,128.993490,129.726250,128.734919,128.729547,128.182656,129.147454,129.817015,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548,2043.000000,55.040625,119.585417,111.237500,126.494792,69.259375,122.856250,99.140625,126.675000,111.025000,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
549,2044.000000,146.455208,156.293750,123.094792,150.250000,157.241667,165.626042,165.131250,166.731250,163.304167,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
550,2045.000000,124.087500,116.191667,121.347917,112.247917,122.673958,123.755208,124.393750,126.363542,116.915625,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
551,2046.000000,74.005208,78.928125,99.091667,66.653125,42.480208,108.951042,102.046875,115.465625,68.359375,...,85.711139,84.493478,83.431677,76.781132,76.0516,73.467824,71.639732,66.866414,68.349621,1
