In [None]:
!pip install tensorflow



In [None]:
#Importing Libraries
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

In [None]:
# Dataset Paths
data_folder = "/content/drive/MyDrive/Dataset/Signals_KNN"
ctu_df_path = "/content/drive/MyDrive/Dataset/ctu_df.csv"

In [None]:
# Parameters
sequence_length = 3000
feature_dim = 2

In [None]:
# Load annotation file
ctu_df = pd.read_csv(ctu_df_path, dtype={'ID': str})  # Taking ID as a string
valid_ids = set(ctu_df['ID'].values)

In [None]:
X_list = []
y_list = []

In [None]:
# Processing each CSV file(552 files)
for file in sorted(os.listdir(data_folder)):
    if file.endswith(".csv"):
        file_id = file[:-8]  # Removing ".csv" to get ID
        if file_id not in valid_ids:
            continue  # Skip files not in ctu_df

        # Read CSV (only FHR, UC columns)
        df = pd.read_csv(os.path.join(data_folder, file), usecols=['FHR_KNN', 'UC_KNN'])
        df.dropna(inplace=True)

        # Ensure enough data points
        if len(df) < sequence_length:
            print(f"Skipping {file}, not enough data.")
            continue

        # Select the first `sequence_length` rows
        sequence = df.iloc[:sequence_length].values
        X_list.append(sequence)

        # Assign label from ctu_df
        y_list.append(ctu_df.loc[ctu_df['ID'] == file_id, 'target'].values[0])


In [None]:
# Convert lists to NumPy arrays
X = np.array(X_list, dtype=np.float32)
y = np.array(y_list, dtype=np.int8)

print(f"Final input shape: {X.shape}")  # Should be (552, sequence_length, 2)
print(f"Final output shape: {y.shape}")  # Should be (552,)

# Save for training
np.save("X.npy", X)
np.save("y.npy", y)

Final input shape: (552, 3000, 2)
Final output shape: (552,)


In [None]:
# Flatten X for SMOTE (Convert 3D -> 2D)
n_samples, sequence_length, num_features = X.shape
X_flat = X.reshape(n_samples, sequence_length * num_features)

In [None]:
# Apply SMOTE
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_flat, y)

In [None]:
# Reshape X back to 3D (time series format)
X_resampled = X_resampled.reshape(-1, sequence_length, num_features)
print(f"Balanced dataset shape: {X_resampled.shape}, {y_resampled.shape}")

Balanced dataset shape: (1012, 3000, 2), (1012,)


In [None]:
# Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [None]:
# LSTM Model
def build_lstm_model(sequence_length, num_features):
    model = keras.Sequential([
        layers.Input(shape=(sequence_length, num_features)),
        layers.LSTM(64, return_sequences=True),
        layers.LSTM(32),
        layers.Dense(32, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(1, activation="sigmoid")  # Binary classification
    ])
    return model

In [None]:
# Compiling the model
model = build_lstm_model(sequence_length, num_features)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Train Model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=16, verbose=1)

Epoch 1/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 147ms/step - accuracy: 0.4778 - loss: 0.6997 - val_accuracy: 0.5517 - val_loss: 0.6885
Epoch 2/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 151ms/step - accuracy: 0.5476 - loss: 0.6873 - val_accuracy: 0.5616 - val_loss: 0.6807
Epoch 3/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 153ms/step - accuracy: 0.5347 - loss: 0.6803 - val_accuracy: 0.5813 - val_loss: 0.6686
Epoch 4/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 135ms/step - accuracy: 0.5631 - loss: 0.6684 - val_accuracy: 0.5665 - val_loss: 0.6621
Epoch 5/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 167ms/step - accuracy: 0.6356 - loss: 0.6551 - val_accuracy: 0.5911 - val_loss: 0.6682
Epoch 6/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 152ms/step - accuracy: 0.5760 - loss: 0.6665 - val_accuracy: 0.5862 - val_loss: 0.6628
Epoch 7/50
[1m51/51[0m 

In [None]:
# Evaluating the model (Calculating the model Loss and Accuracy)
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - accuracy: 0.6721 - loss: 0.6610
Test Accuracy: 0.6207


In [None]:
# Evaluate Model
y_pred = (model.predict(X_test) > 0.5).astype("int32")  # Convert probabilities to binary (0 or 1)

# Print classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["Pathological", "Normal"]))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 85ms/step

Classification Report:
               precision    recall  f1-score   support

Pathological       0.59      0.80      0.68       102
      Normal       0.69      0.44      0.53       101

    accuracy                           0.62       203
   macro avg       0.64      0.62      0.61       203
weighted avg       0.64      0.62      0.61       203

