In [None]:
!pip install tensorflow



In [None]:
#Importing Libraries
import os
import numpy as np
import pandas as pd
import h5py
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Input, LayerNormalization, MultiHeadAttention, Dense, Dropout, GlobalAveragePooling1D
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

In [None]:
# Dataset Paths
# The defaults are the original Colab locations. Set the CTU_SIGNALS_DIR / CTU_DF_PATH
# environment variables to run the notebook elsewhere without editing this cell.
data_folder = os.environ.get("CTU_SIGNALS_DIR", "/content/drive/MyDrive/Design Lab/Database/signals")  # folder of per-recording CSV files
ctu_df_path = os.environ.get("CTU_DF_PATH", "/content/ctu_df.csv")  # annotation table with 'ID' and 'target' columns

In [None]:
# Parameters
# sequence_length: number of leading rows kept from every recording.
# feature_dim: number of signal columns per row (FHR and UC).
sequence_length, feature_dim = 1500, 2

In [None]:
# Load annotation file
# 'ID' is read as text so it can be compared directly with the CSV file names.
ctu_df = pd.read_csv(ctu_df_path, dtype={'ID': str})
# Set of annotated recording IDs, used for fast membership checks.
valid_ids = set(ctu_df['ID'].tolist())

In [None]:
# Accumulators: one signal window per recording (X_list) and its matching label (y_list).
X_list, y_list = [], []

In [None]:
# Processing each CSV file(552 files)
# Start from empty accumulators so that re-running this cell does not append
# every recording a second time.
X_list = []
y_list = []

# Build the ID -> label lookup once instead of scanning ctu_df for every file.
# drop_duplicates keeps the first row per ID, matching the previous `.values[0]` choice.
label_by_id = ctu_df.drop_duplicates(subset='ID').set_index('ID')['target'].to_dict()

for file in sorted(os.listdir(data_folder)):
    file_id, extension = os.path.splitext(file)  # "1001.csv" -> ("1001", ".csv")
    if extension != ".csv" or file_id not in label_by_id:
        continue  # Skip non-CSV files and files not in ctu_df

    # Read CSV (only FHR, UC columns) and drop rows with missing values
    df = pd.read_csv(os.path.join(data_folder, file), usecols=['FHR', 'UC']).dropna()

    # Ensure enough data points
    if len(df) < sequence_length:
        print(f"Skipping {file}, not enough data.")
        continue

    # Select the first `sequence_length` rows
    X_list.append(df.iloc[:sequence_length].values)

    # Assign label from ctu_df
    y_list.append(label_by_id[file_id])


In [None]:
# Turn the per-recording lists into dense arrays:
# float32 signal windows and int8 class labels.
X = np.asarray(X_list, dtype=np.float32)
y = np.asarray(y_list, dtype=np.int8)

print(f"Final input shape: {X.shape}")  # Should be (552, sequence_length, 2)
print(f"Final output shape: {y.shape}")  # Should be (552,)

# Persist both arrays to the working directory for training
np.save("X.npy", X)
np.save("y.npy", y)

Final input shape: (552, 1500, 2)
Final output shape: (552,)


In [None]:
# Flatten X for SMOTE (Convert 3D -> 2D)
n_samples, sequence_length, num_features = X.shape
X_flat = X.reshape(n_samples, sequence_length * num_features)

In [None]:
# Apply SMOTE
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_flat, y)

In [None]:
# Reshape X back to 3D (time series format)
X_resampled = X_resampled.reshape(-1, sequence_length, num_features)
print(f"Balanced dataset shape: {X_resampled.shape}, {y_resampled.shape}")

Balanced dataset shape: (1012, 1500, 2), (1012,)


In [None]:
# Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [None]:
# Hyperparameters
# Input geometry is read from the loaded array: time steps (e.g., 1500) and channels (FHR and UC → 2).
sequence_length, num_features = X.shape[1], X.shape[2]

embed_dim = 32              # width of the input projection; also passed as key_dim to attention
num_heads = 4               # attention heads per Transformer block
ff_dim = 64                 # hidden width of the feed-forward sub-network
num_transformer_blocks = 2  # how many Transformer blocks are stacked
dropout_rate = 0.2          # dropout rate used inside each Transformer block

In [None]:
# Transformer Block
def transformer_block(inputs, embed_dim, num_heads, ff_dim, dropout=0.2):
    """Apply one self-attention + feed-forward block to `inputs`.

    Each of the two sub-layers is followed by dropout, a residual addition
    and layer normalization (epsilon=1e-6).

    Args:
        inputs: Tensor fed to the block; it is used as both query and value
            of the attention layer and must be addable to a tensor whose last
            dimension is `embed_dim`.
        embed_dim: Passed as `key_dim` to the attention layer and used as the
            output width of the feed-forward network.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) layer of the feed-forward network.
        dropout: Dropout rate applied after each sub-layer.

    Returns:
        The normalized output tensor of the block.
    """
    # Self-attention sub-layer with residual connection
    self_attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
    attn_out = self_attention(inputs, inputs)
    attn_out = layers.Dropout(dropout)(attn_out)
    attn_normed = layers.LayerNormalization(epsilon=1e-6)(inputs + attn_out)

    # Feed-forward sub-layer with residual connection
    feed_forward = keras.Sequential([
        layers.Dense(ff_dim, activation="relu"),
        layers.Dense(embed_dim),
    ])
    ff_out = feed_forward(attn_normed)
    ff_out = layers.Dropout(dropout)(ff_out)
    return layers.LayerNormalization(epsilon=1e-6)(attn_normed + ff_out)


In [None]:
# Build Transformer Model
class PositionalEmbedding(layers.Layer):
    """Add a learned embedding for every time step to the incoming tensor.

    The embedding table lives inside this layer, so its weights are tracked
    by any model that uses the layer and are updated during training.
    """

    def __init__(self, sequence_length, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
        self.position_embeddings = layers.Embedding(input_dim=sequence_length, output_dim=embed_dim)

    def call(self, inputs):
        # One index per time step; the (sequence_length, embed_dim) result
        # broadcasts over the batch dimension of `inputs`.
        positions = tf.range(start=0, limit=self.sequence_length, delta=1)
        return inputs + self.position_embeddings(positions)

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = super().get_config()
        config.update({"sequence_length": self.sequence_length, "embed_dim": self.embed_dim})
        return config


def build_transformer_model(sequence_length, num_features):
    """Build the (uncompiled) Transformer classifier for fixed-length sequences.

    Reads the module-level hyperparameters `embed_dim`, `num_heads`, `ff_dim`,
    `num_transformer_blocks` and `dropout_rate`.

    Args:
        sequence_length: Number of time steps per input sequence.
        num_features: Number of features per time step.

    Returns:
        A `keras.Model` named "TimeSeriesTransformer" mapping inputs of shape
        (sequence_length, num_features) to a single sigmoid output.
    """
    inputs = layers.Input(shape=(sequence_length, num_features))

    # Project input to embedding space
    x = layers.Dense(embed_dim)(inputs)

    # Add positional encoding. This is done inside a layer (rather than by calling
    # Embedding on an eager tf.range tensor) so that the embedding weights belong to
    # the model graph and are trained instead of being frozen in as a constant.
    x = PositionalEmbedding(sequence_length, embed_dim)(x)

    # Transformer Blocks
    for _ in range(num_transformer_blocks):
        x = transformer_block(x, embed_dim, num_heads, ff_dim, dropout_rate)

    # Global Average Pooling over the time axis
    x = layers.GlobalAveragePooling1D()(x)

    # Final Classification Head
    x = layers.Dense(32, activation="relu")(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)  # Binary classification (0 or 1)

    return keras.Model(inputs, outputs, name="TimeSeriesTransformer")



In [None]:
# Build the network and configure it for binary classification:
# Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
from tensorflow.keras import layers, models
model = build_transformer_model(sequence_length, num_features)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Fit the model for 50 epochs in mini-batches of 16.
# (X_test, y_test) is passed as validation data, so its metrics are logged after every epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=16,
    verbose=1,
)

Epoch 1/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 343ms/step - accuracy: 0.5241 - loss: 0.7182 - val_accuracy: 0.5616 - val_loss: 0.6694
Epoch 2/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 106ms/step - accuracy: 0.5995 - loss: 0.6601 - val_accuracy: 0.6650 - val_loss: 0.6404
Epoch 3/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 106ms/step - accuracy: 0.5896 - loss: 0.6665 - val_accuracy: 0.6995 - val_loss: 0.6148
Epoch 4/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 110ms/step - accuracy: 0.7120 - loss: 0.5972 - val_accuracy: 0.6502 - val_loss: 0.6056
Epoch 5/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 110ms/step - accuracy: 0.6409 - loss: 0.6146 - val_accuracy: 0.6650 - val_loss: 0.5808
Epoch 6/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 106ms/step - accuracy: 0.6648 - loss: 0.5877 - val_accuracy: 0.7044 - val_loss: 0.6002
Epoch 7/50
[1m51/51[0m

In [None]:
# Score the trained model on the test split; with the compiled metric list
# this yields the loss followed by the accuracy.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 71ms/step - accuracy: 0.8317 - loss: 0.3965
Test Accuracy: 0.8325


In [None]:
# Evaluate Model
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
# Original author's note: 1-->Normal, 0-->Pathological (confirm against ctu_df['target']).
predicted_probabilities = model.predict(X_test)
y_pred = (predicted_probabilities > 0.5).astype("int32")

# Per-class precision / recall / F1 on the test split
print("\nClassification Report:\n", classification_report(y_test, y_pred))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 211ms/step

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.93      0.85       102
           1       0.91      0.73      0.81       101

    accuracy                           0.83       203
   macro avg       0.85      0.83      0.83       203
weighted avg       0.85      0.83      0.83       203

