In [87]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN, Bidirectional
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix


# # Identify numerical and categorical columns
# num_cols = X_train.select_dtypes(include=['int64', 'float64']).columns
# cat_cols = X_train.select_dtypes(include=['object', 'category']).columns

# # Handle missing values for numeric columns
# num_imputer = SimpleImputer(strategy='mean')
# X_train[num_cols] = num_imputer.fit_transform(X_train[num_cols])
# X_test[num_cols] = num_imputer.transform(X_test[num_cols])

# # Handle missing values for categorical columns (if any)
# cat_imputer = SimpleImputer(strategy='most_frequent')
# X_train[cat_cols] = cat_imputer.fit_transform(X_train[cat_cols])
# X_test[cat_cols] = cat_imputer.transform(X_test[cat_cols])

# # Label Encoding for categorical columns
# label_encoders = {}
# for col in cat_cols:
#     le = LabelEncoder()
#     X_train[col] = le.fit_transform(X_train[col])
#     X_test[col] = le.transform(X_test[col])  # Assumes test data has no unseen categories
#     label_encoders[col] = le

# # Feature scaling
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# # Final preprocessed outputs
# print("X_train_scaled shape:", X_train_scaled.shape)
# print("X_test_scaled shape:", X_test_scaled.shape)

In [101]:
# Load the datasets
train_df = pd.read_csv('Paitients_Files_Train.csv')

train_df

Unnamed: 0,ID,PRG,PL,PR,SK,TS,M11,BD2,Age,Insurance,Sepssis
0,ICU200010,6,148,72,35,0,33.6,0.627,50,0,Positive
1,ICU200011,1,85,66,29,0,26.6,0.351,31,0,Negative
2,ICU200012,8,183,64,0,0,23.3,0.672,32,1,Positive
3,ICU200013,1,89,66,23,94,28.1,0.167,21,1,Negative
4,ICU200014,0,137,40,35,168,43.1,2.288,33,1,Positive
...,...,...,...,...,...,...,...,...,...,...,...
594,ICU200604,6,123,72,45,230,33.6,0.733,34,0,Negative
595,ICU200605,0,188,82,14,185,32.0,0.682,22,1,Positive
596,ICU200606,0,67,76,0,0,45.3,0.194,46,1,Negative
597,ICU200607,1,89,24,19,25,27.8,0.559,21,0,Negative


In [63]:
print("Training Data Column Types:\n")
print(train_df.dtypes)

print("\n" + "="*50 + "\n")


Training Data Column Types:

PRG            int64
PL             int64
PR             int64
SK             int64
TS             int64
M11          float64
BD2          float64
Age            int64
Insurance      int64
Sepssis       object
dtype: object




In [51]:
print("Missing values in Train Data:\n")
print(train_df.isnull().sum())

print("\n" + "="*50 + "\n")


Missing values in Train Data:

PRG          0
PL           0
PR           0
SK           0
TS           0
M11          0
BD2          0
Age          0
Insurance    0
Sepssis      0
dtype: int64




In [105]:
# Display unique values before mapping
print("Before Mapping:", train_df['Sepssis'].unique())

# Map 'positive' -> 1, 'negative' -> 0
train_df['Sepssis'] = train_df['Sepssis'].map({'Positive': 1, 'Negative': 0})

# Display unique values after mapping
print("After Mapping:", train_df['Sepssis'].unique())

Before Mapping: ['Positive' 'Negative']
After Mapping: [1 0]


In [115]:
# Separate features and target (assuming 'Sepssis' is the target)
X = train_df.drop(columns=['Sepssis'])
y = train_df['Sepssis']

print(X)
print(y)

     PRG   PL  PR  SK   TS   M11    BD2  Age  Insurance
0      6  148  72  35    0  33.6  0.627   50          0
1      1   85  66  29    0  26.6  0.351   31          0
2      8  183  64   0    0  23.3  0.672   32          1
3      1   89  66  23   94  28.1  0.167   21          1
4      0  137  40  35  168  43.1  2.288   33          1
..   ...  ...  ..  ..  ...   ...    ...  ...        ...
594    6  123  72  45  230  33.6  0.733   34          0
595    0  188  82  14  185  32.0  0.682   22          1
596    0   67  76   0    0  45.3  0.194   46          1
597    1   89  24  19   25  27.8  0.559   21          0
598    1  173  74   0    0  36.8  0.088   38          1

[599 rows x 9 columns]
0      1
1      0
2      1
3      0
4      1
      ..
594    0
595    1
596    0
597    0
598    1
Name: Sepssis, Length: 599, dtype: int64


In [117]:
print("Training Data Column Types:\n")
print(train_df.dtypes)

print("\n" + "="*50 + "\n")


Training Data Column Types:

PRG            int64
PL             int64
PR             int64
SK             int64
TS             int64
M11          float64
BD2          float64
Age            int64
Insurance      int64
Sepssis        int64
dtype: object




In [37]:
# Count the number of 0s and 1s
sepsis_counts = train_df['Sepssis'].value_counts()
print(sepsis_counts)

Sepssis
0    391
1    208
Name: count, dtype: int64


In [119]:
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
y
# Final preprocessed outputs
print("X_train_scaled shape:", X_scaled.shape)

X_scaled

X_train_scaled shape: (599, 9)


array([[ 0.64740245,  0.85274382,  0.1691093 , ...,  0.43233261,
         1.41383587, -1.47857033],
       [-0.84067916, -1.07651239, -0.14145729, ..., -0.38600171,
        -0.19380386, -1.47857033],
       [ 1.24263509,  1.92455283, -0.24497949, ...,  0.56575669,
        -0.10919124,  0.67632901],
       ...,
       [-1.13829548, -1.62772845,  0.37615369, ..., -0.85150349,
         1.0753854 ,  0.67632901],
       [-0.84067916, -0.95401993, -2.3154234 , ...,  0.23071401,
        -1.03993003, -1.47857033],
       [-0.84067916,  1.61832169,  0.27263149, ..., -1.16579131,
         0.39848446,  0.67632901]])

In [121]:
# Step 8: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [123]:
# Reshape data: (samples, timesteps=1, features)
X_train_rnn = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

def build_and_run_model(model_type='LSTM'):
    model = Sequential()

    if model_type == 'LSTM':
        model.add(LSTM(64, input_shape=(1, X_train.shape[1])))
    elif model_type == 'BiLSTM':
        model.add(Bidirectional(LSTM(64), input_shape=(1, X_train.shape[1])))
    elif model_type == 'RNN':
        model.add(SimpleRNN(64, input_shape=(1, X_train.shape[1])))
    elif model_type == 'BiRNN':
        model.add(Bidirectional(SimpleRNN(64), input_shape=(1, X_train.shape[1])))

    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification

    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    print(f"\nTraining {model_type} model...\n")
    model.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_rnn, y_test), verbose=1)

    # Evaluate model
    y_pred = model.predict(X_test_rnn)
    y_pred_labels = (y_pred > 0.5).astype(int)

    print(f"\nEvaluation Report for {model_type}:\n")
    print(classification_report(y_test, y_pred_labels))
    print(confusion_matrix(y_test, y_pred_labels))

# Run all models
for m in ['LSTM', 'BiLSTM', 'RNN', 'BiRNN']:
    build_and_run_model(m)


Training LSTM model...

Epoch 1/10


  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6093 - loss: 0.6807 - val_accuracy: 0.6833 - val_loss: 0.6502
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7222 - loss: 0.6313 - val_accuracy: 0.6583 - val_loss: 0.6129
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7269 - loss: 0.5875 - val_accuracy: 0.6917 - val_loss: 0.5817
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7382 - loss: 0.5445 - val_accuracy: 0.6917 - val_loss: 0.5651
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7486 - loss: 0.5217 - val_accuracy: 0.6833 - val_loss: 0.5610
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7935 - loss: 0.4675 - val_accuracy: 0.6833 - val_loss: 0.5602
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.6094 - loss: 0.6806 - val_accuracy: 0.7250 - val_loss: 0.6447
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7408 - loss: 0.6311 - val_accuracy: 0.7417 - val_loss: 0.6045
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7637 - loss: 0.5757 - val_accuracy: 0.7083 - val_loss: 0.5714
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7859 - loss: 0.5051 - val_accuracy: 0.7083 - val_loss: 0.5532
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7729 - loss: 0.4729 - val_accuracy: 0.7167 - val_loss: 0.5521
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7500 - loss: 0.4763 - val_accuracy: 0.7167 - val_loss: 0.5533
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.6806 - loss: 0.6321 - val_accuracy: 0.7000 - val_loss: 0.5719
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7327 - loss: 0.5436 - val_accuracy: 0.7250 - val_loss: 0.5360
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7650 - loss: 0.4891 - val_accuracy: 0.7083 - val_loss: 0.5252
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7846 - loss: 0.4573 - val_accuracy: 0.7417 - val_loss: 0.5220
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7805 - loss: 0.4518 - val_accuracy: 0.7583 - val_loss: 0.5188
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7821 - loss: 0.4415 - val_accuracy: 0.7583 - val_loss: 0.5256
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.5097 - loss: 0.7252 - val_accuracy: 0.6583 - val_loss: 0.6240
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7767 - loss: 0.5286 - val_accuracy: 0.6917 - val_loss: 0.5571
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7551 - loss: 0.5056 - val_accuracy: 0.7167 - val_loss: 0.5298
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7816 - loss: 0.4535 - val_accuracy: 0.7250 - val_loss: 0.5436
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7822 - loss: 0.4451 - val_accuracy: 0.7167 - val_loss: 0.5528
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7902 - loss: 0.4333 - val_accuracy: 0.7250 - val_loss: 0.5590
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━