# Data Loading and Preparation

## Data Loading

In [1]:
#Importing libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

In [2]:
#Loading data
import os

# The workbook location defaults to the original Colab upload path; set the
# DATA_PATH environment variable to load it from somewhere else.
path = os.environ.get('DATA_PATH', '/content/prepared_4A_multi-task.xlsx')

# Fail early with an actionable message instead of a bare traceback from pandas.
if not os.path.isfile(path):
    raise FileNotFoundError(
        f"Dataset not found at '{path}'. Upload the workbook to that location "
        "or set the DATA_PATH environment variable to its actual path."
    )

data = pd.read_excel(path)
data.head(2)

FileNotFoundError: [Errno 2] No such file or directory: '/content/prepared_4A_multi-task.xlsx'

# **Data Preprocessing**

In [None]:
# Remove the listed columns from the raw frame; the result is kept in a new
# frame so `data` itself is left untouched.
col_to_remove = ['DURATION', 'OUT COME', 'IP NUMBER']
data1 = data.drop(col_to_remove, axis=1)

In [None]:
#Encoding categorical features
# Columns that must never be one-hot encoded: the continuous features and the
# four weekly survival targets.
non_features = ['NUMBER OF ADDITIONAL WORKING DIAGNOSIS', 'AGE', 'NUMBER OF PREVIOUS ADMISSIONS',
                'Survival_Week 1', 'Survival_Week 2', 'Survival_Week 3', 'Survival_Week 4']
data2 = data1.drop(columns=non_features)

# Among the remaining columns, the text (object-dtype) ones are the categoricals.
text_columns = data2.select_dtypes(include=['object']).columns

# dtype=float keeps the indicator columns numeric. Recent pandas versions emit
# bool indicators by default, and a frame mixing float and bool columns turns
# into an object array under `.values`, which Keras cannot convert to a tensor.
data_enc = pd.get_dummies(data1, columns=text_columns, dtype=float)

In [None]:
# Display four randomly chosen rows of the encoded frame as a spot check.
data_enc.sample(4)

In [None]:
# Print the column names, dtypes and non-null counts of the encoded frame.
data_enc.info()

## Normalization and Visualization

In [None]:
# Compress the continuous features with log(1 + x), applied to all three
# columns in one vectorised call.
transform_features = ['AGE','NUMBER OF PREVIOUS ADMISSIONS', 'NUMBER OF ADDITIONAL WORKING DIAGNOSIS']
data_enc[transform_features] = np.log1p(data_enc[transform_features])

# 'NUMBER OF PREVIOUS ADMISSIONS' receives a second log(1 + x) pass on top of
# the one above.
# NOTE(review): this cell rewrites data_enc in place, so running it again
# applies the transforms again.
previous_admissions = 'NUMBER OF PREVIOUS ADMISSIONS'
data_enc[previous_admissions] = np.log1p(data_enc[previous_admissions])


In [None]:
# Standardise the continuous features (zero mean, unit variance per column).
# NOTE(review): the scaler is fitted on the full frame, i.e. before any
# train/test split happens further down the notebook.
normalise_features = ['AGE','NUMBER OF PREVIOUS ADMISSIONS','NUMBER OF ADDITIONAL WORKING DIAGNOSIS']
scaler = StandardScaler()
scaler.fit(data_enc[normalise_features])
data_enc[normalise_features] = scaler.transform(data_enc[normalise_features])


In [None]:
#Plotting Density plot
import seaborn as sns

continuous_features = ['AGE','NUMBER OF PREVIOUS ADMISSIONS','NUMBER OF ADDITIONAL WORKING DIAGNOSIS']

# One panel per continuous feature, laid out side by side.
fig, axes = plt.subplots(1, len(continuous_features), figsize=(15, 5))

for ax, col in zip(axes, continuous_features):
    sns.kdeplot(data_enc[col], fill=True, ax=ax)
    ax.set_title(f'Density Plot for {col}')
    ax.set_xlabel('Values')
    ax.set_ylabel('Density')

plt.tight_layout()
# The original referenced `plt.show` without calling it, so the figure was
# never explicitly shown and the cell echoed the function object instead.
plt.show()

In [None]:
# Display five randomly chosen rows of the transformed and scaled frame.
data_enc.sample(5)

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, LSTM, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras import layers
# Seed TensorFlow's and NumPy's global random generators with the same fixed
# value so that repeated runs start from the same random state.
tf.random.set_seed(42)
np.random.seed(42)



In [None]:
# Split the encoded frame into the four weekly survival columns (y) and
# every remaining column (X).
survival_columns = [f"Survival_Week {week_no}" for week_no in range(1, 5)]
X = data_enc.drop(columns=survival_columns)
y = data_enc[survival_columns]

# **Model Training**

In [None]:
from sklearn.metrics import accuracy_score

# Separate the four weekly survival columns (targets) from all other columns (features).
week_targets = ["Survival_Week 1", "Survival_Week 2", "Survival_Week 3", "Survival_Week 4"]
X = data_enc.drop(columns=week_targets)
y = data_enc[week_targets]

# Rows are grouped into consecutive, non-overlapping windows of 7 rows each.
sequence_length = 7
num_features = X.shape[1]

# Start offsets of every complete window; a trailing window shorter than
# `sequence_length` is left out.
window_starts = range(0, len(X) - sequence_length + 1, sequence_length)

# One 2-D array (rows x columns) per window, for features and targets alike.
X_reshaped_list = [X.iloc[start:start + sequence_length].values for start in window_starts]
y_reshaped_list = [y.iloc[start:start + sequence_length].values for start in window_starts]

# Stack the windows into 3-D arrays: (windows, rows per window, columns).
X_reshaped = np.array(X_reshaped_list)
y_reshaped = np.array(y_reshaped_list)

# Hold out 20% of the windows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped,
    y_reshaped,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Defining parameters
# This cell needs the 3-D arrays produced by the sequence-building cell:
# X_* is (sequences, time steps, features), y_* is (sequences, time steps, 4).
if X_train.ndim != 3 or y_train.ndim != 3:
    raise ValueError(
        "Expected 3-D sequence arrays for X_train / y_train; "
        "re-run the sequence-building cell before this one."
    )

time_steps = X_train.shape[1]
num_features = X_train.shape[2]
num_weeks = 4
num_epochs = 50
batch_size = 32

models = []  # one independently trained model per survival week
lambda_1 = 0.01  # regularization parameter (lambda_1)


def custom_loss(y_true, y_pred):
    """Logistic (cross-entropy) loss for 0/1 labels and sigmoid probabilities.

    The network ends in a sigmoid and predictions are thresholded at 0.5, so
    `y_pred` is a probability and `y_true` is a 0/1 label. The previous
    margin form, log(1 + exp(-y * y_pred)), is only valid for -1/+1 labels and
    raw logits; with 0/1 labels it is constant for every negative example.

    The L2 penalty is not added here. It is attached to the layers through
    `kernel_regularizer`, and Keras adds those terms to the training loss, so
    the loss no longer depends on a global `model` variable.

    Args:
        y_true: 0/1 labels, shape (batch,) or (batch, 1).
        y_pred: predicted probabilities, shape (batch, 1).

    Returns:
        Scalar tensor: mean negative log-likelihood over the batch.
    """
    y_true = tf.reshape(tf.cast(y_true, tf.float32), tf.shape(y_pred))
    # Clip so that log() never receives exactly 0 or 1.
    epsilon = 1e-7
    y_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)
    log_likelihood = y_true * tf.math.log(y_pred) + (1.0 - y_true) * tf.math.log(1.0 - y_pred)
    return -tf.reduce_mean(log_likelihood)


def build_lstm_model():
    """Create and compile a fresh LSTM classifier for one survival week.

    The L2 penalty on the LSTM input kernel uses 0.5 * lambda_1, which matches
    the term the original loss computed by hand; the Dense kernel keeps its
    lambda_1 penalty.

    Returns:
        A compiled `keras.Sequential` model with a single sigmoid output.
    """
    week_model = keras.Sequential([
        layers.Input(shape=(time_steps, num_features)),
        layers.LSTM(20, return_sequences=False,
                    kernel_regularizer=keras.regularizers.l2(0.5 * lambda_1)),
        layers.Dense(1, activation='sigmoid', use_bias=True,
                     kernel_regularizer=keras.regularizers.l2(lambda_1)),
    ])
    week_model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])
    return week_model


# Keras needs a numeric array; the stacked frame values may be object-dtype
# when the encoded frame mixes column dtypes.
X_train_seq = np.asarray(X_train, dtype='float32')
X_test_seq = np.asarray(X_test, dtype='float32')

accuracy_scores = []  # list to store accuracy for each week

# Training a model for each week
for week in range(num_weeks):
    print(f"Training for Survival_Week {week + 1}")

    # The label arrays are (sequences, time steps, weeks). The week is the LAST
    # axis; the original `y_train[:, week]` sliced the time-step axis instead.
    # NOTE(review): each sequence is labelled with its final time step, because
    # the LSTM emits one prediction per sequence - confirm this is the intended
    # target for a window.
    y_train_week = y_train[:, -1, week].astype('float32').reshape(-1, 1)
    y_test_week = y_test[:, -1, week].astype('float32').reshape(-1, 1)

    # A new model per week: reusing one instance would keep training the same
    # weights and fill `models` with four references to a single object.
    model = build_lstm_model()
    model.fit(X_train_seq, y_train_week, epochs=num_epochs, batch_size=batch_size,
              validation_data=(X_test_seq, y_test_week))
    models.append(model)

    y_pred = model.predict(X_test_seq)
    y_pred_binary = (y_pred > 0.5).astype(int).ravel()
    accuracy = accuracy_score(y_test_week.ravel().astype(int), y_pred_binary)

    # Store the accuracy in the list
    accuracy_scores.append(accuracy)


In [None]:
# Report the stored test accuracy of every week, numbering weeks from 1.
for week_number, week_accuracy in enumerate(accuracy_scores, start=1):
    print(f"Week {week_number} Accuracy: {week_accuracy}")

# Model 2: Binary Cross-Entropy

In [None]:
# Feature matrix: every column except the four weekly survival targets.
# Target matrix: the four weekly survival columns, one column per week.
targets = ['Survival_Week 1', 'Survival_Week 2', 'Survival_Week 3', 'Survival_Week 4']
X = data_enc.drop(columns=targets).values
y = data_enc[targets].values

# Hold out 20% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Logistic regression expressed as a one-layer neural network
def logistic_regression_model(input_dim):
    """Build and compile a logistic-regression classifier.

    The model is a single Dense unit with a sigmoid activation applied
    directly to the input features.

    Args:
        input_dim: number of input features per sample.

    Returns:
        A `keras.Model` compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    feature_input = keras.Input(shape=(input_dim,))
    sigmoid_unit = keras.layers.Dense(1, activation='sigmoid')
    probability = sigmoid_unit(feature_input)

    classifier = keras.Model(feature_input, probability)
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

accuracy_scores = [] # list to store accuracy for each week

# One independent logistic-regression model per survival week
num_weeks = 4
models = [logistic_regression_model(input_dim=X.shape[1]) for _ in range(num_weeks)]


num_epochs = 100
batch_size = 32

for week in range(num_weeks):
    model = models[week]
    # Column `week` of the 2-D target matrix holds that week's labels.
    model.fit(X_train, y_train[:, week], epochs=num_epochs, batch_size=batch_size, validation_data=(X_test, y_test[:, week]))

    y_pred = model.predict(X_test)
    # Flatten the (n, 1) prediction column so it has the same 1-D shape as the labels.
    y_pred_binary = (y_pred > 0.5).astype(int).ravel()
    accuracy = accuracy_score(y_test[:, week], y_pred_binary)
    # Weeks are reported 1-based everywhere else; the original printed the
    # 0-based loop index here.
    print(f'Accuracy for Week {week + 1}: {accuracy}')

    # Store the accuracy in the list
    accuracy_scores.append(accuracy)


In [None]:
# Report the stored test accuracy of every week, numbering weeks from 1.
for week_number, week_accuracy in enumerate(accuracy_scores, start=1):
    print(f"Week {week_number} Accuracy: {week_accuracy}")
