In [16]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
from sklearn.model_selection import train_test_split

In [17]:
# Load datasets
train_dir = "train.csv"
test_dir = "test.csv"

# Read the data
train_data = pd.read_csv(train_dir)
test_data = pd.read_csv(test_dir)

In [18]:
train_data.drop(columns=[col for col in ['case_id', 'patientid'] if col in train_data.columns], inplace=True)

In [19]:
stay_mapping = {
    '0-10': 1, '11-20': 2, '21-30': 3, '31-40': 4, '41-50': 5,
    '51-60': 6, '61-70': 7, '71-80': 8, '81-90': 9, '91-100': 10,
    '100 Days': 10, 'More than 100 Days': 11
}
train_data['Stay'] = train_data['Stay'].map(stay_mapping)

In [None]:
# Fill bed grade and city code nulls with most common value (mode) for that feature
if 'Bed Grade' in train_data.columns and train_data['Bed Grade'].isnull().sum() > 0:
    train_data['Bed Grade'].fillna(train_data['Bed Grade'].mode()[0], inplace=True)
if 'City_Code_Patient' in train_data.columns and train_data['City_Code_Patient'].isnull().sum() > 0:
    train_data['City_Code_Patient'].fillna(train_data['City_Code_Patient'].mode()[0], inplace=True)

# Ensure Bed Grade and City Code are integers
if 'Bed Grade' in train_data.columns:
    train_data['Bed Grade'] = train_data['Bed Grade'].astype(int)
if 'City_Code_Patient' in train_data.columns:
    train_data['City_Code_Patient'] = train_data['City_Code_Patient'].astype(int)

In [20]:
#split train into two data sets, so that Stay is isolated
X = train_data.drop(columns=['Stay'])  # Creates a dataset without the 'Stay' column
y = train_data['Stay']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


# Define the number of input features and output classes
num_features = X_train.shape[1]  # Assuming X_train is already preprocessed
num_classes = y_train.nunique()  # Number of unique length-of-stay categories

In [22]:
# Build the model
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(num_features,)),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(32, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    layers.Dense(num_classes, activation='softmax')  # Softmax for multi-class classification
])

# Compile the model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',  # Use 'categorical_crossentropy' if one-hot encoded
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train, y_train, 
    validation_data=(X_train, y_train), 
    epochs=50, 
    batch_size=32, 
    class_weight=None,  # Adjust if imbalance exists
    verbose=1
)

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: could not convert string to float: 'b'