In [41]:
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from joblib import dump
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout


y = (df["readmitted"] == "<30").astype(int)  


feat_cols = [
    "time_in_hospital", "num_medications",
    "number_inpatient", "number_emergency",
    "A1Cresult", "insulin", "change", "diabetesMed"
]

X = df[feat_cols].copy()


cat = ["A1Cresult", "insulin", "change", "diabetesMed"]
X = pd.get_dummies(X, columns=cat, drop_first=True)




df = pd.read_csv("../data/diabetic_data.csv")  
print(" File loaded successfully!")
print(df.shape)
df.head()



CSV_PATH = Path("../data/diabetic_data.csv")  
MODEL_PATH = Path("tf_model.h5")
PREPROC_PATH = Path("tf_preprocessor.joblib")


CAT_COLS = ["race", "gender", "age", "A1Cresult", "insulin", "change", "diabetesMed"]
NUM_COLS = ["time_in_hospital", "num_lab_procedures", "num_procedures", "num_medications", "number_outpatient", "number_emergency", "number_inpatient"]
def load_data(path: Path) -> pd.DataFrame:
    df = pd.read_csv(path, na_values=["?", "NA", "NaN"])
    df = df[df["readmitted"].isin(["<30", ">30", "NO"])].copy()
    df["target"] = (df["readmitted"] != "NO").astype(int)
    return df[CAT_COLS + NUM_COLS + ["target"]]

def build_preprocessor() -> ColumnTransformer:
    return ColumnTransformer(
        transformers=[
          ("cat", OneHotEncoder(handle_unknown="ignore"), CAT_COLS),
            ("num", Pipeline([("scale", StandardScaler())]), NUM_COLS),  
        ]
    )
def build_model(input_dim: int) -> keras.Model:
    model = keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(16, activation="relu"),
        layers.Dense(1, activation="sigmoid")
    ])

    
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    
    
    return model

def main():
    df = load_data(CSV_PATH)
    X, y = df.drop(columns=["target"]), df["target"]

    pre =build_preprocessor()
    Xp = pre.fit_transform(X)
    dump(pre, PREPROC_PATH)

scaler = StandardScaler()
Xp = scaler.fit_transform(X)

Xtr, Xte, ytr, yte = train_test_split(Xp, y, test_size=0.2, random_state=42, stratify=y)


model = build_model(Xtr.shape[1])


model.fit(Xtr, ytr, validation_data=(Xte, yte), epochs=10, batch_size=32, verbose=1)


model.save(MODEL_PATH)

print(f" Saved model: {MODEL_PATH.resolve()}")
print(f" Saved preprocessor: {PREPROC_PATH.resolve()}")

if __name__ == "__main__":
    main()








 File loaded successfully!
(101766, 50)
Epoch 1/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9095 - loss: 0.2618 - val_accuracy: 0.9133 - val_loss: 0.2430
Epoch 2/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9143 - loss: 0.2453 - val_accuracy: 0.9148 - val_loss: 0.2396
Epoch 3/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9142 - loss: 0.2432 - val_accuracy: 0.9142 - val_loss: 0.2381
Epoch 4/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9150 - loss: 0.2422 - val_accuracy: 0.9135 - val_loss: 0.2413
Epoch 5/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9150 - loss: 0.2414 - val_accuracy: 0.9154 - val_loss: 0.2383
Epoch 6/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9152 - loss: 0.2409 - val_accuracy: 0.



 Saved model: /workspaces/Hospital-Diabetic-readmission/src/tf_model.h5
 Saved preprocessor: /workspaces/Hospital-Diabetic-readmission/src/tf_preprocessor.joblib


  df = pd.read_csv(path, na_values=["?", "NA", "NaN"])


In [42]:
!pwd
!ls -lah


/workspaces/Hospital-Diabetic-readmission/src
total 108K
drwxrwxrwx+ 2 codespace codespace 4.0K Oct 14 04:14 .
drwxrwxrwx+ 5 codespace root      4.0K Oct 14 05:48 ..
-rw-rw-rw-  1 codespace codespace 8.6K Oct 14 06:27 mini_health.ipynb
-rw-rw-rw-  1 codespace codespace  77K Oct 14 06:19 tf_model.h5
-rw-rw-rw-  1 codespace codespace 4.6K Oct 14 06:19 tf_preprocessor.joblib
