In [1]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# ---------------------------------------------------------------------------
# Readmission classifier: load CSV -> label-encode -> split -> train ->
# evaluate -> persist (split, encoders, model).
# ---------------------------------------------------------------------------

# Tunable settings, gathered in one place so a reader can see them at a glance.
TARGET_COLUMN = 'readmitted'
CATEGORICAL_COLUMNS = [
    'readmitted',  # target column (original note: Yes/No -> 1/0)
    'age',
    'medical_specialty',
    'diag_1',
    'diag_2',
    'diag_3',
    'glucose_test',
    'A1Ctest',
    'change',
    'diabetes_med',
]
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 100

# Load dataset
file_path = "hospital_readmissions.csv"
df = pd.read_csv(file_path)

# Encode categorical features.
# Each column gets its OWN LabelEncoder. Re-fitting a single shared encoder
# (as this cell used to do) discards every mapping except the last one, which
# makes it impossible to encode new raw records for the saved model or to
# decode predictions back to their original labels.
# NOTE: `df` is overwritten in place on purpose -- the next cell writes this
# encoded frame to disk.
label_encoders = {}
for column in CATEGORICAL_COLUMNS:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

# Backward compatibility: this cell previously left `label_enc` bound to the
# encoder that was fitted last (on 'diabetes_med'); keep that name available.
label_enc = label_encoders['diabetes_med']

# Persist the fitted encoders so inference code can apply the exact same
# category -> integer mapping that the model was trained on.
with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

# Define features and target
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Train-test split (fixed seed so the split is reproducible across runs)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Save train-test data
with open("train_test_data.pkl", "wb") as f:
    pickle.dump((X_train, X_test, y_train, y_test), f)

# Train model
model = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Evaluate model on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save the trained model
with open("readmission_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("Model training complete! Saved as readmission_model.pkl")


Model Accuracy: 0.60
Model training complete! Saved as readmission_model.pkl


In [2]:
# Write the current state of `df` to CSV. The frame's index is omitted so the
# output file contains only the data columns.
processed_path = "processed_hospital_readmissions.csv"
df.to_csv(processed_path, index=False)