In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [7]:
# Read the length-of-stay records into a DataFrame.
# The path is relative to the working directory; point file_path at the real
# location of the CSV if it lives elsewhere.
file_path = "LengthOfStay.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [8]:

# Name of the column the model is trained to predict (bucketed later into
# stay-length classes).
target = "lengthofstay"

# Columns used as model inputs, in the order they are fed to the classifier.
selected_features = [
    "rcount",
    "gender",
    "dialysisrenalendstage",
    "asthma",
    "irondef",
    "pneum",
    "psychologicaldisordermajor",
    "depress",
    "malnutrition",
    "hemo",
    "hematocrit",
    "sodium",
    "glucose",
    "creatinine",
    "bmi",
    "pulse",
    "respiration",
]

In [9]:
# Keep only the rows whose target value is present; a row without a label
# cannot be used for supervised training or evaluation.
has_target = df[target].notna()
df = df.loc[has_target]



In [14]:
# Encode the categorical gender column as integer class codes.
# The check is "not numeric" rather than "dtype is object": pandas string and
# category dtypes are not object dtype, and an un-encoded text column would be
# turned entirely into NaN by the later pd.to_numeric(..., errors='coerce').
# A column that is already numeric is left untouched.
if not pd.api.types.is_numeric_dtype(df["gender"]):
    le = LabelEncoder()
    df["gender"] = le.fit_transform(df["gender"])

In [16]:
# Coerce every selected feature to a numeric dtype; entries that cannot be
# parsed as numbers become NaN.
numeric_features = df[selected_features].apply(pd.to_numeric, errors='coerce')

# Replace each column's NaNs with that column's median.
# NOTE(review): the medians are computed over all rows of df, i.e. before the
# train/test split, so test rows influence the imputation values — consider
# deriving them from the training rows only.
df[selected_features] = numeric_features.fillna(numeric_features.median())


In [18]:
# Function to categorize length of stay
def categorize_los(days, short_max=3, medium_max=7):
    """Bucket a length of stay into one of three named classes.

    Args:
        days: Length of stay; any value comparable with the thresholds.
        short_max: Largest value still labelled "Short Stay" (inclusive).
        medium_max: Largest value still labelled "Medium Stay" (inclusive).

    Returns:
        "Short Stay" if ``days <= short_max``, "Medium Stay" if
        ``short_max < days <= medium_max``, otherwise "Long Stay".

    Raises:
        ValueError: If ``short_max`` is greater than ``medium_max``, which
            would make the "Medium Stay" class unreachable.

    Note:
        A NaN compares False against both thresholds and therefore falls
        through to "Long Stay"; drop or impute missing values before calling.
    """
    if short_max > medium_max:
        raise ValueError(
            f"short_max ({short_max}) must not exceed medium_max ({medium_max})"
        )
    if days <= short_max:
        return "Short Stay"
    if days <= medium_max:
        return "Medium Stay"
    return "Long Stay"

In [20]:
# Assemble the model inputs: the feature matrix from the selected columns,
# and the class labels obtained by bucketing each target value with
# categorize_los.
X = df.loc[:, selected_features]
y = df[target].apply(categorize_los)

In [22]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the shuffle
# (and therefore the partition) repeatable between runs.
# NOTE(review): the split is not stratified on y — if the stay classes are
# imbalanced, consider passing stratify=y.
partitions = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = partitions


In [24]:
# Configure a 100-round AdaBoost ensemble with a fixed seed, then fit it on the
# training partition. The fit call stays as the cell's last expression so the
# estimator is still displayed as the cell output.
ada_model = AdaBoostClassifier(random_state=42, n_estimators=100)
ada_model.fit(X=X_train, y=y_train)




In [25]:
# Predict the held-out rows and score the predictions.
# Every metric is scaled to a percentage. Precision, recall and F1 use
# support-weighted averaging across the classes.
y_pred = ada_model.predict(X_test)
accuracy = 100 * accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    100 * metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

In [26]:
# Report each metric as a percentage with two decimals, one metric per line.
print(
    f"Accuracy: {accuracy:.2f}%",
    f"Precision: {precision:.2f}%",
    f"Recall: {recall:.2f}%",
    f"F1-score: {f1:.2f}%",
    sep="\n",
)


Accuracy: 81.64%
Precision: 76.53%
Recall: 81.64%
F1-score: 78.10%


In [30]:
import joblib

# Persist the fitted estimator to disk with joblib so it can be reloaded
# without retraining. The file lands in the current working directory.
model_filename = "adaboost.pkl"
joblib.dump(value=ada_model, filename=model_filename)

# Confirm where the model was written.
print(f"Model saved as {model_filename}")

Model saved as adaboost.pkl
