In [15]:
# !pip install streamlit
# !pip install pyngrok
# !pip install xgboost

In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import pickle
import numpy as np

# Load your dataset
# NOTE(review): recent pandas versions parse the literal text "None" as NaN by default;
# if any categorical column (e.g. 'Comorbidities') stores "None", confirm it is read as intended.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ada.csv')

# 'Procedure Codes' is assumed to hold comma-separated values; replace it with a count.
# Rows without any codes (NaN) are counted as 0 so training agrees with the Streamlit
# app, which sends 0 when the procedure-codes field is left empty.
procedure_codes_count = (
    df['Procedure Codes'].str.split(',').str.len().fillna(0).astype(int)
)

# Build a new frame instead of mutating in place so the cell stays easy to reason about.
df = (
    df.assign(**{'Procedure Codes Count': procedure_codes_count})
      .drop(columns='Procedure Codes')
)

# Separate the prediction target from the predictor columns
X = df.drop(columns='Length of Stay')
y = df['Length of Stay']

# Columns that are one-hot encoded
categorical_features = [
    'Gender',
    'Admission Type',
    'Primary Diagnosis',
    'Severity of Illness',
    'Comorbidities',
    'Ward/Department',
    'Discharge Disposition',
]

# Columns passed to the model unchanged (includes the engineered 'Procedure Codes Count')
numerical_features = ['Age', 'Procedure Codes Count']

# Route each column group to its transformation; numeric columns are left unscaled
# because the downstream model is tree-based.
column_transformers = [
    ('num', 'passthrough', numerical_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_transformers)

# Chain preprocessing and the regressor so they are fitted and applied together
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(random_state=42)),
])

# Hold out 20% of the rows for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate XGBoost settings; the 'regressor__' prefix targets the pipeline's regressor step
param_grid = dict(
    regressor__n_estimators=[100, 200],
    regressor__max_depth=[3, 5, 7],
    regressor__learning_rate=[0.01, 0.1, 0.2],
    regressor__subsample=[0.8, 1.0],
    regressor__colsample_bytree=[0.8, 1.0],
)

# Exhaustive 5-fold cross-validated search over the grid, scored by negated MSE
grid_search = GridSearchCV(
    estimator=model_pipeline,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
    verbose=2,
)

# Fit every candidate on the training split
grid_search.fit(X_train, y_train)

# Persist the best pipeline found by the search
best_pipeline = grid_search.best_estimator_
with open('xgb_model_pipeline.pkl', 'wb') as model_file:
    pickle.dump(best_pipeline, model_file)

# Score the best pipeline on the held-out test split
y_pred = best_pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the outcome
for report_line in (
    "XGBoost model trained and saved successfully!",
    f"Test Set Mean Squared Error: {mse}",
    f"Test Set Root Mean Squared Error: {rmse}",
    f"Best Hyperparameters: {grid_search.best_params_}",
):
    print(report_line)


Fitting 5 folds for each of 72 candidates, totalling 360 fits
XGBoost model trained and saved successfully!
Test Set Mean Squared Error: 11.860842181629302
Test Set Root Mean Squared Error: 3.4439573431779467
Best Hyperparameters: {'regressor__colsample_bytree': 1.0, 'regressor__learning_rate': 0.01, 'regressor__max_depth': 5, 'regressor__n_estimators': 200, 'regressor__subsample': 0.8}


In [13]:
# from sklearn.model_selection import GridSearchCV

# # Define the hyperparameters to tune
# param_grid = {
#     'regressor__n_estimators': [100, 200, 300],
#     'regressor__max_depth': [10, 20, 30, None],
#     'regressor__min_samples_split': [2, 5, 10],
#     'regressor__min_samples_leaf': [1, 2, 4],
#     'regressor__max_features': ['auto', 'sqrt', 'log2']
# }

# # Create a grid search over your pipeline
# grid_search = GridSearchCV(model_pipeline, param_grid, cv=5, scoring='neg_root_mean_squared_error')

# # Fit the grid search
# grid_search.fit(X_train, y_train)

# # Output the best hyperparameters and RMSE
# best_model = grid_search.best_estimator_
# print(f"Best hyperparameters: {grid_search.best_params_}")
# print(f"Best RMSE: {-grid_search.best_score_}")

# # Use the best model to make predictions on the test set
# y_pred = best_model.predict(X_test)
# rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# print(f"Test Set Root Mean Squared Error with Best Model: {rmse}")

In [14]:
# import pickle
# from sklearn.model_selection import GridSearchCV

# # Assume that grid_search has already been performed
# grid_search = GridSearchCV(model_pipeline, param_grid, cv=5, scoring='neg_root_mean_squared_error')

# # Fit the grid search
# grid_search.fit(X_train, y_train)

# # Retrieve the best model (which is a pipeline)
# best_model_pipeline = grid_search.best_estimator_

# # Save the best model pipeline to a file
# with open('model_pipeline_tuned.pkl', 'wb') as f:
#     pickle.dump(best_model_pipeline, f)

# print("Best model pipeline saved successfully!")


In [44]:
%%writefile new.py
import streamlit as st
import pandas as pd
import numpy as np
import pickle

# Load the trained model pipeline.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
# A context manager is used so the file handle is closed as soon as loading finishes.
with open('/content/xgb_model_pipeline.pkl', 'rb') as model_file:
    model_pipeline = pickle.load(model_file)

def _count_procedure_codes(raw_codes):
    """Return the number of non-blank, comma-separated codes in ``raw_codes``."""
    if not raw_codes:
        return 0
    # Ignore empty tokens so "4019," or a whitespace-only entry is not over-counted.
    return sum(1 for code in raw_codes.split(',') if code.strip())


def predict_length_of_stay():
    """Render the Streamlit page: collect patient details, predict the stay, show a day plan.

    Reads the module-level ``model_pipeline`` and, when the predict button is pressed,
    builds a one-row DataFrame from the form values, runs the pipeline on it, and renders
    the rounded prediction followed by one styled box per predicted day.
    """
    # Custom CSS for styling
    st.markdown("""
        <style>
            .description {
                font-size: 18px;
                font-family: 'Times New Roman', Times, serif;
                font-weight: bold;
                color: #ffffff; /* Bright white text color */
                text-align: center;
                margin-bottom: 20px;
                padding: 10px;
                background: linear-gradient(135deg, #6a82fb, #fc5c7d); /* Modern violet and pink gradient */
                border-radius: 10px;
                box-shadow: 0 8px 16px rgba(0, 0, 0, 0.15); /* Enhanced shadow for better depth */
                border: 1px solid #ffffff; /* White border for a clean finish */
            }
            .day-box {
                background: linear-gradient(45deg, #f3ec78, #af4261);
                color: white;
                padding: 15px;
                margin-bottom: 10px;
                border-radius: 10px;
                transition: transform 0.3s ease;
                cursor: pointer;
            }
            .day-box:hover {
                transform: translateY(-5px);
                background: linear-gradient(45deg, #af4261, #f3ec78);
            }
            .day-title {
                font-weight: bold;
                font-size: 20px;
            }
        </style>
    """, unsafe_allow_html=True)

    # Project explanation at the top
    st.markdown("""
    <div class="description">
        This project predicts a patient's hospital length of stay based on their medical details,
        including diagnosis, severity, and procedures. The system uses machine learning to assist
        healthcare providers in planning care and resource management effectively.
    </div>
    """, unsafe_allow_html=True)

    st.title('🏥 Hospital Length of Stay Prediction')
    st.markdown("""
        ### Please fill in the patient details below to predict the hospital stay duration.
    """)

    # Streamlit columns for better layout
    col1, col2 = st.columns(2)

    with col1:
        age = st.slider("🧑‍🦳 Age", min_value=0, max_value=120, value=30, step=1, help="Select the patient's age")
        gender = st.selectbox("⚧ Gender", ['Male', 'Female'], help="Select the patient's gender")
        admission_type = st.selectbox("🏥 Admission Type", ['Emergency', 'Elective', 'Urgent'], help="Select the type of admission")

    with col2:
        primary_diagnosis = st.selectbox("🩺 Primary Diagnosis", ['Diabetes', 'Hypertension', 'Asthma', 'Heart Disease'], help="Select the patient's primary diagnosis")
        severity_of_illness = st.selectbox("🩸 Severity of Illness", ['Mild', 'Moderate', 'Severe'], help="Select the severity of the patient's illness")
        comorbidities = st.text_input("🦠 Comorbidities (comma-separated)", placeholder="e.g., Arthritis, None", help="Enter any comorbidities separated by commas")

    st.markdown("---")

    # Second row for additional details
    col3, col4 = st.columns(2)

    with col3:
        procedure_codes = st.text_input("💉 Procedure Codes (comma-separated)", placeholder="e.g., 4019, 2500", help="Enter the codes for any procedures")
        ward_department = st.selectbox("🏢 Ward/Department", ['Cardiology', 'Orthopedics', 'Respiratory'], help="Select the department or ward")

    with col4:
        discharge_disposition = st.selectbox("🚶 Discharge Disposition", ['Home', 'Transferred to another facility'], help="Select where the patient is discharged to")

    # Handle missing or empty inputs. Surrounding whitespace is stripped so a blank or
    # padded entry is not sent to the encoder as a distinct, unseen category.
    comorbidities = (comorbidities or '').strip() or 'None'
    procedure_codes_count = _count_procedure_codes(procedure_codes)

    # Interactive prediction button
    st.markdown("### When ready, click the button to predict the length of stay")
    predict_button = st.button('🔍 Predict Length of Stay')

    if predict_button:
        # Column names must match the feature names the pipeline was trained on.
        data = {
            'Age': [age],
            'Gender': [gender],
            'Admission Type': [admission_type],
            'Primary Diagnosis': [primary_diagnosis],
            'Severity of Illness': [severity_of_illness],
            'Comorbidities': [comorbidities],
            'Procedure Codes Count': [procedure_codes_count],
            'Ward/Department': [ward_department],
            'Discharge Disposition': [discharge_disposition]
        }

        input_df = pd.DataFrame(data)
        raw_prediction = model_pipeline.predict(input_df)[0]
        # A regressor's output is unbounded; never report a negative number of days.
        predicted_length_of_stay = max(int(np.round(raw_prediction)), 0)

        # Display the predicted length of stay with style
        st.success(f"🚑 The predicted length of stay is: **{predicted_length_of_stay} days**")
        st.balloons()

        # Explain what might happen on each day
        st.markdown("### Breakdown of Each Day's Procedures")
        st.info("Here's an overview of what happens during the patient's hospital stay:")

        for day in range(1, predicted_length_of_stay + 1):
            st.markdown(f"""
            <div class="day-box">
                <p class="day-title">Day {day}:</p>
                <p>{get_day_details(day, predicted_length_of_stay)}</p>
            </div>
            """, unsafe_allow_html=True)

def get_day_details(day, predicted_length_of_stay):
    """Return the descriptive text shown for one day of the predicted stay.

    Args:
        day: 1-based day number within the stay.
        predicted_length_of_stay: Total number of predicted days.

    Returns:
        A fixed description string for that day.
    """
    # The last day of a multi-day stay is always the discharge day. Checking this first
    # keeps discharge planning reachable for 2- and 3-day stays, where the day-number
    # branches below would otherwise take precedence and hide it.
    if day > 1 and day >= predicted_length_of_stay:
        return "Discharge planning begins. Final assessments and preparation for going home or transfer to another facility are completed."
    if day == 1:
        return "Initial assessments, diagnostic tests, and beginning of treatment. The patient is monitored closely for any immediate responses."
    if day == 2:
        return "Continued monitoring of vital signs and response to treatment. Adjustments to the treatment plan may be made based on the patient's progress."
    if day == 3:
        return "Procedures or interventions, such as surgeries or specific treatments (based on the diagnosis), may take place. Continued recovery and monitoring."
    return "Ongoing treatment, physiotherapy, or rehabilitation based on the condition. The patient is encouraged to begin mobility exercises if applicable."

# Render the page when this file is executed as a script.
if __name__ == '__main__':
    predict_length_of_stay()


Overwriting new.py


In [5]:
# Kill any Streamlit process left over from a previous run
!pkill -f streamlit

In [6]:
# NOTE(review): `choco` is not available in this environment; the cell output below
# reports "command not found". The later `ngrok config` cell still succeeds, so this
# install step appears unnecessary here. Presumably the `ngrok` command is provided by
# the pyngrok package; confirm before removing this cell.
!choco install ngrok

/bin/bash: line 1: choco: command not found


In [7]:
# authentocation
!ngrok config add-authtoken "2kqawfT6uoasLTlQyAR27wE5qYd_7vNpb4kyfFKm8QsNUq9EW"

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok
import subprocess

def start_streamlit():
    """Run the Streamlit app in ``new.py`` and block until the process exits.

    Raises:
        subprocess.CalledProcessError: if Streamlit exits with a non-zero status.
    """
    launch_command = ["streamlit", "run", "new.py"]
    subprocess.run(launch_command, check=True)

def start_ngrok(port):
    """Open an ngrok tunnel to the given local ``port`` and print its public URL."""
    public_url = ngrok.connect(port).public_url
    print(f"Streamlit app is live at: {public_url}")

# Tunnel port 8501, which this notebook treats as Streamlit's default port.
port = 8501

# Open the tunnel first: the Streamlit call below blocks until the server process exits.
start_ngrok(port=port)
start_streamlit()


Streamlit app is live at: https://3233-104-154-250-209.ngrok-free.app
