<a href="https://colab.research.google.com/github/SeminiNethra/Healthcare-Cost-Management/blob/main/Healthcare_Cost_Prediction_Using_Flask.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the data
df = pd.read_csv('/content/Final Year Project Dataset - Sheet1 (1).csv')

# Target column cleanup
target_col = 'What is the average halthcare bill amount?'

# Convert healthcare bill amount to numeric values
def bill_to_num(x):
    if pd.isna(x) or x == 'Nothing':
        return np.nan
    x = str(x).replace(',', '').replace(' ', '').lower()
    if 'under10000' in x:
        return 5000
    elif '10000-50000' in x:
        return 30000
    elif '50000-100000' in x:
        return 75000
    elif 'morethan100000' in x:
        return 150000
    try:
        return float(x)
    except:
        return np.nan

df[target_col] = df[target_col].apply(bill_to_num)
df = df.dropna(subset=[target_col])

# Handle categorical variables
cat_cols = [
    'Sex', 'City You Live In', 'Monthly Income Level',
    'Do You Have Any Chronic Disease', 'Do you have any allergies?',
    'Do you consume alcoholic beverages?', 'Do you smoke or use tobacco products?',
    'what type of hospital do you typically spend on medication per month?'
]

# Make sure all categorical columns exist
cat_cols = [col for col in cat_cols if col in df.columns]

# Replace 'Nothing' with NaN
df = df.replace('Nothing', np.nan)

# Fill missing values in categorical columns
for col in cat_cols:
    df[col] = df[col].fillna('Missing')

# One-hot encode categorical variables
df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

# Drop unnecessary columns
drop_cols = [
    'Name', 'If Yes, please specify',
    'Have you undergone any prior surgeries or procedures?',
    'If yes, please specify',
    'Do you have any other medical history that we should be aware of?'
]

for col in drop_cols:
    if col in df.columns:
        df = df.drop(col, axis=1)

# Convert remaining columns to numeric
for col in df.columns:
    if df[col].dtype == object:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Fill any remaining NaN values with 0
df = df.fillna(0)

# Split features and target
X = df.drop([target_col], axis=1)
y = df[target_col]

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Save the list of feature columns
joblib.dump(list(X_train.columns), 'train_features.pkl')
print("Saved feature list to train_features.pkl")

# Train the Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"\nModel Evaluation:")
print(f"Mean Absolute Error: {mae:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")

# Save the trained model
joblib.dump(model, 'model_random_forest.pkl')
print("Saved model to model_random_forest.pkl")

Saved feature list to train_features.pkl

Model Evaluation:
Mean Absolute Error: 5581.25
Root Mean Squared Error: 14891.73
R² Score: 0.46
Saved model to model_random_forest.pkl


In [2]:
%%writefile app.py
from flask import Flask, render_template, request
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import base64
import os
from werkzeug.middleware.proxy_fix import ProxyFix

# Initialize Flask app
app = Flask(__name__)

# Production configuration
app.config['ENV'] = 'production'
app.config['DEBUG'] = False
app.config['TESTING'] = False

# Define paths for model and features
MODEL_PATH = 'model_random_forest.pkl'
FEATURES_PATH = 'train_features.pkl'

# Load model and features
model = None
train_features = None

try:
    if os.path.exists(MODEL_PATH) and os.path.exists(FEATURES_PATH):
        model = joblib.load(MODEL_PATH)
        train_features = joblib.load(FEATURES_PATH)
        print("✅ Model and features loaded successfully")
    else:
        print(f"❌ Model or features file not found. Ensure '{MODEL_PATH}' and '{FEATURES_PATH}' exist.")
except Exception as e:
    print(f"❌ Error loading model or features: {e}")

# Dropdown options
DROPDOWN_OPTIONS = {
    'gender': ['Male', 'Female'],
    'city': ['Colombo', 'Negombo', 'Marawila', 'Homagama', 'Kochchikade',
             'Rathnapura', 'Piliyandala', 'Galle', 'Anuradhapura', 'Wennappuwa',
             'Kegalle', 'Kottawa', 'Mount Lavinia', 'Biyagama', 'Moratuwa', 'Other'],
    'income': ['Less than 25,000', '25,000-50,000', '50,000-100,000', 'More than 100,000'],
    'chronic': ['Yes', 'No'],
    'allergies': ['Yes', 'No'],
    'alcohol': ['Yes', 'No'],
    'tobacco': ['Yes', 'No'],
    'hospital': ['Government', 'Private', 'Both']
}

def prepare_features(form_data):
    """Prepare input features for prediction"""
    if train_features is None:
        raise Exception("Training features not loaded. Cannot prepare input data.")

    input_dict = {feature: 0 for feature in train_features}

    # Basic demographics
    if "Age" in train_features:
        input_dict["Age"] = float(form_data.get('age', 0))

    # One-hot encoded features
    field_to_prefix = {
        'gender': 'Sex',
        'city': 'City You Live In',
        'income': 'Monthly Income Level',
        'chronic': 'Do You Have Any Chronic Disease',
        'allergies': 'Do you have any allergies?',
        'alcohol': 'Do you consume alcoholic beverages?',
        'tobacco': 'Do you smoke or use tobacco products?',
        'hospital': 'what type of hospital do you typically spend on medication per month?'
    }

    for field, prefix in field_to_prefix.items():
        value = form_data.get(field)
        if value is not None:
             if value == 'Missing':
                 col_name = f"{prefix}_Missing"
             else:
                 col_name = f"{prefix}_{value}"

             if col_name in input_dict:
                 input_dict[col_name] = 1

    if "BMI" in train_features:
        input_dict["BMI"] = float(form_data.get('bmi', 0))

    return pd.DataFrame([input_dict])[train_features]

def generate_feature_importance():
    """Generate feature importance plot as base64 image"""
    try:
        if model is None or train_features is None:
             print("Cannot generate feature importance: Model or features not loaded.")
             return None

        importances = model.feature_importances_
        features = train_features

        sorted_idx = importances.argsort()[::-1][:15]
        sorted_idx = sorted_idx[sorted_idx < len(importances)]
        sorted_features = [features[i] for i in sorted_idx]
        sorted_importances = importances[sorted_idx]

        plt.figure(figsize=(10, 6))
        plt.barh(sorted_features[::-1], sorted_importances[::-1])
        plt.xlabel('Importance Score')
        plt.title('Top 15 Most Important Features')
        plt.tight_layout()

        buf = io.BytesIO()
        plt.savefig(buf, format='png', bbox_inches='tight', dpi=300)
        plt.close()
        return base64.b64encode(buf.getvalue()).decode('utf-8')
    except Exception as e:
        print(f"Error generating feature importance: {e}")
        return None

@app.route('/', methods=['GET', 'POST'])
def home():
    prediction = None
    error = None
    plot_url = None

    if request.method == 'GET':
        plot_url = generate_feature_importance()

    if request.method == 'POST':
        try:
            if model is None or train_features is None:
                raise Exception("Model or training features not loaded. Cannot make prediction.")

            input_df = prepare_features(request.form)
            prediction_value = model.predict(input_df)[0]

            if prediction_value < 10000:
                prediction_text = "Under 10,000"
            elif prediction_value < 50000:
                prediction_text = "10,000 - 50,000"
            elif prediction_value < 100000:
                prediction_text = "50,000 - 100,000"
            else:
                prediction_text = "More than 100,000"

            prediction = f"{prediction_text} (LKR {prediction_value:,.2f})"
            plot_url = generate_feature_importance()

        except Exception as e:
            error = f"Prediction Error: {str(e)}"
            print(error)

    return render_template('index.html',
                         prediction=prediction,
                         error=error,
                         plot_url=plot_url,
                         options=DROPDOWN_OPTIONS)

def create_templates():
    """Create default templates if they don't exist"""
    os.makedirs("templates", exist_ok=True)

    template_path = "templates/index.html"
    template_needs_update = False
    if os.path.exists(template_path):
        with open(template_path, "r") as f:
            content = f.read()
            if '<img src="<redacted>">' in content:
                 template_needs_update = True
    else:
        template_needs_update = True

    if template_needs_update:
        with open(template_path, "w") as f:
            f.write("""<!DOCTYPE html>
<html>
<head>
    <title>Healthcare Cost Prediction</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #121212;
            color: white;
        }
        .form-container {
            background-color: #1e1e1e;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .form-group {
            margin-bottom: 15px;
        }
        label {
            display: block;
            margin-bottom: 5px;
            font-weight: bold;
        }
        input, select {
            padding: 8px;
            width: 100%;
            max-width: 400px;
            background-color: #2e2e2e;
            color: white;
            border: 1px solid #444;
            border-radius: 4px;
        }
        button {
            padding: 10px 20px;
            background-color: #4CAF50;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-weight: bold;
        }
        button:hover {
            background-color: #45a049;
        }
        .prediction {
            color: #4CAF50;
            font-size: 1.2em;
            margin: 20px 0;
            padding: 15px;
            background-color: #1e1e1e;
            border-radius: 4px;
        }
        .error {
            color: #F44336;
            padding: 15px;
            background-color: #1e1e1e;
            border-radius: 4px;
        }
        .feature-importance {
            margin-top: 30px;
            padding-top: 20px;
            border-top: 1px solid #444;
        }
        @media (max-width: 600px) {
            body {
                padding: 10px;
            }
            .form-container {
                padding: 15px;
            }
        }
    </style>
</head>
<body>
    <div class="form-container">
        <h1>Healthcare Cost Prediction</h1>
        <form method="POST">
            <div class="form-group">
                <label for="age">Age:</label>
                <input type="number" id="age" name="age" min="18" max="100" value="30" required>
            </div>

            <div class="form-group">
                <label for="gender">Sex:</label>
                <select id="gender" name="gender" required>
                    {% for option in options.gender %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="form-group">
                <label for="bmi">BMI:</label>
                <input type="number" id="bmi" name="bmi" step="0.01" value="25.0" required>
            </div>

            <div class="form-group">
                <label for="city">City You Live In:</label>
                <select id="city" name="city" required>
                    {% for option in options.city %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="form-group">
                <label for="income">Monthly Income Level:</label>
                <select id="income" name="income" required>
                    {% for option in options.income %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="form-group">
                <label for="chronic">Do You Have Any Chronic Disease:</label>
                <select id="chronic" name="chronic" required>
                    {% for option in options.chronic %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

             <div class="form-group">
                <label for="allergies">Do you have any allergies?:</label>
                <select id="allergies" name="allergies" required>
                    {% for option in options.allergies %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

             <div class="form-group">
                <label for="alcohol">Do you consume alcoholic beverages?:</label>
                <select id="alcohol" name="alcohol" required>
                    {% for option in options.alcohol %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="form-group">
                <label for="tobacco">Do you smoke or use tobacco products?:</label>
                <select id="tobacco" name="tobacco" required>
                    {% for option in options.tobacco %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="form-group">
                <label for="hospital">What type of hospital do you typically spend on medication per month?:</label>
                <select id="hospital" name="hospital" required>
                    {% for option in options.hospital %}
                    <option value="{{ option }}">{{ option }}</option>
                    {% endfor %}
                </select>
            </div>

            <button type="submit">Predict Healthcare Cost</button>
        </form>

        {% if prediction %}
        <div class="prediction">
            <h3>Predicted Healthcare Cost: {{ prediction }}</h3>
        </div>
        {% endif %}

        {% if error %}
        <div class="error">
            <p>{{ error }}</p>
        </div>
        {% endif %}

        {% if plot_url %}
        <div class="feature-importance">
            <h2>Model Explainability</h2>
            <p>Most important features influencing the prediction:</p>
            <img src="data:image/png;base64,{{ plot_url }}" alt="Feature Importance" style="max-width: 100%;">
        </div>
        {% endif %}
    </div>
</body>
</html>""")

def create_production_server():
    """Configure production settings"""
    app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
    return app

if __name__ == '__main__':
    create_templates()
    app.run()
else:
    app = create_production_server()

Writing app.py


In [3]:
!pip install flask flask-ngrok pyngrok


Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.8-py3-none-any.whl.metadata (10 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Downloading pyngrok-7.2.8-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok, flask-ngrok
Successfully installed flask-ngrok-0.0.25 pyngrok-7.2.8


In [None]:
from flask import Flask, render_template, request
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import base64
import os
from pyngrok import ngrok, conf
from pyngrok.exception import PyngrokNgrokError # Import the specific exception
from werkzeug.middleware.proxy_fix import ProxyFix
import time # Import time module

# Initialize Flask app
app = Flask(__name__)

# Load model and features
MODEL_PATH = 'model_random_forest.pkl'
FEATURES_PATH = 'train_features.pkl'
model = None
train_features = None

try:
    if os.path.exists(MODEL_PATH) and os.path.exists(FEATURES_PATH):
        model = joblib.load(MODEL_PATH)
        train_features = joblib.load(FEATURES_PATH)
        print("✅ Model and features loaded successfully")
    else:
        print(f"❌ Model or features file not found. Ensure '{MODEL_PATH}' and '{FEATURES_PATH}' exist.")
except Exception as e:
    print(f"❌ Error loading model or features: {e}")

# Set up ngrok
NGROK_AUTH_TOKEN = "2wzI3R7bGtkAA2ff4vmGlAIkniJ_62B87dHeFsYi6DdeHLrjM"  # Replace with your token
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Explicitly disconnect any active tunnels and kill ngrok process
try:
    print("Attempting to disconnect existing ngrok tunnels...")
    # Get the current ngrok process if it exists
    ngrok_process = ngrok.get_ngrok_process()
    if ngrok_process:
        tunnels = ngrok.get_tunnels()
        for tunnel in tunnels:
            print(f"Disconnecting tunnel: {tunnel.public_url}")
            ngrok.disconnect(tunnel.public_url)
        print("Attempting to kill ngrok process...")
        ngrok.kill()
        time.sleep(2) # Add a small delay to ensure ngrok process is killed
    print("No active ngrok process found or successfully killed.")
except Exception as e:
    print(f"An error occurred while trying to kill ngrok: {e}")
    # Even if killing fails, try to connect. ngrok.connect might handle it.

# Open a ngrok tunnel
try:
    public_url = ngrok.connect(addr=5000, proto="http")
    print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:5000\"")
# Catch the specific PyngrokNgrokError
except PyngrokNgrokError as e:
    print(f"❌ Failed to start ngrok tunnel: {e}")
    print("Please check your ngrok dashboard (https://dashboard.ngrok.com/agents) to ensure no other sessions are active.")

app.run(port=5000, use_reloader=False)

✅ Model and features loaded successfully
Attempting to disconnect existing ngrok tunnels...
Attempting to kill ngrok process...
No active ngrok process found or successfully killed.
 * ngrok tunnel "NgrokTunnel: "https://f7ab-34-169-40-204.ngrok-free.app" -> "http://localhost:5000"" -> "http://127.0.0.1:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
