In [12]:
# Install required packages
!pip install pandas numpy matplotlib seaborn plotly scikit-learn ipywidgets

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets

# Shared stylesheet for the dashboard. It defines the CSS classes referenced by
# the HTML fragments rendered further down: .main-header, .sub-header,
# .metric-card, .team-card, .success-box and .info-box.
styling = """
<style>
    .main-header {
        font-size: 3rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .sub-header {
        font-size: 1.8rem;
        color: #2c3e50;
        border-bottom: 2px solid #3498db;
        padding-bottom: 0.5rem;
        margin-top: 2rem;
    }
    .metric-card {
        background-color: #f8f9fa;
        padding: 20px;
        border-radius: 10px;
        border-left: 4px solid #3498db;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin-bottom: 1rem;
    }
    .team-card {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 15px;
        border-radius: 10px;
        margin: 10px 0;
        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
    }
    .success-box {
        background-color: #d4edda;
        color: #155724;
        padding: 15px;
        border-radius: 5px;
        border: 1px solid #c3e6cb;
        margin: 15px 0;
    }
    .info-box {
        background-color: #d1ecf1;
        color: #0c5460;
        padding: 15px;
        border-radius: 5px;
        border: 1px solid #bee5eb;
        margin: 15px 0;
    }
</style>
"""
# Inject the stylesheet into the cell output once up front; display_page()
# re-injects it after every clear_output() so the classes stay available.
display(HTML(styling))

# Data loading and processing functions
def load_and_process_data(base_path):
    """Placeholder loader: print upload instructions and return an empty frame.

    Args:
        base_path: Intended location of the CSV files. Currently unused,
            because no loading logic has been implemented yet.

    Returns:
        An empty ``pandas.DataFrame``.
    """
    # Nothing is read from disk here; in Colab the CSV files must first be
    # uploaded or mounted from Google Drive, and this stub replaced.
    notice = "Please upload your CSV files or modify this function to load from Google Drive"
    print(notice)
    empty_frame = pd.DataFrame()
    return empty_frame

def train_model(data):
    """Fit a random-forest regressor for ``total_visits`` and evaluate it.

    Args:
        data: DataFrame containing a ``total_visits`` target column plus the
            feature columns. The identifier columns ``person_id``,
            ``household_id`` and ``district_id`` are excluded from the
            features when present; they are not required.

    Returns:
        A 9-tuple ``(model, X_train, X_test, y_train, y_test, y_pred, mae,
        mse, r2)``: the fitted model, the train/test split, the predictions
        for ``X_test`` and the three evaluation metrics on the test split.

    Raises:
        KeyError: If ``data`` has no ``total_visits`` column.
    """
    # Look up the target first so a missing column fails with a clear KeyError.
    y = data['total_visits']

    # Drop the target and whichever identifier columns exist. errors='ignore'
    # means callers no longer have to fabricate ID columns just to satisfy
    # this function.
    non_feature_columns = ['person_id', 'household_id', 'district_id', 'total_visits']
    X = data.drop(columns=non_feature_columns, errors='ignore')

    # Hold out 20% of the rows for evaluation; fixed seed for repeatability
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Make predictions on the held-out rows
    y_pred = model.predict(X_test)

    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return model, X_train, X_test, y_train, y_test, y_pred, mae, mse, r2

# Page selector: one toggle button per dashboard page, each with a hover tooltip
navigation = widgets.ToggleButtons(
    description='Navigate:',
    options=[
        '🏠 Home',
        '📊 Data Overview',
        '📈 Data Analysis',
        '🤖 Model Training',
        '🔮 Predictions',
        '👥 Team Details',
    ],
    tooltips=[
        'Home page',
        'Data overview',
        'Data analysis',
        'Model training',
        'Predictions',
        'Team details',
    ],
    button_style='',
    disabled=False,
)

# Render the dashboard banner first ...
display(HTML('<h1 class="main-header">🏥 Healthcare Service Demand Prediction Dashboard</h1>'))

# ... then the page selector beneath it
display(navigation)

# Page rendering: one private helper per dashboard page, dispatched by
# display_page() at the bottom of this section.

# Display names for the health-condition columns. Acronyms need explicit
# entries because str.capitalize() would render them as "Hiv" / "Tb".
_CONDITION_LABELS = {
    'hiv': 'HIV',
    'tb': 'TB',
    'diabetes': 'Diabetes',
    'hypertension': 'Hypertension',
    'asthma': 'Asthma',
}


def _make_sample_data(n_rows, include_asthma_tb=True):
    """Return a randomly generated demonstration DataFrame with ``n_rows`` rows.

    The values are drawn from NumPy's global (unseeded) random state, so every
    call produces different data.

    Args:
        n_rows: Number of synthetic individuals to generate.
        include_asthma_tb: When True, also generate the ``asthma`` and ``tb``
            indicator columns.
    """
    columns = {
        'age': np.random.randint(18, 80, n_rows),
        'total_visits': np.random.poisson(2, n_rows),
        'is_employed': np.random.choice([0, 1], n_rows),
        'household_size': np.random.randint(1, 8, n_rows),
        'monthly_income_zar': np.random.normal(15000, 5000, n_rows),
        'hiv': np.random.choice([0, 1], n_rows, p=[0.9, 0.1]),
        'diabetes': np.random.choice([0, 1], n_rows, p=[0.85, 0.15]),
        'hypertension': np.random.choice([0, 1], n_rows, p=[0.7, 0.3]),
    }
    if include_asthma_tb:
        columns['asthma'] = np.random.choice([0, 1], n_rows, p=[0.95, 0.05])
        columns['tb'] = np.random.choice([0, 1], n_rows, p=[0.98, 0.02])
    return pd.DataFrame(columns)


def _plot_condition_boxplot(data, condition_name):
    """Draw a box plot of ``total_visits`` grouped by one condition column."""
    label = _CONDITION_LABELS.get(condition_name, condition_name.capitalize())
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x=data[condition_name], y=data['total_visits'], ax=ax, palette='Set2')
    ax.set_title(f'Healthcare Visits: With vs. Without {label}', fontweight='bold')
    ax.set_xlabel(f'Has {label} (0=No, 1=Yes)')
    ax.set_ylabel('Total Visits')
    ax.grid(True, alpha=0.3)
    plt.show()


def _estimate_visits(age, is_employed, monthly_income, condition_count):
    """Return the demo heuristic's visit estimate, clamped to the range 0-10.

    This is a hand-written formula for demonstration only; it does not use
    the trained model.
    """
    base_visits = 1.5
    age_factor = age / 50 * 0.5
    employment_factor = 0.5 if is_employed == 1 else 0
    income_factor = (30000 - monthly_income) / 25000 * 0.3
    condition_factor = condition_count * 0.8

    prediction = base_visits + age_factor + employment_factor + income_factor + condition_factor
    return max(0, min(prediction, 10))  # Clamp between 0 and 10


def _interpret_prediction(prediction):
    """Map a visit estimate to an ``(interpretation text, CSS colour)`` pair."""
    if prediction < 1:
        return "Low healthcare service demand predicted.", "#27ae60"
    if prediction < 3:
        return "Moderate healthcare service demand predicted.", "#f39c12"
    return "High healthcare service demand predicted.", "#e74c3c"


def _render_home():
    """Render the Home page: welcome text beside a dataset summary."""
    # Create two output areas for columns
    col1_output = widgets.Output()
    col2_output = widgets.Output()

    with col1_output:
        display(HTML("""
            <h2>Welcome to the Healthcare Demand Prediction Dashboard</h2>
            <p>This interactive dashboard provides insights and predictions about healthcare service demand
            in South Africa, supporting the planning and implementation of the National Health Insurance (NHI) program.</p>

            <h3>Key Features:</h3>
            <ul>
                <li>📊 <b>Data Exploration</b>: Explore the comprehensive healthcare dataset</li>
                <li>📈 <b>Visual Analytics</b>: Interactive charts and visualizations</li>
                <li>🤖 <b>Machine Learning</b>: Predictive modeling using Random Forest</li>
                <li>🔮 <b>Demand Prediction</b>: Forecast healthcare service needs</li>
                <li>👥 <b>Team Collaboration</b>: Learn about our development team</li>
            </ul>

            <h3>Project Background</h3>
            <p>The NHI aims to provide equitable access to healthcare for all South Africans.
            Accurate forecasting of future demand for services is crucial for resource allocation
            like doctors, nurses, medication, and facility space.</p>
            """))

    with col2_output:
        display(HTML("""
            <center>
                <img src="https://cdn-icons-png.flaticon.com/512/2785/2785819.png" width="250">
                <h3>Dataset Features:</h3>
                <ul>
                    <li>Demographic information</li>
                    <li>Health status predictors</li>
                    <li>Historical service usage</li>
                    <li>Geographic data</li>
                    <li>Insurance coverage</li>
                </ul>
            </center>
            """))

    # Create HBox with the two columns
    col_box = widgets.HBox([col1_output, col2_output])
    display(col_box)


def _render_data_overview():
    """Render the Data Overview page: key metrics, a sample and a histogram."""
    display(HTML('<h2 class="sub-header">Dataset Overview</h2>'))

    # Load sample data for demonstration (this page shows the 8-column variant)
    data = _make_sample_data(100, include_asthma_tb=False)

    # Key metrics
    metrics_html = """
        <div style="display: flex; gap: 10px; margin-bottom: 20px; flex-wrap: wrap;">
            <div class="metric-card" style="flex: 1; min-width: 200px;">
                <h3>Total Individuals</h3>
                <p style="font-size: 24px; font-weight: bold;">{:,}</p>
            </div>
            <div class="metric-card" style="flex: 1; min-width: 200px;">
                <h3>Number of Features</h3>
                <p style="font-size: 24px; font-weight: bold;">{}</p>
            </div>
            <div class="metric-card" style="flex: 1; min-width: 200px;">
                <h3>Zero Visits</h3>
                <p style="font-size: 24px; font-weight: bold;">{:,}</p>
            </div>
            <div class="metric-card" style="flex: 1; min-width: 200px;">
                <h3>One+ Visits</h3>
                <p style="font-size: 24px; font-weight: bold;">{:,}</p>
            </div>
        </div>
        """.format(data.shape[0], data.shape[1],
                   len(data[data['total_visits'] == 0]),
                   len(data[data['total_visits'] > 0]))

    display(HTML(metrics_html))

    # Data sample and description
    col1_out = widgets.Output()
    col2_out = widgets.Output()

    with col1_out:
        display(HTML("<h3>Sample Data</h3>"))
        display(data.head(10))

    with col2_out:
        display(HTML("""
            <h3>Data Description</h3>
            <p>The dataset contains comprehensive health and demographic information:</p>
            <ul>
                <li>Personal demographics (age, gender, employment)</li>
                <li>Household information (income, size, location)</li>
                <li>Health conditions (HIV, TB, diabetes, hypertension, asthma)</li>
                <li>Insurance coverage details</li>
                <li>Geographic distribution</li>
            </ul>
            """))

    data_cols = widgets.HBox([col1_out, col2_out])
    display(data_cols)

    # Target variable distribution
    display(HTML("<h3>Distribution of Healthcare Visits</h3>"))
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(data['total_visits'], kde=True, bins=30, ax=ax, color='#3498db')
    ax.set_title('Distribution of Total Number of Visits per Person', fontsize=14, fontweight='bold')
    ax.set_xlabel('Total Visits')
    ax.set_ylabel('Count')
    ax.grid(True, alpha=0.3)
    plt.show()


def _render_data_analysis():
    """Render the Data Analysis page: scatter, condition box plot and heatmap."""
    display(HTML('<h2 class="sub-header">Data Analysis</h2>'))

    # Load sample data for demonstration
    data = _make_sample_data(100)

    # Age vs Visits
    display(HTML("<h3>Age vs. Number of Healthcare Visits</h3>"))
    fig = px.scatter(data, x='age', y='total_visits',
                     title='Relationship Between Age and Healthcare Visits',
                     labels={'age': 'Age', 'total_visits': 'Total Visits'},
                     color_discrete_sequence=['#3498db'])
    fig.show()

    # Health conditions analysis
    display(HTML("<h3>Health Conditions Impact on Visits</h3>"))

    condition = widgets.Dropdown(
        options=['hiv', 'tb', 'diabetes', 'hypertension', 'asthma'],
        value='hiv',
        description='Select Health Condition:'
    )

    condition_output = widgets.Output()

    def update_condition_plot(change):
        # Redraw inside the dedicated output area so only the plot is replaced
        with condition_output:
            clear_output(wait=True)
            _plot_condition_boxplot(data, change['new'])

    condition.observe(update_condition_plot, names='value')

    display(condition)
    display(condition_output)

    # Initial plot for the dropdown's starting selection
    with condition_output:
        _plot_condition_boxplot(data, condition.value)

    # Correlation heatmap
    display(HTML("<h3>Feature Correlation Heatmap</h3>"))
    numeric_data = data.select_dtypes(include=[np.number])
    corr_matrix = numeric_data.corr()

    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', center=0, ax=ax)
    ax.set_title('Correlation Between Features', fontweight='bold')
    plt.show()


def _render_model_training():
    """Render the Model Training page with a button that trains on demand."""
    display(HTML('<h2 class="sub-header">Machine Learning Model</h2>'))

    display(HTML("""
        <div class="info-box">
        We're using a <b>Random Forest Regressor</b> to predict healthcare service demand based on demographic
        and health factors. This ensemble learning method combines multiple decision trees for accurate predictions.
        </div>
        """))

    train_button = widgets.Button(description="🚀 Train Model", button_style='success')
    output = widgets.Output()

    def on_train_button_clicked(b):
        with output:
            clear_output()
            print("Training the model... This may take a few minutes")

            # Load sample data for demonstration
            data = _make_sample_data(1000)

            # train_model() excludes these identifier columns from the
            # features; give the synthetic rows placeholder IDs.
            data['person_id'] = range(len(data))
            data['household_id'] = range(len(data))
            data['district_id'] = range(len(data))

            # Train the model (the unused halves of the split are discarded)
            model, X_train, _, _, y_test, y_pred, mae, mse, r2 = train_model(data)

            display(HTML("""
                <div class="success-box">
                <h3>✅ Model trained successfully!</h3>
                The machine learning model has been trained and evaluated on the healthcare dataset.
                </div>
                """))

            # Display metrics in cards
            metrics_html = """
                <div style="display: flex; gap: 10px; margin-bottom: 20px; flex-wrap: wrap;">
                    <div class="metric-card" style="flex: 1; min-width: 200px;">
                        <h3>Mean Absolute Error</h3>
                        <p style="font-size: 24px; font-weight: bold;">{:.2f}</p>
                        <p>Average absolute error between predictions and actual values</p>
                    </div>
                    <div class="metric-card" style="flex: 1; min-width: 200px;">
                        <h3>Mean Squared Error</h3>
                        <p style="font-size: 24px; font-weight: bold;">{:.2f}</p>
                        <p>Average squared error - punishes larger errors more</p>
                    </div>
                    <div class="metric-card" style="flex: 1; min-width: 200px;">
                        <h3>R² Score</h3>
                        <p style="font-size: 24px; font-weight: bold;">{:.4f}</p>
                        <p>Proportion of variance explained - closer to 1 is better</p>
                    </div>
                </div>
                """.format(mae, mse, r2)

            display(HTML(metrics_html))

            # Actual vs Predicted plot; the dashed diagonal marks perfect predictions
            display(HTML("<h3>Actual vs Predicted Values</h3>"))
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.scatter(y_test, y_pred, alpha=0.6, color='#3498db')
            ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
            ax.set_xlabel('Actual Visits')
            ax.set_ylabel('Predicted Visits')
            ax.set_title('Model Performance: Actual vs Predicted Healthcare Visits', fontweight='bold')
            ax.grid(True, alpha=0.3)
            plt.show()

            # Feature importance
            display(HTML("<h3>Feature Importance</h3>"))
            feature_importance = pd.DataFrame({
                'feature': X_train.columns,
                'importance': model.feature_importances_
            }).sort_values('importance', ascending=False)

            fig, ax = plt.subplots(figsize=(10, 8))
            sns.barplot(x='importance', y='feature', data=feature_importance.head(10), ax=ax, palette='viridis')
            ax.set_title('Top 10 Most Important Features for Prediction', fontweight='bold')
            ax.set_xlabel('Importance')
            ax.set_ylabel('Feature')
            plt.show()

    train_button.on_click(on_train_button_clicked)
    display(train_button)
    display(output)


def _render_predictions():
    """Render the Predictions page: input widgets plus a demo estimate."""
    display(HTML('<h2 class="sub-header">Healthcare Demand Predictor</h2>'))

    display(HTML("""
        <div class="info-box">
        Adjust the parameters below to predict healthcare service demand for different demographic profiles.
        The model will estimate the expected number of healthcare visits based on these factors.
        </div>
        """))

    # Create input widgets for key features
    age = widgets.IntSlider(value=35, min=18, max=80, description='Age:')
    is_employed = widgets.Dropdown(options=[('Unemployed', 0), ('Employed', 1)], value=0, description='Employment:')
    household_size = widgets.IntSlider(value=3, min=1, max=8, description='Household Size:')
    monthly_income = widgets.FloatSlider(value=15000, min=5000, max=30000, description='Monthly Income:')
    is_urban = widgets.Dropdown(options=[('Rural', 0), ('Urban', 1)], value=0, description='Location:')
    has_medical_scheme = widgets.Dropdown(options=[('No', 0), ('Yes', 1)], value=0, description='Medical Scheme:')
    hiv = widgets.Dropdown(options=[('No', 0), ('Yes', 1)], value=0, description='HIV:')
    diabetes = widgets.Dropdown(options=[('No', 0), ('Yes', 1)], value=0, description='Diabetes:')
    hypertension = widgets.Dropdown(options=[('No', 0), ('Yes', 1)], value=0, description='Hypertension:')

    predict_button = widgets.Button(description="🔍 Predict Healthcare Visits", button_style='primary')
    prediction_output = widgets.Output()

    def on_predict_button_clicked(b):
        with prediction_output:
            clear_output()

            # For demonstration, we'll create a simple prediction.
            # In a real scenario, you would use your trained model.
            # NOTE: household size, location and medical scheme are shown in
            # the form but are not inputs to this demo formula.
            prediction = _estimate_visits(
                age.value,
                is_employed.value,
                monthly_income.value,
                hiv.value + diabetes.value + hypertension.value,
            )
            interpretation, color = _interpret_prediction(prediction)

            # Emit the whole result as ONE HTML fragment. Each display(HTML())
            # is rendered as a separate fragment, so opening the box in one
            # call and closing it in another leaves the second part outside.
            display(HTML(f"""
                <div class="success-box">
                <h3>📋 Prediction Results</h3>
                <p style='font-size: 24px; font-weight: bold; color: #2c3e50;'>
                Predicted number of healthcare visits: <span style='color: #e74c3c;'>{prediction:.2f}</span>
                </p>
                <p style='font-size: 18px; font-weight: bold; color: {color};'>
                {interpretation}
                </p>
                </div>
                """))

    predict_button.on_click(on_predict_button_clicked)

    # Layout the widgets in three columns
    col1_out = widgets.Output()
    col2_out = widgets.Output()
    col3_out = widgets.Output()

    with col1_out:
        display(HTML("<h3>Demographic Information</h3>"))
        display(age)
        display(is_employed)
        display(household_size)

    with col2_out:
        display(HTML("<h3>Socioeconomic Factors</h3>"))
        display(monthly_income)
        display(is_urban)
        display(has_medical_scheme)

    with col3_out:
        display(HTML("<h3>Health Conditions</h3>"))
        display(hiv)
        display(diabetes)
        display(hypertension)

    input_cols = widgets.HBox([col1_out, col2_out, col3_out])
    display(input_cols)
    display(predict_button)
    display(prediction_output)


def _render_team_details():
    """Render the Team Details page: member cards and project information."""
    display(HTML('<h2 class="sub-header">Project Team</h2>'))

    display(HTML("""
        <div class="info-box">
        <h3>👥 Group Members</h3>
        This project was developed by our dedicated team for the Technical Programming 2 assessment.
        </div>
        """))

    # Team members in cards
    team_members = [
        {"name": "Mbambo AM", "id": "22305677", "role": "Data Processing"},
        {"name": "Mdletshe S", "id": "22317991", "role": "Machine Learning"},
        {"name": "Mngoma S", "id": "22438195", "role": "Visualization"},
        {"name": "Mnqayi LO", "id": "22341285", "role": "Dashboard Design"},
        {"name": "Mnqayi V", "id": "22444713", "role": "Data Analysis"},
        {"name": "Ntuli SB", "id": "22327734", "role": "Model Development"},
        {"name": "Qwabe SS", "id": "22361055", "role": "Project Coordination"}
    ]

    # Display team members in a two-column grid, one row per pair
    for i in range(0, len(team_members), 2):
        row_members = team_members[i:i+2]
        col1_out = widgets.Output()
        col2_out = widgets.Output()

        with col1_out:
            display(HTML(f"""
                <div class="team-card">
                <h3>{row_members[0]['name']}</h3>
                <p><b>Student ID:</b> {row_members[0]['id']}</p>
                <p><b>Role:</b> {row_members[0]['role']}</p>
                </div>
                """))

        with col2_out:
            # The last row has only one member when the team size is odd
            if len(row_members) > 1:
                display(HTML(f"""
                    <div class="team-card">
                    <h3>{row_members[1]['name']}</h3>
                    <p><b>Student ID:</b> {row_members[1]['id']}</p>
                    <p><b>Role:</b> {row_members[1]['role']}</p>
                    </div>
                    """))

        row_box = widgets.HBox([col1_out, col2_out])
        display(row_box)

    # Project details
    display(HTML("""
        <h2>📚 Project Information</h2>

        <p><b>Course:</b> Technical Programming 2 (TPRO200/TLPR200)<br>
        <b>Assessment:</b> Assessment 4<br>
        <b>Project:</b> Healthcare Service Demand Prediction<br>
        <b>Selected Task:</b> Task 1 - Predicting Healthcare Service Demand</p>

        <h2>🎯 Project Objective</h2>

        <p>The NHI aims to provide equitable access to healthcare for all South Africans.
        A critical step in planning for this massive undertaking is accurately forecasting
        future demand for services. By predicting how many people will need care, the government
        can better allocate resources like doctors, nurses, medication, and facility space,
        ensuring the system is efficient and effective from the start.</p>

        <h2>📊 Dataset</h2>

        <p><b>Dataset Name:</b> Comprehensive South African Health Simulation Dataset<br>
        <b>Note:</b> This is a simulated dataset created for academic purposes.</p>
        """))


# Maps each navigation label to the helper that renders that page.
_PAGE_RENDERERS = {
    '🏠 Home': _render_home,
    '📊 Data Overview': _render_data_overview,
    '📈 Data Analysis': _render_data_analysis,
    '🤖 Model Training': _render_model_training,
    '🔮 Predictions': _render_predictions,
    '👥 Team Details': _render_team_details,
}


def display_page(page):
    """Clear the output and render the dashboard with the selected page.

    The stylesheet, banner and navigation widget are re-displayed on every
    call because ``clear_output`` removes them along with the previous page.

    Args:
        page: Navigation label of the page to show (one of the
            ``navigation`` widget's options). An unrecognised label renders
            only the banner, navigation and footer.
    """
    clear_output(wait=True)
    display(HTML(styling))
    display(HTML('<h1 class="main-header">🏥 Healthcare Service Demand Prediction Dashboard</h1>'))
    display(navigation)

    renderer = _PAGE_RENDERERS.get(page)
    if renderer is not None:
        renderer()

    # Footer
    display(HTML("""
    <hr>
    <div style='text-align: center; color: #7f8c8d;'>
    <p>Technical Programming 2 (TPRO200/TLPR200) - Assessment 4 | Healthcare Demand Prediction Dashboard</p>
    <p>© 2024 | Group Project</p>
    </div>
    """))

# Set up navigation handler
def on_navigation_change(change):
    """Re-render the dashboard when the navigation widget's value changes.

    Args:
        change: Change notification mapping; its ``'name'`` entry identifies
            the trait that changed and ``'new'`` holds the new value.
    """
    # Ignore notifications for any trait other than the selected value
    is_value_change = change['name'] == 'value'
    if not is_value_change:
        return
    display_page(change['new'])

# Subscribe to the 'value' trait only. Without names=, the handler is also
# invoked for the widget's other trait changes (e.g. index and label) and has
# to discard them itself.
navigation.observe(on_navigation_change, names='value')

# Display the initial page
display_page(navigation.value)

ToggleButtons(description='Navigate:', index=5, options=('🏠 Home', '📊 Data Overview', '📈 Data Analysis', '🤖 Mo…

HBox(children=(Output(), Output()))

HBox(children=(Output(), Output()))

HBox(children=(Output(), Output()))

HBox(children=(Output(), Output()))