### Install and Import Libraries

In [1]:
!pip install -q streamlit plotly-express pandas numpy pyngrok catboost

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
from pyngrok import ngrok
from pyngrok import conf
import time
import threading

# Never store the ngrok token in the notebook. Read it from the NGROK_AUTH_TOKEN
# environment variable and fall back to a non-echoing prompt when it is unset.
# NOTE: the token that used to be hardcoded here must be treated as leaked and
# should be revoked/rotated in the ngrok dashboard.
from getpass import getpass

ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
conf.get_default().auth_token = ngrok_auth_token

# Directory that later %%writefile cells target (e.g. pages/2_Glosarium.py).
os.makedirs("pages", exist_ok=True)

In [None]:
# Mount Google Drive (Colab only); the dashboard reads its dataset from a path
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

## Code

### Dashboard

In [None]:
%%writefile Dashboard.py
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# Filesystem path (despite the name, not a URL) of the cleaned CSV on the mounted Drive.
DATASET_URL = "/content/drive/MyDrive/ComBio/Dataset/cleaned_dataset.csv"

# Lookup tables translating the dataset's integer codes into display labels.
GENDER_MAP = dict(enumerate(["Pria", "Wanita"], start=1))
RACE_ETHNICITY_MAP = dict(
    enumerate(
        [
            "Meksiko Amerika",
            "Amerika Lainnya",
            "Kulit Putih Non-Hispanik",
            "Kulit Hitam Non-Hispanik",
            "Ras Lainnya",
        ],
        start=1,
    )
)
EDUCATION_MAP = dict(
    enumerate(
        [
            "Kurang dari Kelas 9",
            "Kelas 9-11",
            "Lulusan SMA/GED",
            "Beberapa Perguruan Tinggi/AA",
            "Sarjana atau Lebih",
        ],
        start=1,
    )
)
MARITAL_STATUS_MAP = dict(
    enumerate(
        [
            "Menikah",
            "Janda",
            "Bercerai",
            "Berpisah",
            "Tidak Pernah Menikah",
            "Hidup Bersama",
        ],
        start=1,
    )
)
YES_NO_MAP = dict(enumerate(["Ya", "Tidak"], start=1))
ALCOHOL_MAP = dict(enumerate(["Peminum Berat", "Peminum Ringan/Sedang"], start=1))
# The target is coded from 0, unlike the 1-based survey codes above.
HCV_STATUS_MAP = dict(enumerate(["Negatif", "Positif"]))

# Columns coerced to numeric dtype when the data is loaded.
NUMERIC_COLS_FOR_CONVERSION = [
    "age",
    "income_ratio",
    "alt",
    "albumin",
    "alp",
    "ast",
    "bun",
    "cholesterol",
    "cpk",
    "creatinine",
    "ggt",
    "glucose",
    "total_bilirubin",
    "total_calcium",
    "total_protein",
    "triglycerides",
    "uric_acid",
    "ldh",
    "globulin",
    "osmolality",
]

# Columns entering the correlation matrix: every numeric feature plus the target.
CORRELATION_COLS = [*NUMERIC_COLS_FOR_CONVERSION, "hcv_status"]

# Label / derived categorical columns created while preparing the data.
CATEGORICAL_COLS_MAPPED = [
    *(
        f"{source}_label"
        for source in ("gender", "race_ethnicity", "education", "marital_status")
    ),
    "age_group",
    "alt_category",
    "ast_category",
]

# Colour scheme: light green for negative, red for positive.
COLOR_NEGATIVE = "#a1d99b"
COLOR_POSITIVE = "#d62728"

HCV_COLOR_MAP = dict(zip(("Negatif", "Positif"), (COLOR_NEGATIVE, COLOR_POSITIVE)))

# The same Plotly "Reds" palette serves as continuous scale and discrete sequence.
REDS_COLOR_SCALE = px.colors.sequential.Reds
REDS_DISCRETE_SEQUENCE = REDS_COLOR_SCALE
SINGLE_LINE_COLOR = COLOR_POSITIVE


st.set_page_config(page_title="📊 Dashboard Hepatitis C", layout="wide")


def create_age_group(age):
    """Bucket an age into a ten-year band label.

    Missing values map to "Tidak Diketahui"; ages below 30 map to "<30",
    then "30-39" ... "60-69", and anything from 70 upwards maps to "70+".
    """
    if pd.isna(age):
        return "Tidak Diketahui"

    # (exclusive upper bound, label) pairs, checked from youngest to oldest.
    bands = (
        (30, "<30"),
        (40, "30-39"),
        (50, "40-49"),
        (60, "50-59"),
        (70, "60-69"),
    )
    for upper_bound, band_label in bands:
        if age < upper_bound:
            return band_label
    return "70+"


def create_alt_category(alt):
    """Classify an ALT value into a display category.

    Values up to and including 40 are "Normal (≤40)", values above 40 up to
    and including 80 are "Sedikit Tinggi (41-80)", larger values are
    "Tinggi (>80)". Missing values map to "Tidak Diketahui".
    """
    if pd.isna(alt):
        return "Tidak Diketahui"
    # Check the highest band first so each test is a single comparison.
    if alt > 80:
        return "Tinggi (>80)"
    if alt > 40:
        return "Sedikit Tinggi (41-80)"
    return "Normal (≤40)"


def create_ast_category(ast):
    """Classify an AST value into a display category.

    Uses the same cut-offs as the ALT categories: up to and including 40 is
    "Normal (≤40)", above 40 up to and including 80 is
    "Sedikit Tinggi (41-80)", anything larger is "Tinggi (>80)". Missing
    values map to "Tidak Diketahui".
    """
    if pd.isna(ast):
        return "Tidak Diketahui"
    # (inclusive upper bound, label) pairs in ascending order.
    for ceiling, category in ((40, "Normal (≤40)"), (80, "Sedikit Tinggi (41-80)")):
        if ast <= ceiling:
            return category
    return "Tinggi (>80)"


def safe_calculate_prevalence(group):
    """Summarise HCV prevalence for one group of rows.

    Args:
        group: frame-like object with an "hcv_status" column; the column sum
            is used as the number of positives.

    Returns:
        pd.Series with "Jumlah Positif" (sum of hcv_status), "Jumlah Total"
        (non-null count) and "Prevalensi (%)" (positives / total * 100, or 0
        when the group has no non-null status values).
    """
    status = group["hcv_status"]
    n_total = status.count()
    n_positive = status.sum()

    # Guard against division by zero for groups without any valid status.
    if n_total > 0:
        pct_positive = n_positive / n_total * 100
    else:
        pct_positive = 0

    summary = {
        "Jumlah Positif": n_positive,
        "Jumlah Total": n_total,
        "Prevalensi (%)": pct_positive,
    }
    return pd.Series(summary)


@st.cache_data
def load_and_prepare_data(filepath):
    """Load the Hepatitis C CSV and derive the columns the dashboard needs.

    Steps: read the CSV, keep only rows with a numeric ``hcv_status`` (cast to
    int), coerce the known numeric feature columns, add human-readable label
    columns for the coded categorical features, and add the ``age_group``,
    ``alt_category`` and ``ast_category`` buckets.

    Args:
        filepath: path of the CSV file to read.

    Returns:
        The prepared ``pd.DataFrame``. Expected feature columns that are
        absent from the file are tolerated: numeric ones are created as NaN,
        and label columns fall back to "Tidak Diketahui".

    Notes:
        If the file does not exist, an error is shown in the app and the
        Streamlit script run is stopped via ``st.stop()``.
    """
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        st.error(
            f"Error: File tidak ditemukan di '{filepath}'. Pastikan file ada di direktori yang benar."
        )
        st.stop()

    # Target: rows whose status cannot be parsed as a number are discarded.
    df["hcv_status"] = pd.to_numeric(df["hcv_status"], errors="coerce")
    df = df.dropna(subset=["hcv_status"])
    df["hcv_status"] = df["hcv_status"].astype(int)

    # Numeric features: coerce unparsable values to NaN; create missing columns
    # as NaN so downstream code can rely on the column existing.
    for col in NUMERIC_COLS_FOR_CONVERSION:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        else:
            df[col] = np.nan

    # Coded categoricals -> label columns. A missing source column is handled
    # the same way as an unmapped code instead of raising KeyError.
    label_sources = (
        ("gender_label", "gender", GENDER_MAP),
        ("race_ethnicity_label", "race_ethnicity", RACE_ETHNICITY_MAP),
        ("education_label", "education_level", EDUCATION_MAP),
        ("marital_status_label", "marital_status", MARITAL_STATUS_MAP),
    )
    for label_col, source_col, code_map in label_sources:
        if source_col in df.columns:
            df[label_col] = df[source_col].map(code_map).fillna("Tidak Diketahui")
        else:
            df[label_col] = "Tidak Diketahui"
    # Status codes other than 0/1 are left as NaN labels on purpose.
    df["hcv_status_label"] = df["hcv_status"].map(HCV_STATUS_MAP)

    # Feature engineering
    df["age_group"] = df["age"].apply(create_age_group)
    df["alt_category"] = df["alt"].apply(create_alt_category)
    df["ast_category"] = df["ast"].apply(create_ast_category)

    return df


# Sidebar filters: every control narrows `df`; `df_full` keeps the unfiltered
# data so the option lists and slider bounds do not shrink as filters apply.
st.sidebar.header("Filter Dashboard")

df_full = load_and_prepare_data(DATASET_URL)
df = df_full.copy()

# Dataset subset filter (only effective when the file has a "dataset" column)
dataset_options = ["Semua", "train", "validation", "test"]
selected_dataset_subset = st.sidebar.selectbox("Pilih Set", dataset_options)
if selected_dataset_subset != "Semua" and "dataset" in df.columns:
    df = df[df["dataset"] == selected_dataset_subset]

# Gender filter: offer every known gender, all selected by default.
gender_multiselect_options = sorted(
    g for g in df_full["gender_label"].unique() if g != "Tidak Diketahui"
)
selected_genders = st.sidebar.multiselect(
    "Pilih Jenis Kelamin",
    options=gender_multiselect_options,
    default=gender_multiselect_options,
)
if selected_genders:
    df = df[df["gender_label"].isin(selected_genders)]
elif len(gender_multiselect_options) > 0:
    # Options exist but the user deselected all of them -> nothing matches.
    df = df.iloc[0:0]

# Age filter
min_age_full = df_full["age"].min()
max_age_full = df_full["age"].max()
if pd.notna(min_age_full) and pd.notna(max_age_full):
    # Round the bounds outwards: int() would truncate a fractional maximum and
    # the default range would then silently exclude the oldest rows.
    age_lower_bound = int(np.floor(min_age_full))
    age_upper_bound = int(np.ceil(max_age_full))
    selected_age = st.sidebar.slider(
        "Pilih Rentang Usia",
        age_lower_bound,
        age_upper_bound,
        (age_lower_bound, age_upper_bound),
    )
    # Series.between is inclusive on both ends; rows with a NaN age never match.
    df = df[df["age"].between(selected_age[0], selected_age[1])]
else:
    st.sidebar.warning(
        "Data Usia hilang atau tidak valid, tidak dapat memfilter berdasarkan usia."
    )

# ALT filter
min_alt_full = df_full["alt"].min()
max_alt_full = df_full["alt"].max()
if pd.notna(min_alt_full) and pd.notna(max_alt_full):
    selected_alt = st.sidebar.slider(
        "Pilih Rentang ALT",
        float(min_alt_full),
        float(max_alt_full),
        (float(min_alt_full), float(max_alt_full)),
        step=1.0,
    )
    # Rows with a NaN ALT never match the range.
    df = df[df["alt"].between(selected_alt[0], selected_alt[1])]
else:
    st.sidebar.warning(
        "Data ALT hilang atau tidak valid, tidak dapat memfilter berdasarkan ALT."
    )

# Nothing left to visualise: tell the user and end this script run.
if df.empty:
    st.error(
        "Tidak ada data yang cocok dengan filter yang dipilih. Silakan sesuaikan kontrol di sidebar."
    )
    st.stop()

st.title("📊 Dashboard Hepatitis C")


# Headline numbers for the currently filtered data.
total_patients = df.shape[0]
positive_patients = int(df["hcv_status"].sum())
negative_patients = total_patients - positive_patients
prevalence = 0
if total_patients > 0:
    prevalence = positive_patients / total_patients * 100

st.markdown(
    f"Menganalisis **{total_patients:,}** catatan pasien berdasarkan filter saat ini."
)
if "dataset" in df.columns:
    if selected_dataset_subset == "Semua":
        dataset_label = "Semua Data"
    else:
        dataset_label = selected_dataset_subset
    st.markdown(f"Set Dataset: **{dataset_label}**")
st.markdown("---")

# Overview metrics, rendered left to right in four equal columns.
st.header("Overview Data")
overview_metrics = (
    ("Total Pasien", f"{total_patients:,}"),
    ("Hepatitis C Positif", f"{positive_patients:,}"),
    ("Hepatitis C Negatif", f"{negative_patients:,}"),
    ("Prevalensi Hepatitis C", f"{prevalence:.1f}%"),
)
for metric_column, (metric_name, metric_value) in zip(st.columns(4), overview_metrics):
    metric_column.metric(metric_name, metric_value)


# Distribution pie chart: needs both statuses; a single status gets a note
# instead, and no usable status at all gets a warning.
status_label_count = (
    df["hcv_status_label"].nunique() if "hcv_status_label" in df.columns else 0
)
if total_patients > 0 and status_label_count > 1:
    fig_dist = px.pie(
        df,
        names="hcv_status_label",
        color="hcv_status_label",
        color_discrete_map=HCV_COLOR_MAP,
        hole=0.3,
        title="Distribusi Status Hepatitis C",
    )

    # Pull the "Positif" slice slightly out of the donut to emphasise it.
    pull_values = []
    for slice_label in fig_dist.data[0].labels:
        pull_values.append(0.05 if slice_label == "Positif" else 0)
    fig_dist.update_traces(textinfo="percent+label", pull=pull_values)
    fig_dist.update_layout(legend_title_text="Status Hepatitis C")
    st.plotly_chart(fig_dist, use_container_width=True)
elif total_patients > 0 and status_label_count == 1:
    unique_label = df["hcv_status_label"].dropna().iloc[0]
    st.info(f"Data terfilter hanya berisi pasien **{unique_label}**.")
else:
    st.warning(
        "Tidak ada data yang tersedia untuk distribusi Status Hepatitis C dengan filter saat ini."
    )
st.markdown("---")

# Categorical Features Analysis: grouped bar counts per category, split by HCV status.
st.header("Fitur Kategorikal vs Hepatitis C")
categorical_cols_for_dist = list(CATEGORICAL_COLS_MAPPED)
available_cats_dist = []
for candidate in categorical_cols_for_dist:
    if candidate in df.columns and df[candidate].nunique() > 0:
        available_cats_dist.append(candidate)

if not available_cats_dist:
    st.warning(
        "Tidak ada fitur kategorikal yang tersedia untuk analisis distribusi dengan filter saat ini."
    )
else:
    # Preselect age_group, then alt_category, otherwise the first option.
    preferred_cat = next(
        (name for name in ("age_group", "alt_category") if name in available_cats_dist),
        available_cats_dist[0],
    )
    cat_feature_to_plot = st.selectbox(
        "Pilih Fitur Kategorikal",
        available_cats_dist,
        index=available_cats_dist.index(preferred_cat),
    )
    if cat_feature_to_plot and total_patients > 0:
        # Display order for the ordinal features; other features keep Plotly's default.
        enzyme_level_order = [
            "Normal (≤40)",
            "Sedikit Tinggi (41-80)",
            "Tinggi (>80)",
            "Tidak Diketahui",
        ]
        display_orders = {
            "age_group": [
                "<30",
                "30-39",
                "40-49",
                "50-59",
                "60-69",
                "70+",
                "Tidak Diketahui",
            ],
            "alt_category": enzyme_level_order,
            "ast_category": enzyme_level_order,
        }

        # Only categories that occur in the filtered data are passed to Plotly.
        present_cats = df[cat_feature_to_plot].unique()
        ordered_present_cats = [
            category
            for category in display_orders.get(cat_feature_to_plot, [])
            if category in present_cats
        ]

        feature_title = (
            cat_feature_to_plot.replace("_label", "").replace("_", " ").title()
        )
        fig_cat = px.histogram(
            df,
            x=cat_feature_to_plot,
            color="hcv_status_label",
            barmode="group",
            color_discrete_map=HCV_COLOR_MAP,
            labels={"hcv_status_label": "Status Hepatitis C"},
            category_orders=(
                {cat_feature_to_plot: ordered_present_cats}
                if ordered_present_cats
                else None
            ),
            title=f"Distribusi {feature_title} berdasarkan Status Hepatitis C",
        )
        fig_cat.update_layout(xaxis_title=feature_title, yaxis_title="Jumlah")
        st.plotly_chart(fig_cat, use_container_width=True)
st.markdown("---")

# Prevalence Analysis: share of HCV-positive rows within each category of one feature.
st.header("Prevalensi Hepatitis C dalam Kategori")
available_cats_prev = [
    col
    for col in CATEGORICAL_COLS_MAPPED
    if col in df.columns and df[col].nunique() > 1
]

if available_cats_prev:
    # Preselect age_group, then alt_category, otherwise the first option.
    default_prev_cat = next(
        (name for name in ("age_group", "alt_category") if name in available_cats_prev),
        available_cats_prev[0],
    )
    cat_feature_for_prevalence = st.selectbox(
        "Pilih Kategori untuk Analisis Prevalensi",
        available_cats_prev,
        key="prevalence_cat",
        index=available_cats_prev.index(default_prev_cat),
    )
    if cat_feature_for_prevalence and total_patients > 0:
        # Aggregate only the status column: applying the helper to the whole
        # frame (grouping column included) is deprecated in recent pandas, and
        # the helper reads nothing but "hcv_status".
        prevalence_df = (
            df.groupby(cat_feature_for_prevalence)[["hcv_status"]]
            .apply(safe_calculate_prevalence)
            .reset_index()
        )

        # Natural display order for the ordinal features; others keep the
        # alphabetical order produced by groupby.
        enzyme_level_order = [
            "Normal (≤40)",
            "Sedikit Tinggi (41-80)",
            "Tinggi (>80)",
            "Tidak Diketahui",
        ]
        known_orders = {
            "age_group": [
                "<30",
                "30-39",
                "40-49",
                "50-59",
                "60-69",
                "70+",
                "Tidak Diketahui",
            ],
            "alt_category": enzyme_level_order,
            "ast_category": enzyme_level_order,
        }
        category_rank = {
            category: rank
            for rank, category in enumerate(
                known_orders.get(cat_feature_for_prevalence, [])
            )
        }

        if category_rank:
            # Sort rows by rank so the single bar trace is drawn in that order.
            prevalence_df["sort_order"] = prevalence_df[cat_feature_for_prevalence].map(
                category_rank
            )
            prevalence_df = prevalence_df.sort_values("sort_order").drop(
                "sort_order", axis=1
            )

        prevalence_feature_title = (
            cat_feature_for_prevalence.replace("_label", "").replace("_", " ").title()
        )
        fig_prev = px.bar(
            prevalence_df,
            x=cat_feature_for_prevalence,
            y="Prevalensi (%)",
            title=f"Prevalensi Hepatitis C dalam setiap Kategori '{prevalence_feature_title}'",
            color="Prevalensi (%)",
            color_continuous_scale=REDS_COLOR_SCALE,
            text_auto=".1f",
            labels={"Prevalensi (%)": "Prevalensi Hepatitis C (%)"},
        )

        fig_prev.update_traces(
            marker_coloraxis="coloraxis", textangle=0, textposition="outside"
        )
        fig_prev.update_layout(
            xaxis_title=prevalence_feature_title,
            yaxis_title="Prevalensi Hepatitis C (%)",
            yaxis_ticksuffix="%",
        )
        st.plotly_chart(fig_prev, use_container_width=True)

        with st.expander("Tampilkan Tabel Data Prevalensi"):
            st.dataframe(
                prevalence_df[
                    [
                        cat_feature_for_prevalence,
                        "Jumlah Positif",
                        "Jumlah Total",
                        "Prevalensi (%)",
                    ]
                ].round(1)
            )
else:
    st.warning(
        "Tidak ada fitur kategorikal yang cocok tersedia untuk analisis prevalensi dengan filter saat ini (membutuhkan >1 kategori)."
    )
st.markdown("---")

# Numerical Features Distribution: histogram and box plot of one numeric feature by HCV status.
st.header("Distribusi Fitur Numerik & Ambang Batas Klinis")

# Numeric-typed columns that are really the target or coded categoricals.
non_continuous_cols = {
    "hcv_status",
    "gender",
    "race_ethnicity",
    "education_level",
    "marital_status",
    "jaundice_history",
    "receive_transfusion",
    "liver_condition",
    "injectable_drug_use",
    "drug_use",
    "alcohol_consumption",
}
# Keep only continuous features that still vary after filtering.
numerical_cols = sorted(
    col
    for col in df.select_dtypes(include=np.number).columns
    if col not in non_continuous_cols and df[col].nunique() > 1
)

if numerical_cols:
    # Default feature selection: alt, then ast, otherwise the first column.
    default_feature_idx = next(
        (
            numerical_cols.index(preferred)
            for preferred in ("alt", "ast")
            if preferred in numerical_cols
        ),
        0,
    )
    feature_to_plot = st.selectbox(
        "Pilih Fitur Numerik untuk Divisualisasikan",
        numerical_cols,
        index=default_feature_idx,
        key="dist_feature_select",
    )

    if feature_to_plot and total_patients > 0:
        # Overlaid, semi-transparent histograms with a marginal box plot.
        fig_dist_num = px.histogram(
            df,
            x=feature_to_plot,
            color="hcv_status_label",
            color_discrete_map=HCV_COLOR_MAP,
            labels={"hcv_status_label": "Status Hepatitis C"},
            marginal="box",
            barmode="overlay",
            opacity=0.7,
            title=f"Histogram {feature_to_plot} berdasarkan Status Hepatitis C",
        )
        fig_dist_num.update_layout(xaxis_title=feature_to_plot)
        st.plotly_chart(fig_dist_num, use_container_width=True)

        # Notched box plot per status, showing outlier points only.
        box_labels = {
            "hcv_status_label": "Status Hepatitis C",
            feature_to_plot: feature_to_plot,
        }
        fig_box = px.box(
            df,
            x="hcv_status_label",
            y=feature_to_plot,
            color="hcv_status_label",
            color_discrete_map=HCV_COLOR_MAP,
            labels=box_labels,
            points="outliers",
            notched=True,
            title=f"Box Plot {feature_to_plot} berdasarkan Status Hepatitis C",
        )
        st.plotly_chart(fig_box, use_container_width=True)
else:
    st.warning(
        "Tidak ada fitur numerik dengan varians yang tersedia untuk analisis distribusi dengan filter saat ini."
    )
st.markdown("---")

# Bivariate Analysis (Scatter Plot): two numeric features against each other,
# coloured by HCV status.
st.header("Analisis Bivariat (Scatter Plot)")
if len(numerical_cols) < 2:
    st.warning(
        "Dibutuhkan setidaknya dua fitur numerik dengan varians untuk analisis scatter plot dengan filter saat ini."
    )
else:
    col_scatter1, col_scatter2 = st.columns(2)

    # Defaults: ALT on X and AST on Y when available.
    default_x_idx = numerical_cols.index("alt") if "alt" in numerical_cols else 0
    default_y_idx = numerical_cols.index("ast") if "ast" in numerical_cols else 1

    # Ensure the two defaults differ. Forcing Y to 1 is not enough: when X is
    # already at index 1 both axes would default to the same feature, so pick
    # whichever of the first two indices X does not occupy.
    if default_x_idx == default_y_idx:
        default_y_idx = 1 if default_x_idx == 0 else 0

    x_axis = col_scatter1.selectbox(
        "Pilih Fitur Sumbu X", numerical_cols, index=default_x_idx, key="scatter_x"
    )
    y_axis = col_scatter2.selectbox(
        "Pilih Fitur Sumbu Y", numerical_cols, index=default_y_idx, key="scatter_y"
    )

    if x_axis and y_axis and x_axis != y_axis and total_patients > 0:
        # Scatter plot
        fig_scatter = px.scatter(
            df,
            x=x_axis,
            y=y_axis,
            color="hcv_status_label",
            title=f"{y_axis} vs. {x_axis} berdasarkan Status Hepatitis C",
            color_discrete_map=HCV_COLOR_MAP,
            # Extra hover fields, limited to columns that actually exist.
            hover_data=[
                col
                for col in ["age", "gender_label", "alt", "ast", "albumin", "alp"]
                if col in df.columns
            ],
            labels={"hcv_status_label": "Status Hepatitis C"},
            opacity=0.7,
        )
        fig_scatter.update_layout(xaxis_title=x_axis, yaxis_title=y_axis)
        st.plotly_chart(fig_scatter, use_container_width=True)
    elif x_axis == y_axis:
        st.warning("Silakan pilih fitur yang berbeda untuk sumbu X dan Y.")
st.markdown("---")

# Correlation Matrix: Spearman correlations between the numeric features and the target.
st.header("Matriks Korelasi Fitur Numerik")

corr_cols_available = []
for candidate in CORRELATION_COLS:
    if candidate in df.columns and df[candidate].nunique() > 1:
        corr_cols_available.append(candidate)

if not (len(corr_cols_available) > 1 and total_patients > 1):
    st.warning(
        "Kolom numerik dengan varians (perlu > 1) atau titik data (perlu > 1) tidak cukup untuk matriks korelasi berdasarkan filter saat ini."
    )
else:
    # Correlation subset selection
    corr_subset = st.radio(
        "Hitung Korelasi Untuk:",
        ("Semua Data Terfilter", "Hanya Positif", "Hanya Negatif"),
        horizontal=True,
        key="corr_radio",
    )
    # Radio choice -> hcv_status value to keep; any other choice keeps all rows.
    status_by_choice = {"Hanya Positif": 1, "Hanya Negatif": 0}
    if corr_subset in status_by_choice:
        keep_rows = df["hcv_status"] == status_by_choice[corr_subset]
        df_corr_subset = df[keep_rows].copy()
    else:
        df_corr_subset = df.copy()

    # Re-check variance inside the subset (e.g. hcv_status becomes constant).
    corr_cols_final = [
        col
        for col in corr_cols_available
        if col in df_corr_subset.columns and df_corr_subset[col].nunique() > 1
    ]

    enough_data = len(df_corr_subset) > 1 and len(corr_cols_final) > 1
    if not enough_data:
        st.warning(
            f"Data tidak cukup (perlu > 1 baris dan > 1 kolom dengan varians) dalam grup '{corr_subset}' untuk perhitungan korelasi dengan filter saat ini."
        )
    else:
        corr_matrix = df_corr_subset[corr_cols_final].corr(method="spearman")

        fig_corr = px.imshow(
            corr_matrix,
            color_continuous_scale=REDS_COLOR_SCALE,
            aspect="auto",
            text_auto=".2f",
            title=f"Matriks Korelasi - {corr_subset}",
        )
        fig_corr.update_xaxes(side="bottom")
        fig_corr.update_layout(coloraxis_colorbar=dict(title="Korelasi"))
        st.plotly_chart(fig_corr, use_container_width=True)
st.markdown("---")

# Trend Analysis: prevalence along an ordered category, optionally split into
# one line per segment.
st.header("Trend Prevalensi Hepatitis C")

grouping_options_ordered = ["age_group", "alt_category", "ast_category", "education_label"]

available_groupings = [
    opt
    for opt in grouping_options_ordered
    if opt in df.columns and df[opt].nunique() > 1
]

if available_groupings:
    # X-axis grouping selection
    x_grouping = st.selectbox(
        "Kelompokkan Prevalensi Berdasarkan (Sumbu X)",
        available_groupings,
        index=(
            available_groupings.index("age_group")
            if "age_group" in available_groupings
            else 0
        ),
        key="line_grouping",
    )

    # Segmentation options
    segment_options_all = [
        "gender_label",
        "race_ethnicity_label",
        "education_label",
        "marital_status_label",
    ]

    available_segments = ["Tidak Ada"] + [
        opt
        for opt in segment_options_all
        if opt in df.columns and opt != x_grouping and df[opt].nunique() > 1
    ]
    segment_by = st.selectbox(
        "Segmentasi Garis Berdasarkan (Opsional)",
        available_segments,
        key="line_segment",
    )

    if x_grouping and total_patients > 0:
        segmented = segment_by != "Tidak Ada"

        grouping_cols = [x_grouping]
        if segmented:
            grouping_cols.append(segment_by)

        # Aggregate only the status column: applying the helper to the whole
        # frame (grouping columns included) is deprecated in recent pandas.
        grouped_data = (
            df.groupby(grouping_cols)[["hcv_status"]]
            .apply(safe_calculate_prevalence)
            .reset_index()
        )

        # .copy() so the helper column below is not written onto a slice.
        grouped_data = grouped_data[grouped_data["Jumlah Total"] > 0].copy()

        # Natural order of the ordinal groupings.
        enzyme_level_order = [
            "Normal (≤40)",
            "Sedikit Tinggi (41-80)",
            "Tinggi (>80)",
            "Tidak Diketahui",
        ]
        known_orders = {
            "age_group": [
                "<30",
                "30-39",
                "40-49",
                "50-59",
                "60-69",
                "70+",
                "Tidak Diketahui",
            ],
            "alt_category": enzyme_level_order,
            "ast_category": enzyme_level_order,
        }
        present_x_values = set(grouped_data[x_grouping].unique())
        x_axis_order = [
            category
            for category in known_orders.get(x_grouping, [])
            if category in present_x_values
        ]

        if x_axis_order:
            category_rank = {
                category: rank for rank, category in enumerate(x_axis_order)
            }
            grouped_data["sort_order"] = grouped_data[x_grouping].map(category_rank)
            sort_cols = [segment_by, "sort_order"] if segmented else ["sort_order"]
            grouped_data = grouped_data.sort_values(sort_cols).drop(
                "sort_order", axis=1
            )
        else:
            # No predefined order (e.g. education_label): fall back to alphabetical.
            x_axis_order = sorted(present_x_values)
            sort_cols = [segment_by, x_grouping] if segmented else [x_grouping]
            grouped_data = grouped_data.sort_values(sort_cols)

        plot_title = f"Prevalensi Hepatitis C berdasarkan {x_grouping.replace('_label','').replace('_', ' ').title()}"

        color_args = {}
        if not segmented:
            # Single line
            color_args["color_discrete_sequence"] = [SINGLE_LINE_COLOR]
        else:
            # Multiple lines
            plot_title += (
                f" dan {segment_by.replace('_label','').replace('_', ' ').title()}"
            )
            color_args["color"] = segment_by
            # Skip the palest reds so every line stays visible.
            color_args["color_discrete_sequence"] = REDS_DISCRETE_SEQUENCE[3:]

        fig_line = px.line(
            grouped_data,
            x=x_grouping,
            y="Prevalensi (%)",
            title=plot_title,
            markers=True,
            line_shape="linear",
            labels={"Prevalensi (%)": "Prevalensi Hepatitis C (%)"},
            hover_data=["Jumlah Positif", "Jumlah Total"],
            # Pin the x-axis order explicitly. Without it the categorical axis
            # follows first appearance in the data, so a first segment that
            # lacks some category pushes that category to the end of the axis.
            category_orders={x_grouping: x_axis_order},
            **color_args,
        )

        fig_line.update_layout(
            xaxis_title=x_grouping.replace("_label", "").replace("_", " ").title(),
            yaxis_title="Prevalensi Hepatitis C (%)",
            yaxis_ticksuffix="%",
            legend_title=(
                segment_by.replace("_label", "").replace("_", " ").title()
                if segmented
                else None
            ),
        )

        if not segmented:
            # Single line with text labels
            fig_line.update_traces(
                text=grouped_data["Prevalensi (%)"].apply(lambda x: f"{x:.1f}%"),
                textposition="top center",
                mode="lines+markers+text",
                line=dict(width=2.5),
            )
        else:
            # Multiple lines without text labels
            fig_line.update_traces(mode="lines+markers", line=dict(width=3))

        st.plotly_chart(fig_line, use_container_width=True)

        with st.expander("Tampilkan Tabel Data Tren"):
            display_cols = grouping_cols + [
                "Jumlah Positif",
                "Jumlah Total",
                "Prevalensi (%)",
            ]
            st.dataframe(grouped_data[display_cols].round(1))
else:
    st.warning(
        "Tidak ada variabel kategorikal terurut yang cocok tersedia untuk analisis grafik garis dengan filter saat ini."
    )
st.markdown("---")


# Raw view of the filtered rows plus a CSV export of exactly those rows.
st.header("Data Explorer")
st.dataframe(df)


csv = df.to_csv(index=False).encode("utf-8")
download_kwargs = dict(
    label="Unduh Data sebagai CSV",
    data=csv,
    file_name="hepatitis_c_data.csv",
    mime="text/csv",
)
st.download_button(**download_kwargs)

### Glosarium

In [None]:
%%writefile pages/2_Glosarium.py
import streamlit as st

st.set_page_config(page_title="🧾 Glosarium Dataset", layout="wide")

# Bullets shared by every yes/no coded feature.
_YES_NO = ("- **1:** Ya", "- **2:** Tidak")


def _entry(heading, description, bullets=(), divider=True):
    """Render one glossary entry.

    Args:
        heading: Markdown heading line, emitted with ``st.markdown``.
        description: Explanatory text, emitted with ``st.write``.
        bullets: Markdown bullet lines, each emitted with its own ``st.markdown``.
        divider: When true, close the entry with a horizontal rule.
    """
    st.markdown(heading)
    st.write(description)
    for bullet in bullets:
        st.markdown(bullet)
    if divider:
        st.markdown("---")


st.title("🧾 Glosarium Dataset Hepatitis C")
st.markdown(
    "Halaman ini menyediakan definisi dan satuan untuk semua fitur yang digunakan dalam aplikasi prediksi dan di dalam dataset Hepatitis C."
)
st.markdown("---")

# Demographics
_entry(
    "### Usia (Age)",
    "Usia pasien dalam tahun pada saat pengumpulan data (diukur dalam **tahun**).",
)
_entry(
    "### Jenis Kelamin (Gender)",
    "Jenis kelamin biologis pasien:",
    ("- **1:** Pria", "- **2:** Wanita"),
)
_entry(
    "### Ras/Etnisitas (Race Ethnicity)",
    "Kategori ras dan etnisitas pasien:",
    (
        "- **1:** Meksiko Amerika",
        "- **2:** Amerika Lainnya",
        "- **3:** Kulit Putih Non-Hispanik",
        "- **4:** Kulit Hitam Non-Hispanik",
        "- **5:** Ras Lainnya",
    ),
)
_entry(
    "### Tingkat Pendidikan (Education Level)",
    "Tingkat pendidikan tertinggi yang dicapai pasien:",
    (
        "- **1:** Kurang dari Kelas 9",
        "- **2:** Kelas 9-11",
        "- **3:** Lulusan SMA/GED",
        "- **4:** Beberapa Perguruan Tinggi/AA",
        "- **5:** Sarjana atau Lebih",
    ),
)
_entry(
    "### Rasio Pendapatan (Income Ratio)",
    "Rasio pendapatan keluarga terhadap garis kemiskinan federal. Nilai 1.0 menunjukkan pendapatan setara dengan garis kemiskinan.",
)
_entry(
    "### Status Pernikahan (Marital Status)",
    "Status pernikahan pasien:",
    (
        "- **1:** Menikah",
        "- **2:** Janda",
        "- **3:** Bercerai",
        "- **4:** Berpisah",
        "- **5:** Tidak Pernah Menikah",
        "- **6:** Hidup Bersama",
    ),
)

# Risk factors
_entry(
    "### Riwayat Jaundice (Jaundice History)",
    "Riwayat penyakit kuning pada pasien:",
    _YES_NO,
)
_entry(
    "### Menerima Transfusi (Receive Transfusion)",
    "Riwayat menerima transfusi darah:",
    _YES_NO,
)
_entry(
    "### Kondisi Hati (Liver Condition)",
    "Riwayat kondisi atau penyakit hati:",
    _YES_NO,
)
_entry(
    "### Penggunaan Obat Suntik (Injectable Drug Use)",
    "Riwayat penggunaan obat-obatan suntik:",
    _YES_NO,
)
_entry(
    "### Penggunaan Obat (Drug Use)",
    "Riwayat penggunaan obat-obatan:",
    _YES_NO,
)
_entry(
    "### Konsumsi Alkohol (Alcohol Consumption)",
    "Tingkat konsumsi alkohol pasien:",
    ("- **1:** Peminum Berat", "- **2:** Peminum Ringan/Sedang"),
)

# Laboratory tests. ALT and AST carry a category sub-entry, so the main entry
# is rendered without a divider and the sub-entry closes the section.
_entry(
    "### ALT (Alanine Aminotransferase)",
    "Enzim hati yang mengindikasikan kerusakan atau peradangan hati. Nilai normal biasanya <40 U/L (diukur dalam **U/L**).",
    divider=False,
)
_entry(
    "##### Kategori ALT (`alt_category`)",
    "Berdasarkan nilai ALT, hasil dikategorikan menjadi:",
    (
        "- **Normal (≤40):** ALT ≤ 40 U/L",
        "- **Sedikit Tinggi (41-80):** 41 ≤ ALT ≤ 80 U/L",
        "- **Tinggi (>80):** ALT > 80 U/L",
    ),
)
_entry(
    "### AST (Aspartate Aminotransferase)",
    "Enzim yang ditemukan di hati dan jaringan lain. Nilai tinggi dapat mengindikasikan kerusakan hati. Nilai normal biasanya <40 U/L (diukur dalam **U/L**).",
    divider=False,
)
_entry(
    "##### Kategori AST (`ast_category`)",
    "Berdasarkan nilai AST, hasil dikategorikan menjadi:",
    (
        "- **Normal (≤40):** AST ≤ 40 U/L",
        "- **Sedikit Tinggi (41-80):** 41 ≤ AST ≤ 80 U/L",
        "- **Tinggi (>80):** AST > 80 U/L",
    ),
)

# Remaining lab values: heading + description only, in display order.
_PLAIN_LAB_ENTRIES = [
    (
        "### Albumin",
        "Protein utama dalam darah yang diproduksi oleh hati. Nilai rendah dapat mengindikasikan penyakit hati (diukur dalam **g/dL**).",
    ),
    (
        "### ALP (Alkaline Phosphatase)",
        "Enzim yang ditemukan di hati, tulang, dan jaringan lain. Nilai tinggi dapat mengindikasikan penyakit hati atau tulang (diukur dalam **U/L**).",
    ),
    (
        "### BUN (Blood Urea Nitrogen)",
        "Jumlah nitrogen dalam darah yang berasal dari urea. Digunakan untuk mengevaluasi fungsi ginjal (diukur dalam **mg/dL**).",
    ),
    (
        "### Kolesterol Total (Cholesterol)",
        "Jumlah total kolesterol dalam darah (diukur dalam **mg/dL**).",
    ),
    (
        "### CPK (Creatine Phosphokinase)",
        "Enzim yang ditemukan di otot jantung, otot rangka, dan otak. Nilai tinggi dapat mengindikasikan kerusakan otot (diukur dalam **U/L**).",
    ),
    (
        "### Kreatinin (Creatinine)",
        "Produk limbah dari otot yang disaring oleh ginjal. Digunakan untuk mengevaluasi fungsi ginjal (diukur dalam **mg/dL**).",
    ),
    (
        "### GGT (Gamma-Glutamyl Transferase)",
        "Enzim hati yang sensitif terhadap kerusakan hati dan konsumsi alkohol (diukur dalam **U/L**).",
    ),
    (
        "### Glukosa (Glucose)",
        "Kadar gula darah (diukur dalam **mg/dL**).",
    ),
    (
        "### Bilirubin Total (Total Bilirubin)",
        "Produk pemecahan sel darah merah. Nilai tinggi dapat menyebabkan jaundice dan mengindikasikan masalah hati (diukur dalam **mg/dL**).",
    ),
    (
        "### Kalsium Total (Total Calcium)",
        "Mineral penting untuk tulang, otot, dan fungsi saraf (diukur dalam **mg/dL**).",
    ),
    (
        "### Protein Total (Total Protein)",
        "Jumlah total protein dalam darah, termasuk albumin dan globulin (diukur dalam **g/dL**).",
    ),
    (
        "### Trigliserida (Triglycerides)",
        "Jenis lemak (lipid) dalam darah. Kadar tinggi dapat meningkatkan risiko penyakit jantung (diukur dalam **mg/dL**).",
    ),
    (
        "### Asam Urat (Uric Acid)",
        "Produk limbah dari pemecahan purin. Nilai tinggi dapat menyebabkan gout (diukur dalam **mg/dL**).",
    ),
    (
        "### LDH (Lactate Dehydrogenase)",
        "Enzim yang ditemukan di hampir semua jaringan tubuh. Nilai tinggi dapat mengindikasikan kerusakan jaringan (diukur dalam **U/L**).",
    ),
    (
        "### Globulin",
        "Kelompok protein dalam darah yang membantu melawan infeksi (diukur dalam **g/dL**).",
    ),
    (
        "### Osmolalitas (Osmolality)",
        "Ukuran konsentrasi partikel dalam darah, membantu mengevaluasi keseimbangan air dan elektrolit (diukur dalam **mOsm/kg**).",
    ),
]
for _heading, _description in _PLAIN_LAB_ENTRIES:
    _entry(_heading, _description)

# Target variable
_entry(
    "### Status Hepatitis C (HCV Status) - Variabel Target",
    "Menunjukkan apakah pasien terinfeksi Hepatitis C:",
    ("- **0:** Negatif (Tidak Terinfeksi)", "- **1:** Positif (Terinfeksi)"),
)

# Additional information
_entry(
    "### Dataset Split",
    "Pembagian dataset untuk machine learning:",
    (
        "- **train:** Data pelatihan untuk melatih model",
        "- **validation:** Data validasi untuk tuning hyperparameter",
        "- **test:** Data pengujian untuk evaluasi final model",
    ),
)

# Closing note: the page ends here, so no divider follows it.
_entry(
    "### Informasi Penting",
    (
        "Dataset ini berasal dari NHANES (National Health and Nutrition Examination Survey) "
        "dan telah diproses untuk analisis Hepatitis C. Semua nilai numerik telah dinormalisasi "
        "dan missing values telah ditangani sesuai dengan standar preprocessing."
    ),
    divider=False,
)

### Prediction App

In [None]:
%%writefile "pages/3_Prediction App.py"
import streamlit as st
import pandas as pd
import joblib
import os
import sys

# Directory holding the serialized models and the scaler; it lives under the
# Google Drive mount point (/content/drive).
base_path = "/content/drive/MyDrive/ComBio/Models"  # Path to the model directory

# Page-wide configuration (wide layout, browser tab title).
st.set_page_config(layout="wide", page_title="🧬 Aplikasi Prediksi Hepatitis C")

# Page title and the short usage instructions shown above the input form.
st.title("🧬 Aplikasi Prediksi Hepatitis C")
st.markdown(
    """
Masukkan detail pasien untuk memprediksi status Hepatitis C dari berbagai model machine learning.
Arahkan kursor ke ikon (?) untuk melihat penjelasan singkat setiap parameter.
"""
)


@st.cache_resource
def load_model(path):
    """Load a joblib-serialized object (a model or the scaler) from ``path``.

    The function is wrapped in ``st.cache_resource``, so the returned value
    (including ``None``) is cached per ``path``.

    Args:
        path: Filesystem path of the serialized object.

    Returns:
        The deserialized object, or ``None`` when the file does not exist or
        cannot be deserialized. Callers report ``None`` as "failed to load".
    """
    if not os.path.exists(path):
        return None
    try:
        # NOTE(security): joblib.load unpickles the file and can therefore run
        # arbitrary code; only point base_path at files you trust.
        return joblib.load(path)
    except Exception as exc:
        # A corrupt file or a missing/incompatible library would otherwise take
        # down the whole page; log the reason and let the caller show its
        # per-model failure message instead.
        print(f"Failed to load {path}: {exc!r}", file=sys.stderr)
        return None


# Default widget values for the example "Hepatitis C Negatif" patient.
# Coded fields use the same integer codes as the selectbox option maps below.
negative_values = {
    "age": 35,
    "gender": 2,  # "Wanita" (female)
    "race_ethnicity": 3,  # "Kulit Putih Non-Hispanik" (Non-Hispanic White)
    "education_level": 4,  # "Beberapa Perguruan Tinggi/AA" (some college / AA)
    "income_ratio": 2.5,
    "marital_status": 1,  # "Menikah" (married)
    "jaundice_history": 2,  # "Tidak" (no)
    "receive_transfusion": 2,  # "Tidak" (no)
    "liver_condition": 2,  # "Tidak" (no)
    "injectable_drug_use": 2,  # "Tidak" (no)
    "drug_use": 2,  # "Tidak" (no)
    "alcohol_consumption": 2,  # "Peminum Ringan/Sedang" (light/moderate drinker)
    "alt": 20.0,
    "albumin": 4.2,
    "alp": 60.0,
    "ast": 25.0,
    "bun": 15.0,
    "cholesterol": 180.0,
    "cpk": 100.0,
    "creatinine": 0.8,
    "ggt": 20.0,
    "glucose": 95.0,
    "total_bilirubin": 0.5,
    "total_calcium": 9.5,
    "total_protein": 7.2,
    "triglycerides": 120.0,
    "uric_acid": 5.0,
    "ldh": 140.0,
    "globulin": 3.0,
    "osmolality": 280.0,
}

# Default widget values for the example "Hepatitis C Positif" patient.
# Coded fields use the same integer codes as the selectbox option maps below.
positive_values = {
    "age": 55,
    "gender": 1,  # "Pria" (male)
    "race_ethnicity": 4,  # "Kulit Hitam Non-Hispanik" (Non-Hispanic Black)
    "education_level": 2,  # "Kelas 9-11" (grades 9-11)
    "income_ratio": 1.2,
    # Code 2 is "Janda" (widowed) in marital_options; divorced is 3 and separated is 4.
    # NOTE(review): the earlier comment said divorced/separated — confirm which status was intended.
    "marital_status": 2,
    "jaundice_history": 1,  # "Ya" (yes)
    "receive_transfusion": 1,  # "Ya" (yes)
    "liver_condition": 1,  # "Ya" (yes)
    "injectable_drug_use": 1,  # "Ya" (yes)
    "drug_use": 1,  # "Ya" (yes)
    "alcohol_consumption": 1,  # "Peminum Berat" (heavy drinker)
    "alt": 85.0,
    "albumin": 3.2,
    "alp": 120.0,
    "ast": 95.0,
    "bun": 25.0,
    "cholesterol": 220.0,
    "cpk": 180.0,
    "creatinine": 1.3,
    "ggt": 85.0,
    "glucose": 110.0,
    "total_bilirubin": 1.8,
    "total_calcium": 8.8,
    "total_protein": 6.5,
    "triglycerides": 200.0,
    "uric_acid": 7.2,
    "ldh": 220.0,
    "globulin": 2.2,
    "osmolality": 290.0,
}


# Sidebar: pick which example patient pre-fills the form, then load every
# model plus the scaler and report what could not be loaded.
with st.sidebar:
    st.header("Konfigurasi Model")

    example_option = st.selectbox(
        "Contoh Input",
        ["Hepatitis C Negatif", "Hepatitis C Positif"],
        help="Pilih contoh input yang ingin digunakan."
    )

    st.header("Status Model")
    default_values = negative_values if example_option == "Hepatitis C Negatif" else positive_values

    # Display name -> serialized model file inside base_path.
    model_paths = {
        "XGBoost": f"{base_path}/XGBoost.pkl",
        "Random Forest": f"{base_path}/Random Forest.pkl",
        "Gradient Boosting": f"{base_path}/Gradient Boosting.pkl",
        "Decision Tree": f"{base_path}/Decision Tree.pkl",
        "CatBoost": f"{base_path}/CatBoost.pkl",
    }
    scaler_path = f"{base_path}/scaler.joblib"

    models = {}
    failed_models = []

    for name, path in model_paths.items():
        model = load_model(path)
        # load_model signals failure with None. Compare against None explicitly:
        # a loaded object that defines __len__ or __bool__ could be falsy and
        # would then be misreported as a failed load.
        if model is not None:
            models[name] = model
        else:
            failed_models.append(name)

    all_models_loaded = not failed_models

    if failed_models:
        for name in failed_models:
            st.error(f"{name} gagal dimuat")
    else:
        st.success("✅ Semua model berhasil dimuat!")

    scaler = load_model(scaler_path)
    if scaler is None:
        st.error("Scaler gagal dimuat")
        # Prediction needs the scaler too, so a missing scaler also clears the flag.
        all_models_loaded = False


# Features used by the Hepatitis C models, grouped by kind. `features` fixes
# the column order of the frame that is built from the form input.
_DEMOGRAPHIC_FEATURES = [
    "age",
    "gender",
    "race_ethnicity",
    "education_level",
    "income_ratio",
    "marital_status",
]
_RISK_FACTOR_FEATURES = [
    "jaundice_history",
    "receive_transfusion",
    "liver_condition",
    "injectable_drug_use",
    "drug_use",
    "alcohol_consumption",
]
_LAB_FEATURES = [
    "alt",
    "albumin",
    "alp",
    "ast",
    "bun",
    "cholesterol",
    "cpk",
    "creatinine",
    "ggt",
    "glucose",
    "total_bilirubin",
    "total_calcium",
    "total_protein",
    "triglycerides",
    "uric_acid",
    "ldh",
    "globulin",
    "osmolality",
]
features = _DEMOGRAPHIC_FEATURES + _RISK_FACTOR_FEATURES + _LAB_FEATURES

def _select_code(label, code_by_label, default_code, help_text):
    """Render a selectbox over the labels of ``code_by_label`` and return the chosen code.

    The option whose code equals ``default_code`` is preselected.
    """
    labels = list(code_by_label.keys())
    preselected = [name for name, code in code_by_label.items() if code == default_code][0]
    chosen = st.selectbox(
        label,
        labels,
        index=labels.index(preselected),
        help=help_text,
    )
    return code_by_label[chosen]


# Values collected from the widgets, keyed by feature name.
input_data = {}

# The first row of the form is split into three columns.
col1, col2, col3 = st.columns(3)

with col1:
    st.subheader("Informasi Demografis")

    input_data["age"] = st.number_input(
        "Usia (tahun)",
        min_value=0,
        max_value=120,
        value=default_values["age"],
        help="Masukkan usia pasien dalam tahun.",
    )

    # Gender keeps its own fallback: any default other than 1 preselects "Wanita".
    sex_labels = ["Pria", "Wanita"]
    sex_codes = {"Pria": 1, "Wanita": 2}
    preselected_sex = "Wanita" if default_values["gender"] != 1 else "Pria"
    chosen_sex = st.selectbox(
        "Jenis Kelamin",
        sex_labels,
        index=sex_labels.index(preselected_sex),
        help="Pilih jenis kelamin biologis pasien.",
    )
    input_data["gender"] = sex_codes[chosen_sex]

    input_data["race_ethnicity"] = _select_code(
        "Ras/Etnisitas",
        {
            "Meksiko Amerika": 1,
            "Amerika Lainnya": 2,
            "Kulit Putih Non-Hispanik": 3,
            "Kulit Hitam Non-Hispanik": 4,
            "Ras Lainnya": 5,
        },
        default_values["race_ethnicity"],
        "Pilih kategori ras dan etnisitas pasien.",
    )

    input_data["education_level"] = _select_code(
        "Tingkat Pendidikan",
        {
            "Kurang dari Kelas 9": 1,
            "Kelas 9-11": 2,
            "Lulusan SMA/GED": 3,
            "Beberapa Perguruan Tinggi/AA": 4,
            "Sarjana atau Lebih": 5,
        },
        default_values["education_level"],
        "Pilih tingkat pendidikan tertinggi yang dicapai.",
    )

    input_data["income_ratio"] = st.number_input(
        "Rasio Pendapatan",
        min_value=0.0,
        max_value=10.0,
        value=default_values["income_ratio"],
        step=0.1,
        help="Rasio pendapatan keluarga terhadap garis kemiskinan federal.",
    )

    input_data["marital_status"] = _select_code(
        "Status Pernikahan",
        {
            "Menikah": 1,
            "Janda": 2,
            "Bercerai": 3,
            "Berpisah": 4,
            "Tidak Pernah Menikah": 5,
            "Hidup Bersama": 6,
        },
        default_values["marital_status"],
        "Pilih status pernikahan saat ini.",
    )

with col2:
    st.subheader("Faktor Risiko")

    # Every yes/no risk factor shares the same options and coding (Ya=1, Tidak=2).
    yes_no_labels = ["Tidak", "Ya"]
    yes_no_codes = {"Tidak": 2, "Ya": 1}
    # (feature key, widget label, tooltip), in display order.
    yes_no_fields = [
        ("jaundice_history", "Riwayat Jaundice", "Apakah pasien pernah mengalami penyakit kuning?"),
        ("receive_transfusion", "Menerima Transfusi", "Apakah pasien pernah menerima transfusi darah?"),
        ("liver_condition", "Kondisi Hati", "Apakah pasien memiliki riwayat penyakit hati?"),
        ("injectable_drug_use", "Penggunaan Obat Suntik", "Apakah pasien pernah menggunakan obat-obatan suntik?"),
        ("drug_use", "Penggunaan Obat", "Apakah pasien memiliki riwayat penggunaan obat-obatan?"),
    ]
    for risk_key, risk_label, risk_help in yes_no_fields:
        preselected_answer = "Ya" if default_values[risk_key] == 1 else "Tidak"
        answer = st.selectbox(
            risk_label,
            yes_no_labels,
            index=yes_no_labels.index(preselected_answer),
            help=risk_help,
        )
        input_data[risk_key] = yes_no_codes[answer]

    # Alcohol consumption has its own two-level coding (heavy=1, light/moderate=2).
    drinking_labels = ["Peminum Ringan/Sedang", "Peminum Berat"]
    drinking_codes = {"Peminum Ringan/Sedang": 2, "Peminum Berat": 1}
    if default_values["alcohol_consumption"] == 1:
        preselected_drinking = "Peminum Berat"
    else:
        preselected_drinking = "Peminum Ringan/Sedang"
    drinking = st.selectbox(
        "Konsumsi Alkohol",
        drinking_labels,
        index=drinking_labels.index(preselected_drinking),
        help="Pilih tingkat konsumsi alkohol pasien.",
    )
    input_data["alcohol_consumption"] = drinking_codes[drinking]

with col3:
    st.subheader("Tes Laboratorium")

    # (feature key, widget label, min, max, step, tooltip), in display order.
    first_lab_fields = [
        ("alt", "ALT (U/L)", 0.0, 500.0, 1.0, "Alanine Aminotransferase - enzim hati."),
        ("ast", "AST (U/L)", 0.0, 500.0, 1.0, "Aspartate Aminotransferase - enzim hati."),
        ("albumin", "Albumin (g/dL)", 1.0, 6.0, 0.1, "Protein utama dalam darah yang diproduksi oleh hati."),
        ("alp", "ALP (U/L)", 0.0, 300.0, 1.0, "Alkaline Phosphatase - enzim yang ditemukan di hati dan tulang."),
        ("bun", "BUN (mg/dL)", 0.0, 100.0, 1.0, "Blood Urea Nitrogen - indikator fungsi ginjal."),
        ("cholesterol", "Kolesterol Total (mg/dL)", 50.0, 500.0, 1.0, "Jumlah total kolesterol dalam darah."),
    ]
    for lab_key, lab_label, lab_min, lab_max, lab_step, lab_help in first_lab_fields:
        input_data[lab_key] = st.number_input(
            lab_label,
            min_value=lab_min,
            max_value=lab_max,
            value=default_values[lab_key],
            step=lab_step,
            help=lab_help,
        )

# Second row of the form: the remaining laboratory tests, again in three columns.
st.markdown("---")
st.subheader("Tes Laboratorium")

col4, col5, col6 = st.columns(3)

# One list per column; each item is (feature key, widget label, min, max, step, tooltip).
second_row_lab_fields = [
    [
        ("cpk", "CPK (U/L)", 0.0, 1000.0, 1.0, "Creatine Phosphokinase - enzim otot."),
        ("creatinine", "Kreatinin (mg/dL)", 0.1, 10.0, 0.1, "Indikator fungsi ginjal."),
        ("ggt", "GGT (U/L)", 0.0, 500.0, 1.0, "Gamma-Glutamyl Transferase - enzim hati."),
        ("glucose", "Glukosa (mg/dL)", 30.0, 500.0, 1.0, "Kadar gula darah."),
    ],
    [
        ("total_bilirubin", "Bilirubin Total (mg/dL)", 0.0, 20.0, 0.1, "Produk pemecahan sel darah merah."),
        ("total_calcium", "Kalsium Total (mg/dL)", 5.0, 15.0, 0.1, "Mineral penting untuk tulang dan otot."),
        ("total_protein", "Protein Total (g/dL)", 3.0, 10.0, 0.1, "Jumlah total protein dalam darah."),
        ("triglycerides", "Trigliserida (mg/dL)", 20.0, 1000.0, 1.0, "Jenis lemak dalam darah."),
    ],
    [
        ("uric_acid", "Asam Urat (mg/dL)", 1.0, 15.0, 0.1, "Produk limbah dari pemecahan purin."),
        ("ldh", "LDH (U/L)", 50.0, 1000.0, 1.0, "Lactate Dehydrogenase - enzim jaringan."),
        ("globulin", "Globulin (g/dL)", 1.0, 6.0, 0.1, "Protein yang membantu melawan infeksi."),
        ("osmolality", "Osmolalitas (mOsm/kg)", 200.0, 400.0, 1.0, "Konsentrasi partikel dalam darah."),
    ],
]

for lab_column, column_fields in zip((col4, col5, col6), second_row_lab_fields):
    with lab_column:
        for lab_key, lab_label, lab_min, lab_max, lab_step, lab_help in column_fields:
            input_data[lab_key] = st.number_input(
                lab_label,
                min_value=lab_min,
                max_value=lab_max,
                value=default_values[lab_key],
                step=lab_step,
                help=lab_help,
            )

st.markdown("---")

# Prediction: scale the form input, run every loaded model on it, show the
# per-model result and a simple vote-based consensus.
if st.button("🔬 Prediksi Status Hepatitis C", type="primary"):
    # Test the scaler first. The sidebar clears all_models_loaded when the
    # scaler is missing as well, so checking that flag first would report a
    # missing scaler as "some models failed" and never reach this message.
    if scaler is None:
        st.error("Scaler gagal dimuat. Tidak dapat melanjutkan.")
    elif not all_models_loaded:
        st.error("Beberapa model gagal dimuat. Tidak dapat melanjutkan.")
    else:
        try:
            # Single-row frame with columns in the order given by `features`.
            input_df = pd.DataFrame([input_data])[features]

            st.subheader("📋 Data yang Diinput")
            st.dataframe(input_df, hide_index=True)

            # Apply the loaded scaler to the raw input.
            input_scaled = scaler.transform(input_df)

            st.subheader("🎯 Hasil Prediksi Model")
            results = {}

            # Each model is predicted independently so one failure does not hide the others.
            for model_name, model in models.items():
                try:
                    pred = model.predict(input_scaled)[0]
                    class_probs = model.predict_proba(input_scaled)[0]
                    # Probability of the predicted class; column 1 is taken as
                    # the positive class (assumes classes are ordered [0, 1]).
                    prob = class_probs[1] if pred == 1 else class_probs[0]
                    label = "Positif" if pred == 1 else "Negatif"
                    results[model_name] = (label, prob)
                except Exception as e:
                    st.error(f"Kesalahan prediksi untuk {model_name}: {e}")
                    results[model_name] = ("Kesalahan", None)

            # The metric styling is global CSS, so inject it once rather than
            # once per result column.
            st.markdown(
                """
                    <style>
                    [data-testid="stMetricValue"] {
                        font-size: 25px;
                        font-weight: bold;
                    }
                    </style>
                    """,
                unsafe_allow_html=True,
            )

            # One column per model.
            cols = st.columns(len(results))
            for i, (name, (label, prob)) in enumerate(results.items()):
                with cols[i]:
                    st.metric(name, label)
                    if prob is not None:
                        st.write(f"Probabilitas: {prob * 100:.2f}%")
                    else:
                        st.write("Tidak tersedia")

            # Consensus: share of positive votes among the models that produced a prediction.
            positive_count = sum(1 for label, _ in results.values() if label == "Positif")
            total_models = sum(1 for label, _ in results.values() if label != "Kesalahan")

            if total_models > 0:
                consensus_percentage = (positive_count / total_models) * 100

                st.markdown("---")
                st.subheader("📊 Konsensus Model")

                if consensus_percentage >= 70:
                    st.error(f"⚠️ **RISIKO TINGGI**: {positive_count}/{total_models} model memprediksi Hepatitis C Positif ({consensus_percentage:.0f}%)")
                    st.markdown("**Rekomendasi:** Segera konsultasi dengan dokter untuk pemeriksaan lebih lanjut.")
                elif consensus_percentage >= 30:
                    st.warning(f"⚡ **RISIKO SEDANG**: {positive_count}/{total_models} model memprediksi Hepatitis C Positif ({consensus_percentage:.0f}%)")
                    st.markdown("**Rekomendasi:** Disarankan untuk melakukan pemeriksaan laboratorium.")
                else:
                    st.success(f"✅ **RISIKO RENDAH**: {positive_count}/{total_models} model memprediksi Hepatitis C Positif ({consensus_percentage:.0f}%)")
                    st.markdown("**Rekomendasi:** Tetap jaga pola hidup sehat dan lakukan pemeriksaan rutin.")

        except Exception as e:
            st.error(f"Kesalahan dalam proses prediksi: {e}")

## Run Streamlit

In [None]:
!pkill -f ngrok
!pkill -f streamlit

def run():
    !streamlit run Dashboard.py --server.fileWatcherType none --server.port 8501 > /content/logs.txt

thread = threading.Thread(target=run)
thread.start()
time.sleep(2)

public_url = ngrok.connect(addr="8501")
print(f"🔗 Akses Streamlit : {public_url}")

In [None]:
# Cleanup: kill every process whose command line matches "ngrok" or "streamlit".
!pkill -f ngrok
!pkill -f streamlit