In [3]:
!pip install streamlit pandas numpy joblib scikit-learn xgboost


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting altair!=5.4.0,!=5.4.1,<6,>=4.0 (from streamlit)
  Downloading altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.5.0 (from streamlit)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<7,>=4.0 (from streamlit)
  Downloading cachetools-6.2.1-py3-none-any.whl.metadata (5.5 kB)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-21.0.0-cp39-cp39-win_amd64.whl.metadata (3.4 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Downloading tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-win_amd64.whl.metadata (44 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)


In [4]:
code = r"""
code = r"""
import streamlit as st
import pandas as pd
import numpy as np
import joblib, os

st.set_page_config(page_title="Liver Disease Prediction", page_icon="ðŸ©º")
st.title("ðŸ©º Liver Disease Prediction App")
st.write("KNN / XGBoost models ke saath manual input & CSV upload. Headers auto-fix + NaN-safe preprocessing.")

FEATURES = [
    "Age","Gender","Total_Bilirubin","Direct_Bilirubin","Alkaline_Phosphotase",
    "Alamine_Aminotransferase","Aspartate_Aminotransferase","Total_Protiens",
    "Albumin","Albumin_and_Globulin_Ratio"
]

@st.cache_resource
def load_models():
    m = {}
    if os.path.exists("best_knn_liver_model.joblib"): m["KNN"]=joblib.load("best_knn_liver_model.joblib")
    if os.path.exists("best_xgb_liver_model.joblib"): m["XGBoost"]=joblib.load("best_xgb_liver_model.joblib")
    if os.path.exists("scaler_for_knn.joblib"):       m["SCALER"]=joblib.load("scaler_for_knn.joblib")
    return m

MODELS = load_models()
AVAILABLE = [m for m in ["KNN","XGBoost"] if m in MODELS]
if not AVAILABLE:
    st.error("Model files nahi mile. Place best_knn_liver_model.joblib / best_xgb_liver_model.joblib (and scaler_for_knn.joblib for KNN).")
    st.stop()

model_choice = st.radio("Select model", AVAILABLE, horizontal=True)
st.caption("Required columns: " + ", ".join(FEATURES))

def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
    def norm(s): return str(s).strip().lower().replace(" ","_").replace("-","_").replace("/","_")
    mapping = {
        "age":"Age","gender":"Gender","total_bilirubin":"Total_Bilirubin","direct_bilirubin":"Direct_Bilirubin",
        "alkaline_phosphotase":"Alkaline_Phosphotase","alkaline_phosphatase":"Alkaline_Phosphotase",
        "alamine_aminotransferase":"Alamine_Aminotransferase","aspartate_aminotransferase":"Aspartate_Aminotransferase",
        "total_proteins":"Total_Protiens","total_protiens":"Total_Protiens",
        "albumin":"Albumin","albumin_and_globulin_ratio":"Albumin_and_Globulin_Ratio","agr":"Albumin_and_Globulin_Ratio","a_g_ratio":"Albumin_and_Globulin_Ratio"
    }
    return df.rename(columns={c: mapping.get(norm(c), c) for c in df.columns})

def preprocess(df_raw: pd.DataFrame) -> pd.DataFrame:
    df = df_raw.copy()
    df.columns = [c.strip() for c in df.columns]
    df = normalize_headers(df)

    missing = [c for c in FEATURES if c not in df.columns]
    if missing:
        # index-like extra col drop
        if df.shape[1] == len(FEATURES) + 1:
            first = df.columns[0]
            if str(first).lower().startswith("unnamed") or str(first).lower() in ("index",""):
                df = df.drop(columns=[first])
        if df.shape[1] == len(FEATURES):
            df.columns = FEATURES
        else:
            raise ValueError(f"Missing required columns: {missing}")

    if df["Gender"].dtype == object:
        df["Gender"] = df["Gender"].astype(str).str.strip().map({"Male":1,"Male ":1,"M":1,"male":1,"Female":0,"female":0,"F":0})

    X = df[FEATURES].copy()
    for col in FEATURES:
        if col != "Gender":
            X[col] = pd.to_numeric(X[col], errors="coerce")
    num_cols = X.select_dtypes(include=[np.number]).columns
    for c in num_cols:
        X[c] = X[c].fillna(X[c].median())
    X = X.fillna(X.median(numeric_only=True))

    if model_choice == "KNN" and "SCALER" in MODELS:
        X[num_cols] = MODELS["SCALER"].transform(X[num_cols])

    return X

def smart_read_csv(uploaded_file):
    tries = []
    def normal(f):   return pd.read_csv(f)
    def autosep(f):  return pd.read_csv(f, sep=None, engine="python")
    def headerless(f):
        df = pd.read_csv(f, header=None)
        if df.shape[1] == len(FEATURES) + 1:
            df = df.drop(columns=df.columns[0])
        if df.shape[1] != len(FEATURES):
            raise ValueError(f"Headerless CSV must have {len(FEATURES)} cols, found {df.shape[1]}")
        df.columns = FEATURES
        return df
    for reader in (normal, autosep, headerless):
        uploaded_file.seek(0)
        try:
            return reader(uploaded_file)
        except Exception as e:
            tries.append(str(e))
    raise ValueError("; ".join(tries))

tab1, tab2 = st.tabs(["ðŸ”¢ Manual Input", "ðŸ“„ CSV Upload"])

with tab1:
    st.subheader("Enter patient features")
    c1, c2 = st.columns(2)
    Age = c1.number_input("Age", 0, 120, 45)
    Gender = c2.selectbox("Gender", ["Male","Female"])
    Total_Bilirubin = c1.number_input("Total_Bilirubin", 0.0, 999.0, 1.0)
    Direct_Bilirubin = c2.number_input("Direct_Bilirubin", 0.0, 999.0, 0.3)
    Alkaline_Phosphotase = c1.number_input("Alkaline_Phosphotase", 0, 10000, 200)
    Alamine_Aminotransferase = c2.number_input("Alamine_Aminotransferase", 0, 10000, 30)
    Aspartate_Aminotransferase = c1.number_input("Aspartate_Aminotransferase", 0, 10000, 35)
    Total_Protiens = c2.number_input("Total_Protiens", 0.0, 999.0, 6.5)
    Albumin = c1.number_input("Albumin", 0.0, 999.0, 3.5)
    Albumin_and_Globulin_Ratio = c2.number_input("Albumin_and_Globulin_Ratio", 0.0, 999.0, 1.0)

    if st.button("Predict", use_container_width=True):
        row = {
            "Age": Age, "Gender": 1 if Gender=="Male" else 0,
            "Total_Bilirubin": Total_Bilirubin, "Direct_Bilirubin": Direct_Bilirubin,
            "Alkaline_Phosphotase": Alkaline_Phosphotase, "Alamine_Aminotransferase": Alamine_Aminotransferase,
            "Aspartate_Aminotransferase": Aspartate_Aminotransferase, "Total_Protiens": Total_Protiens,
            "Albumin": Albumin, "Albumin_and_Globulin_Ratio": Albumin_and_Globulin_Ratio
        }
        try:
            X = preprocess(pd.DataFrame([row]))
            model = MODELS[model_choice]
            pred = int(model.predict(X)[0])
            st.success(f"Prediction: **{'Liver Disease' if pred==1 else 'No Disease'}**")
            if hasattr(model, 'predict_proba'):
                st.write(f"Probability (class 1): **{model.predict_proba(X)[0,1]:.2f}**")
        except Exception as e:
            st.error(f"Error during prediction: {e}")

with tab2:
    st.subheader("Upload CSV (auto-fix headers/index)")
    debug = st.checkbox("Show debug info", value=True)
    file = st.file_uploader("Select CSV file", type=["csv"])
    if file is not None:
        try:
            raw = smart_read_csv(file)
            if debug:
                st.write("ðŸ”¹ Raw detected columns:", list(raw.columns))
                st.write("ðŸ”¹ Raw head:", raw.head())
            X = preprocess(raw)
            if debug:
                st.write("ðŸ”¹ Final dtypes:", {k:str(v) for k,v in X.dtypes.items()})
                st.write("ðŸ”¹ NaN counts:", X.isna().sum().to_dict())
            model = MODELS[model_choice]
            preds = model.predict(X)
            out = raw.copy()
            out["prediction"] = preds
            if hasattr(model, "predict_proba"):
                out["proba_1"] = model.predict_proba(X)[:,1]
            st.write("Sample predictions:", out.head())
            st.download_button(label="Download predictions CSV",
                               data=out.to_csv(index=False).encode("utf-8"),
                               file_name="liver_predictions.csv",
                               mime="text/csv")
        except Exception as e:
            st.error(f"Failed to score CSV: {e}")

st.info("Gender: Male=1, Female=0. Numeric NaNs -> median. KNN uses saved scaler.")
"""
with open("app.py","w",encoding="utf-8") as f:
    f.write(code)
print("âœ… app.py overwritten cleanly.")

"""
with open("app.py", "w", encoding="utf-8") as f:
    f.write(code)
print("âœ… app.py overwritten cleanly.")


2025-10-28 13:57:05.753 
  command:

    streamlit run C:\Users\HP\anaconda3\envs\datascience_env\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-10-28 13:57:17.110 Session state does not function when running a script without `streamlit run`


âœ… app.py overwritten cleanly.
