In [2]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.46.0-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m105.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind, chi2_contingency

# Configure page
st.set_page_config(page_title='CKD One-Page Dashboard', layout='wide')

@st.cache_data
def load_data():
    df = pd.read_csv('cleaned_kidney_disease.csv')
    # Encode target
    df['class'] = df['class'].map({
        'Chronic Kidney Disease': 1,
        'Not Chronic Kidney Disease': 0
    })
    # Create age groups
    bins = [0, 30, 50, 70, df['age'].max() + 1]
    labels = ['<30', '30-50', '50-70', '70+']
    df['age_group'] = pd.cut(df['age'], bins=bins, labels=labels, right=False)
    return df

df = load_data()

# Data dictionary
data_dict = {
    'age': 'Years',
    'blood_pressure': 'Systolic mmHg',
    'serum_creatinine': 'mg/dL',
    'hemoglobin': 'g/dL',
    'blood_glucose_random': 'mg/dL',
    'packed_cell_volume': '%',
    'blood_urea': 'mg/dL',
    'albumin': 'Urine albumin (0–5)',
    'sugar': 'Urine sugar (0–5)',
    'hypertension': '1=Yes',
    'diabetes_mellitus': '1=Yes',
    'coronary_artery_disease': '1=Yes',
    'appetite': '1=Poor',
    'pedal_edema': '1=Yes',
    'anemia': '1=Yes'
}
st.sidebar.expander('Data Dictionary').write(data_dict)

# Sidebar filter
with st.sidebar:
    st.header('Filters')
    status = st.selectbox('CKD Status', ['All', 'CKD', 'Non-CKD'])
    if status == 'CKD':
        df = df[df['class'] == 1]
    elif status == 'Non-CKD':
        df = df[df['class'] == 0]

# Tabs
tabs = st.tabs([
    'Overview', 'Clinical', 'Categorical',
    'Stats', 'Correlation', 'Risk Factors', 'Incidence'
])

# Overview
with tabs[0]:
    st.subheader('CKD Overview')
    col1, col2 = st.columns(2)
    total = len(df)
    prev = df['class'].mean() * 100
    col1.metric('Total Patients', total)
    col2.metric('CKD Prevalence', f"{prev:.1f}%")

    # Prevalence by age group
    age_prev = df.groupby('age_group')['class'].mean() * 100
    fig, ax = plt.subplots()
    age_prev.plot(kind='bar', color='teal', ax=ax)
    ax.set_ylim(0, 100)
    ax.set_ylabel('% CKD')
    ax.set_title('Prevalence by Age Group')
    for i, v in enumerate(age_prev):
        ax.text(i, v + 2, f"{v:.1f}%", ha='center')
    st.pyplot(fig)

# Clinical
with tabs[1]:
    st.subheader('Clinical Indicators')
    indicators = ['serum_creatinine', 'hemoglobin', 'blood_glucose_random', 'packed_cell_volume']
    choice = st.selectbox('Indicator', indicators)
    fig, ax = plt.subplots()
    sns.boxplot(x='class', y=choice, data=df, palette='Set2', ax=ax)
    ax.set_xticklabels(['Non-CKD', 'CKD'])
    ax.set_ylabel(choice.replace('_', ' ').title())
    ax.set_title(f'{choice.replace("_", " ").title()} by CKD')
    st.pyplot(fig)

# Categorical
with tabs[2]:
    st.subheader('Categorical Predictors')
    cat_vars = ['hypertension', 'diabetes_mellitus', 'coronary_artery_disease', 'appetite', 'pedal_edema', 'anemia']
    for var in cat_vars:
        st.write(f"**{var.replace('_', ' ').title()}**")
        prop = df.groupby(var)['class'].value_counts(normalize=True).unstack().fillna(0) * 100
        prop = prop.rename(columns={0: 'Non-CKD', 1: 'CKD'})
        fig, ax = plt.subplots()
        prop.plot(kind='bar', ax=ax)
        ax.set_ylabel('% Patients')
        ax.set_xlabel(var.replace('_', ' ').title())
        ax.set_xticklabels(['No', 'Yes'], rotation=0)
        for p in ax.patches:
            ax.annotate(f"{p.get_height():.1f}%", (p.get_x() + p.get_width()/2, p.get_height()), ha='center', va='bottom')
        st.pyplot(fig)

# Stats
with tabs[3]:
    st.subheader('Statistical Tests')
    st.write('**Numeric (t-test)**')
    for var in ['age', 'blood_pressure', 'serum_creatinine', 'hemoglobin']:
        stat, p = ttest_ind(df[df['class'] == 1][var], df[df['class'] == 0][var])
        st.write(f"{var.replace('_',' ').title()}: p = {p:.3f}")
    st.write('**Categorical (Chi-square)**')
    for var in ['hypertension', 'diabetes_mellitus', 'pedal_edema', 'anemia']:
        cont = pd.crosstab(df[var], df['class'])
        chi2, p, _, _ = chi2_contingency(cont)
        st.write(f"{var.replace('_',' ').title()}: p = {p:.3f}")

# Correlation
with tabs[4]:
    st.subheader('Key Correlations')
    corr = df.corr(numeric_only=True)
    mask = np.abs(corr) >= 0.3
    filt = corr.where(mask).fillna(0)
    fig, ax = plt.subplots(figsize=(6, 6))
    sns.heatmap(filt, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
    ax.set_title('Significant Correlations (|r| ≥ 0.3)')
    st.pyplot(fig)

# Risk Factors
with tabs[5]:
    st.subheader('Risk Factors (Odds Ratios)')
    facts = ['hypertension', 'diabetes_mellitus', 'pedal_edema', 'anemia', 'coronary_artery_disease']
    or_vals = {}
    for var in facts:
        tab = pd.crosstab(df[var], df['class']) + 0.5
        a, b = tab.loc[1, 1], tab.loc[1, 0]
        c, d = tab.loc[0, 1], tab.loc[0, 0]
        or_vals[var.replace('_',' ').title()] = (a / b) / (c / d)
    or_df = pd.DataFrame.from_dict(or_vals, orient='index', columns=['OR']).sort_values('OR')
    fig, ax = plt.subplots()
    or_df['OR'].plot(kind='barh', color='orange', ax=ax)
    ax.set_xlabel('Odds Ratio')
    ax.set_title('Key Risk Factors for CKD')
    for i, v in enumerate(or_df['OR']):
        ax.text(v + or_df['OR'].max() * 0.01, i, f"{v:.1f}")
    st.pyplot(fig)

# Incidence
with tabs[6]:
    st.subheader('Incidence Analysis')
    # Albuminuria prevalence
    alb_prev = df.groupby('albumin')['class'].mean() * 100
    fig, ax = plt.subplots()
    alb_prev.plot(kind='bar', color='purple', ax=ax)
    ax.set_title('CKD Prevalence by Albuminuria Level')
    ax.set_xlabel('Albumin Level')
    ax.set_ylabel('Prevalence (%)')
    for i, v in enumerate(alb_prev):
        ax.text(i, v + 1, f"{v:.1f}%", ha='center')
    st.pyplot(fig)

    # Abnormal lab incidence
    defs = {
        'High Creatinine': df['serum_creatinine'] > 1.2,
        'Low Hemoglobin': df['hemoglobin'] < 12,
        'Proteinuria': df['albumin'] > 0,
        'Glycosuria': df['sugar'] > 0,
        'Pedal Edema': df['pedal_edema'] == 1,
        'Anemia': df['anemia'] == 1
    }
    data = []
    for lab, cond in defs.items():
        non = cond[df['class']==0].mean() * 100
        ck = cond[df['class']==1].mean() * 100
        data.append([non, ck])
    inc_df = pd.DataFrame(data, index=defs.keys(), columns=['Non-CKD (%)', 'CKD (%)'])
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(inc_df, annot=True, fmt='.1f', cmap='coolwarm', ax=ax)
    ax.set_title('Incidence of Abnormal Findings (%)')
    st.pyplot(fig)


2025-06-22 12:13:14.275 No runtime found, using MemoryCacheStorageManager
2025-06-22 12:13:14.281 No runtime found, using MemoryCacheStorageManager
  age_prev = df.groupby('age_group')['class'].mean() * 100

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='class', y=choice, data=df, palette='Set2', ax=ax)
  ax.set_xticklabels(['Non-CKD', 'CKD'])
