In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Plot defaults: seaborn "whitegrid" theme with the "pastel" palette,
# then a default matplotlib figure size of (10, 6).
sns.set_theme(palette="pastel", style="whitegrid")
plt.rcParams.update({'figure.figsize': (10, 6)})


try:
    # ---------------------------------------------------------
    # 1. Load, clean, split and scale the data
    # ---------------------------------------------------------
    df = pd.read_csv('new_model.csv')

    # Drop exact duplicate rows, if there are any.
    if df.duplicated().sum() > 0:
        df.drop_duplicates(inplace=True)

    # Fill missing values in every non-object column with that column's median.
    # NOTE(review): the median is computed on the full frame before the
    # train/test split, so test rows contribute to the imputed values.
    for col in df.columns:
        if df[col].dtype != 'object':
            df[col] = df[col].fillna(df[col].median())

    # 'Class' is the target; every other column is used as a feature.
    X = df.drop('Class', axis=1)
    y = df['Class']

    # 80/20 hold-out split with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scaling: fit on the training rows only, then apply the same transform to the test rows.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # ---------------------------------------------------------
    # 2. Hyperparameters that prevent overfitting
    # ---------------------------------------------------------
    print("\n--- Training Robust Model  ---")
    rf_model = RandomForestClassifier(
        n_estimators=100,
        max_depth=5,
        min_samples_leaf=4,
        min_samples_split=10,
        random_state=42
    )

    rf_model.fit(X_train_scaled, y_train)

    # ---------------------------------------------------------
    # 3.  Train vs Test Score
    # ---------------------------------------------------------
    train_pred = rf_model.predict(X_train_scaled)
    test_pred = rf_model.predict(X_test_scaled)

    train_acc = accuracy_score(y_train, train_pred)
    test_acc = accuracy_score(y_test, test_pred)

    print(f" Training Accuracy: {train_acc*100:.2f}%")
    print(f" Testing Accuracy:  {test_acc*100:.2f}%")

    # A train accuracy more than 5 points above the test accuracy is reported
    # as potential overfitting. The gap is negative when test beats train.
    diff = train_acc - test_acc
    if diff > 0.05:
        # Fixed: the message previously ended with stray non-English text.
        print(f" Warning: Still potential overfitting (Gap: {diff*100:.1f}%)")
    else:
        print(f"Great! The gap is small ({diff*100:.1f}%), meaning the model generalizes well.")

    # ---------------------------------------------------------
    # 4. Cross Validation
    # ---------------------------------------------------------
    # 5-fold cross-validation on the (already scaled) training rows.
    cv_scores = cross_val_score(rf_model, X_train_scaled, y_train, cv=5)
    print(f"\n Cross-Validation Mean Score: {cv_scores.mean()*100:.2f}% (Std: {cv_scores.std():.4f})")


except FileNotFoundError:
    print("Error: 'new_model.csv' not found. Please make sure the file is in the correct directory.")
except Exception as e:
    # NOTE(review): errors are only printed, so rf_model / scaler / X may be
    # undefined for the later cells that save them.
    print(f"An unexpected error occurred: {e}")


--- Training Robust Model  ---
 Training Accuracy: 99.06%
 Testing Accuracy:  100.00%
Great! The gap is small (-0.9%), meaning the model generalizes well.

 Cross-Validation Mean Score: 98.75% (Std: 0.0250)


In [2]:
!pip install dash plotly pandas scikit-learn -q
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, callback
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


try:
    df = pd.read_csv('new_model.csv')

    # Same cleaning as the training cell: drop duplicate rows, then fill
    # missing values in non-object columns with the column median.
    if df.duplicated().sum() > 0:
        df.drop_duplicates(inplace=True)
    for col in df.columns:
        if df[col].dtype != 'object':
            df[col] = df[col].fillna(df[col].median())

    # 'Class' is the target; every other column is used as a feature.
    X = df.drop('Class', axis=1)
    y = df['Class']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Unscaled random forest used for the dashboard's accuracy figure and
    # feature-importance chart.
    rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    rf.fit(X_train, y_train)

    acc = accuracy_score(y_test, rf.predict(X_test))

    # Compute the feature importances for the plot, most important first.
    importances = rf.feature_importances_
    feat_df = pd.DataFrame({'Feature': X.columns, 'Importance': importances}).sort_values(by='Importance', ascending=False)

except Exception as e:
    print(f"Error loading data: {e}")

    # Fallback so the layout below can still be built. The layout reads
    # `acc`, `feat_df.head(7)`, `feat_df.iloc[0]` and `feat_df.iloc[1]`, so all
    # of them need a value here, not just `df`.
    df = pd.DataFrame(columns=['Hemo', 'Bp', 'Class'])
    acc = float('nan')  # rendered as "nan%" rather than a made-up accuracy
    feat_df = pd.DataFrame({'Feature': ['Hemo', 'Bp'], 'Importance': [0.0, 0.0]})

# ==========================================
# 3. Dashboard design (Layout)
# ==========================================
# Colour palette for the dark "cyberpunk" look: page background, accent text
# colour, and the background used for chart panels.
colors = dict(
    background='#0f0f0f',
    text='#00ffcc',
    panel='#1a1a1a',
)

# Dash application, styled with one external stylesheet.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)

# The layout is assembled from named pieces, top to bottom:
# title, accuracy line, a row with two half-width panels, and a full-width panel.

# Style shared by the two dark, rounded chart panels.
panel_box = {'backgroundColor': colors['panel'], 'padding': '10px', 'borderRadius': '10px'}

# Every column except the target can be chosen as an axis.
axis_options = [{'label': i, 'value': i} for i in df.columns if i != 'Class']

# --- Feature importance (top 7 rows of feat_df) ---
importance_fig = px.bar(
    feat_df.head(7), x='Importance', y='Feature', orientation='h',
    template='plotly_dark', color='Importance', color_continuous_scale='Viridis',
)
importance_fig.update_layout(paper_bgcolor=colors['panel'], plot_bgcolor=colors['panel'])

importance_panel = html.Div(
    [
        html.H4("Top Risk Factors", style={'textAlign': 'center'}),
        dcc.Graph(figure=importance_fig),
    ],
    style={'width': '48%', 'display': 'inline-block', 'verticalAlign': 'top'},
)

# --- Scatter plot with axis selectors ---
x_selector = html.Div(
    [
        html.Label("Select X Axis:"),
        dcc.Dropdown(
            id='xaxis-column',
            options=axis_options,
            value=feat_df['Feature'].iloc[0],  # default: first row of feat_df
            style={'color': 'black'},
        ),
    ],
    style={'width': '48%', 'display': 'inline-block'},
)

y_selector = html.Div(
    [
        html.Label("Select Y Axis:"),
        dcc.Dropdown(
            id='yaxis-column',
            options=axis_options,
            value=feat_df['Feature'].iloc[1],  # default: second row of feat_df
            style={'color': 'black'},
        ),
    ],
    style={'width': '48%', 'display': 'inline-block', 'float': 'right'},
)

scatter_panel = html.Div(
    [
        html.H4("Interactive Analysis ", style={'textAlign': 'center', 'color': '#00ffcc'}),
        x_selector,
        y_selector,
        dcc.Graph(id='indicator-graphic'),  # figure supplied by the callback
    ],
    style={'width': '48%', 'display': 'inline-block', 'float': 'right', **panel_box},
)

# --- Distribution plot ---
distribution_panel = html.Div(
    [
        html.H4("Distribution Analysis", style={'textAlign': 'center'}),
        dcc.Graph(id='dist-graphic'),  # figure supplied by the callback
    ],
    style={'width': '100%', **panel_box},
)

app.layout = html.Div(
    [
        # Title
        html.H1(" Kidney Disease AI Dashboard",
                style={'textAlign': 'center', 'color': colors['text'], 'fontWeight': 'bold'}),
        html.Div(f"Model Accuracy: {acc*100:.2f}%",
                 style={'textAlign': 'center', 'fontSize': '20px', 'color': '#ffcc00', 'marginBottom': '30px'}),
        html.Div([importance_panel, scatter_panel], style={'marginBottom': '20px'}),
        distribution_panel,
    ],
    style={'backgroundColor': colors['background'], 'color': 'white', 'padding': '20px', 'minHeight': '100vh'},
)

# ==========================================
# 4. Interactivity (Callbacks) - the brain of the app
# ==========================================
def _class_label(value):
    """Return a string label for a class value, so 0, 0.0 and '0' all become '0'."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return str(value)
    return str(int(number)) if number.is_integer() else str(value)


@app.callback(
    [Output('indicator-graphic', 'figure'),
     Output('dist-graphic', 'figure')],
    [Input('xaxis-column', 'value'),
     Input('yaxis-column', 'value')]
)
def update_graph(xaxis_name, yaxis_name):
    """Rebuild both interactive figures when either axis dropdown changes.

    Args:
        xaxis_name: Column selected in the 'xaxis-column' dropdown.
        yaxis_name: Column selected in the 'yaxis-column' dropdown.

    Returns:
        A ``(scatter, box)`` pair of figures for 'indicator-graphic' and
        'dist-graphic' respectively.
    """
    # Plotly Express treats a numeric colour column as continuous, in which
    # case `color_discrete_map` is not applied to the scatter. Plot from a copy
    # whose 'Class' column holds string labels so the two fixed class colours
    # are used; the module-level `df` is left untouched.
    plot_df = df.assign(Class=df['Class'].map(_class_label))
    class_colors = {'0': '#00ccff', '1': '#ff3333'}
    # Fixed label order so the legend and box positions do not depend on row order.
    class_order = {'Class': sorted(plot_df['Class'].unique())}

    # 1. Scatter Plot
    fig1 = px.scatter(plot_df, x=xaxis_name, y=yaxis_name, color='Class',
                      color_discrete_map=class_colors, category_orders=class_order,
                      template='plotly_dark', title=f'{xaxis_name} vs {yaxis_name}')
    fig1.update_layout(transition_duration=500, paper_bgcolor=colors['panel'], plot_bgcolor=colors['panel'])

    # 2. Box Plot
    fig2 = px.box(plot_df, x='Class', y=xaxis_name, color='Class',
                  color_discrete_map=class_colors, category_orders=class_order,
                  template='plotly_dark', title=f'Distribution of {xaxis_name} by Class')
    fig2.update_layout(paper_bgcolor=colors['panel'], plot_bgcolor=colors['panel'])

    return fig1, fig2

# ==========================================
# 5. Run
# ==========================================
if __name__ == '__main__':
    # Start the Dash server on port 8051 with jupyter_mode='inline'.
    app.run(port=8051, jupyter_mode='inline')


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import joblib
# Persist the fitted model, the scaler and the feature-name index.
# app.py loads these exact file names.
for path, artifact in (
    ('kidney_model_final.pkl', rf_model),
    ('scaler_final.pkl', scaler),
    ('features_names.pkl', X.columns),
):
    joblib.dump(artifact, path)

print(" Model, Scaler, and Feature names saved successfully!")

 Model, Scaler, and Feature names saved successfully!


In [4]:
%%writefile app.py
import streamlit as st
import joblib
import numpy as np
import pandas as pd

# 1. Set up the page look (The Glow)
st.set_page_config(page_title="Kidney AI Doctor", page_icon="🩺", layout="centered")

st.markdown("""
    <style>
    .main {background-color: #0e1117;}
    h1 {color: #00ffcc; text-align: center;}
    .stButton>button {width: 100%; background-color: #ff4b4b; color: white;}
    </style>
    """, unsafe_allow_html=True)


# 2. Load the saved files
@st.cache_resource
def load_artifacts():
    """Load the saved model, scaler and feature names.

    Streamlit reruns this script on every widget interaction; caching the
    result means the three files are read once instead of on every rerun.

    Returns:
        A ``(model, scaler, feature_names)`` tuple.
    """
    # NOTE: joblib.load unpickles its input, so only point this at files you
    # produced yourself.
    return (
        joblib.load('kidney_model_final.pkl'),
        joblib.load('scaler_final.pkl'),
        joblib.load('features_names.pkl'),
    )


model, scaler, feature_names = load_artifacts()

st.title(" Kidney Disease Prediction AI")
st.write("### Please enter patient data below:")
col1, col2 = st.columns(2)
user_inputs = []

# One numeric input per feature, alternating between the two columns.
# Values are collected in the same order as feature_names.
for i, col_name in enumerate(feature_names):
    with (col1 if i % 2 == 0 else col2):
        val = st.number_input(f"{col_name}", value=0.0, step=0.1)
        user_inputs.append(val)

if st.button("Analyze Result"):
    # Build a one-row frame with the saved column names instead of a bare
    # array: the scaler was fitted on a DataFrame, so this lets scikit-learn
    # match the feature names rather than warn about missing ones.
    input_frame = pd.DataFrame([user_inputs], columns=feature_names)
    data_scaled = scaler.transform(input_frame)

    prediction = model.predict(data_scaled)
    # Column 1 of predict_proba is taken as the probability of class 1
    # (assumes model.classes_ is [0, 1] — TODO confirm).
    prob = model.predict_proba(data_scaled)[0][1]
    st.divider()

    if prediction[0] == 1:
        st.error(f" Positive for CKD (Chronic Kidney Disease)")
        st.write(f"Confidence: **{prob*100:.1f}%**")
        st.warning("Please consult a Nephrologist immediately.")
    else:
        st.success(f" Negative (Healthy)")
        st.write(f"Confidence: **{(1-prob)*100:.1f}%**")
        st.balloons()

Overwriting app.py


In [6]:
# Reload the saved feature names (the same file app.py loads) and display them.
feature_names = joblib.load('features_names.pkl')
feature_names

Index(['Bp', 'Sg', 'Al', 'Su', 'Rbc', 'Bu', 'Sc', 'Sod', 'Pot', 'Hemo', 'Wbcc',
       'Rbcc', 'Htn'],
      dtype='object')

In [7]:
# Reload the saved scaler and display it. It gets its own name so that it does
# not overwrite `feature_names`, which holds the feature-name index.
loaded_scaler = joblib.load('scaler_final.pkl')
loaded_scaler

In [8]:
# Number of distinct values in each column of df.
df.nunique()

Bp        11
Sg         5
Al         6
Su         6
Rbc        2
Bu       118
Sc        85
Sod       35
Pot       41
Hemo     116
Wbcc      90
Rbcc      46
Htn        3
Class      2
dtype: int64