In [31]:
import os

csv_path = r"F:\data Analyst\project\JN\Kindney Diseases3\CSV"

print("Folder ke andar ye files hain:")
print(os.listdir(csv_path))


Folder ke andar ye files hain:
['acute_kidney_injury_aki (1).csv', 'acute_kidney_injury_aki.csv', 'chronic_kidney_disease.csv', 'glomerulonephritis.csv', 'kidney_failure_esrd.csv', 'nephrotic_syndrome.csv', 'renal_tubular_acidosis_rta.csv', 'synthetic_AKI_dataset.csv', 'uti_affecting_kidney.csv']


In [33]:
import pandas as pd
import os

csv_path = r"F:\data Analyst\project\JN\Kindney Diseases3\CSV"
dataframes = []
for file in os.listdir(csv_path):
    if file.endswith(".csv") or file.endswith(".CSV"):   # dono case cover
        filepath = os.path.join(csv_path, file)
        try:
            df = pd.read_csv(filepath)
            disease_name = os.path.splitext(file)[0]   # file name se label
            df["Label"] = disease_name
            dataframes.append(df)
            print(f"Loaded: {file} → {df.shape}")
        except Exception as e:
            print(f"⚠️ Error loading {file}: {e}")

if dataframes:
    final_df = pd.concat(dataframes, ignore_index=True)
    final_df.to_csv("merged_kidney_dataset.csv", index=False)
    print("✅ Merged dataset saved: merged_kidney_dataset.csv, shape =", final_df.shape)
else:
    print("❌ Abhi bhi koi CSV load nahi hui.")


Loaded: acute_kidney_injury_aki (1).csv → (800, 14)
Loaded: acute_kidney_injury_aki.csv → (800, 14)
Loaded: chronic_kidney_disease.csv → (800, 14)
Loaded: glomerulonephritis.csv → (800, 15)
Loaded: kidney_failure_esrd.csv → (800, 15)
Loaded: nephrotic_syndrome.csv → (800, 15)
Loaded: renal_tubular_acidosis_rta.csv → (800, 16)
Loaded: synthetic_AKI_dataset.csv → (1000, 9)
Loaded: uti_affecting_kidney.csv → (800, 16)
✅ Merged dataset saved: merged_kidney_dataset.csv, shape = (7400, 23)


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, r2_score


In [2]:
df = pd.read_csv("merged_kidney_dataset.csv")
print("Dataset shape:", df.shape)

Dataset shape: (7400, 23)


In [3]:
df

Unnamed: 0,Age,Blood_Pressure,Serum_Creatinine,Blood_Urea,eGFR,Albumin,Urine_Protein,Hemoglobin,Sodium,Potassium,...,Label,Hematuria_flag,On_Dialysis_flag,Cholesterol,Bicarbonate,Blood_pH,Urine_Output(L/day),AKI_Diagnosis,Urine_Culture_Positive,Fever_flag
0,58,96,1.183615,89.386016,63.733595,3.69,0.00,10.0,142.4,4.05,...,acute_kidney_injury_aki (1),,,,,,,,,
1,45,143,3.302938,24.381352,1.000000,4.08,0.12,14.3,139.9,3.93,...,acute_kidney_injury_aki (1),,,,,,,,,
2,33,156,3.058597,72.085706,52.412168,3.73,0.41,16.6,132.4,5.14,...,acute_kidney_injury_aki (1),,,,,,,,,
3,52,99,1.879894,44.724641,1.000000,4.71,0.00,8.1,136.1,4.38,...,acute_kidney_injury_aki (1),,,,,,,,,
4,53,150,4.882418,80.062529,1.000000,3.84,0.01,14.7,140.0,4.47,...,acute_kidney_injury_aki (1),,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7395,60,142,2.556737,47.700000,95.300000,3.72,0.16,17.0,141.1,4.02,...,uti_affecting_kidney,,,,,,,,1.0,1.0
7396,28,142,2.013805,33.400000,94.900000,2.96,0.18,11.6,135.0,4.13,...,uti_affecting_kidney,,,,,,,,0.0,1.0
7397,58,132,1.376119,24.100000,85.700000,3.16,0.31,14.9,143.2,3.51,...,uti_affecting_kidney,,,,,,,,1.0,0.0
7398,82,140,1.315826,22.400000,128.400000,4.27,0.00,11.9,137.0,3.81,...,uti_affecting_kidney,,,,,,,,1.0,1.0


In [4]:
df.columns
df.isnull().sum()

Age                          0
Blood_Pressure               0
Serum_Creatinine             0
Blood_Urea                   0
eGFR                      1000
Albumin                   1000
Urine_Protein             1000
Hemoglobin                1000
Sodium                       0
Potassium                    0
Urine_Output_L_per_day    1000
WBC_count                 1000
Diagnosis                 1000
Label                        0
Hematuria_flag            6600
On_Dialysis_flag          6600
Cholesterol               6600
Bicarbonate               6600
Blood_pH                  6600
Urine_Output(L/day)       6400
AKI_Diagnosis             6400
Urine_Culture_Positive    6600
Fever_flag                6600
dtype: int64

In [5]:
# Disease ka solution dictionary
disease_solutions = {
    "AKI": "Acute Kidney Injury: Hydration maintain karo, nephrologist consult karo, avoid nephrotoxic drugs.",
    "CKD": "Chronic Kidney Disease: Low-salt diet, blood pressure control, dialysis/join nephrologist plan.",
    "uti_affecting_kidney": "Urinary Tract Infection: Antibiotics course, hydration, avoid holding urine.",
    "Kidney_Stone": "Kidney Stone: Hydration (3-4L/day), pain management, lithotripsy if needed.",
    "Nephrotic_Syndrome": "Nephrotic Syndrome: Steroids, salt restriction, protein monitoring.",
    "Renal_Tubular_Acidosis": "RTA: Bicarbonate therapy, potassium supplements.",
    "ESRD": "End Stage Renal Disease: Regular dialysis or kidney transplant.",
    "Glomerulonephritis": "Glomerulonephritis: Immunosuppressive drugs, blood pressure control."
}


In [6]:
# Features select
features = ['Age', 'Blood_Pressure', 'Serum_Creatinine', 'Blood_Urea',
            'Sodium', 'Potassium']

In [7]:
X = df[features]
y = df['Label']

In [8]:
# Encode labels
le = LabelEncoder()
y = le.fit_transform(y)

In [9]:
# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Train model
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [11]:
# suppose your trained model is named `model`
y_train_pred = model.predict(X_train)

print(r2_score(y_train, y_train_pred))

0.987660136455698


In [12]:
sample = [[57,93,2.81,34.20,138.7,4.45]]

In [13]:
# Predict
pred = model.predict(sample)
pred_disease = le.inverse_transform(pred)[0]



In [14]:
# Disease ka solution lao
solution = disease_solutions.get(pred_disease, "⚠️ Consult a nephrologist for detailed evaluation.")

In [15]:
print("🔮 Predicted Disease:", pred_disease)
print("💊 Suggested Solution:", solution)

🔮 Predicted Disease: uti_affecting_kidney
💊 Suggested Solution: Urinary Tract Infection: Antibiotics course, hydration, avoid holding urine.


In [17]:
import pickle
# Save model
with open("clinical_model.pkl", "wb") as file:
    pickle.dump(model, file)

print("✅ Model saved as clinical_model.pkl")

✅ Model saved as clinical_model.pkl
