In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, f1_score
from sklearn.preprocessing import StandardScaler
import pickle



In [10]:
# Load and preprocess the data
dataset = pd.read_csv('CKD.csv')
dataset = pd.get_dummies(dataset, drop_first=True)

In [11]:
# Define features and target
independent = dataset[['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv', 'wc', 'rc',
                       'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal', 'pcc_present', 'ba_present',
                       'htn_yes', 'dm_yes', 'cad_yes', 'appet_yes', 'pe_yes', 'ane_yes']]
dependent = dataset[['classification_yes']]


In [12]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(independent, dependent, test_size=0.30, random_state=0)


In [13]:

# Convert y_train and y_test to 1D arrays
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()



In [14]:
# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [15]:
# Define and train the model
grid_params = {'n_estimators': [10, 100, 1000],
               'criterion': ['gini', 'entropy'],
               'max_features': ['auto', 'sqrt', 'log2']}
grid = GridSearchCV(RandomForestClassifier(), grid_params, refit=True, verbose=0, n_jobs=1, scoring='f1_weighted')



In [16]:
# Fit the model
try:
    grid.fit(X_train_scaled, y_train)
except Exception as e:
    print("Error during GridSearchCV:", e)

In [17]:
# Predictions and metrics
grid_predictions = grid.predict(X_test_scaled)
cm = confusion_matrix(y_test, grid_predictions)
print("Confusion Matrix:\n", cm)
clf_report = classification_report(y_test, grid_predictions)
print("Classification Report:\n", clf_report)
roc_auc = roc_auc_score(y_test, grid.predict_proba(X_test_scaled)[:, 1])
print(f"ROC AUC Score: {roc_auc}")
f1_macro = f1_score(y_test, grid_predictions, average='weighted')
print(f"Weighted F1 Score: {f1_macro}")

Confusion Matrix:
 [[44  1]
 [ 1 74]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        45
           1       0.99      0.99      0.99        75

    accuracy                           0.98       120
   macro avg       0.98      0.98      0.98       120
weighted avg       0.98      0.98      0.98       120

ROC AUC Score: 0.9997037037037036
Weighted F1 Score: 0.9833333333333335


In [18]:
# Print best parameters
print(f"Best parameters: {grid.best_params_}")



Best parameters: {'criterion': 'gini', 'max_features': 'auto', 'n_estimators': 100}


In [19]:
# Save and load model
filename = 'RandomForestCKDModel.sav'
pickle.dump(grid, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))



In [20]:
# Diet recommendation (simplified example)
def recommend_diet(prediction):
    if prediction == 1:
        return "Follow a low sodium, low potassium diet. Increase hydration and monitor protein intake."
    else:
        return "Maintain a balanced diet with regular health check-ups."


In [23]:

# User input and prediction
age = int(input("Enter your Age: "))
bp = float(input("Enter bp level: "))
al = float(input("Enter al level: "))
su = float(input("Enter su level: "))
bgr = float(input("Enter bgr level: "))
bu = float(input("Enter bu level: "))
sc = float(input("Enter sc level: "))
sod = float(input("Enter sod level: "))
pot = float(input("Enter pot level: "))
hrmo = float(input("Enter hrmo level: "))
pcv = float(input("Enter pcv level: "))
wc = float(input("Enter wc level: "))
rc = float(input("Enter rc level: "))
sg_b = int(input("Enter sg_b 0 or 1: "))
sg_c = int(input("Enter sg_c 0 or 1: "))
sg_d = int(input("Enter sg_d 0 or 1: "))
sg_e = int(input("Enter sg_e 0 or 1: "))
rbc_normal = int(input("Enter rbc_normal 0 or 1: "))
pc_normal = int(input("Enter pc_normal 0 or 1: "))
pcc_present = int(input("Enter pcc_present 0 or 1: "))
ba_present = int(input("Enter ba_present 0 or 1: "))
htn_yes = int(input("Enter htn_yes 0 or 1: "))
dm_yes = int(input("Enter dm_yes 0 or 1: "))
cad_yes = int(input("Enter cad_yes 0 or 1: "))
appet_yes = int(input("Enter appet_yes 0 or 1: "))
pe_yes = int(input("Enter pe_yes 0 or 1: "))
ane_yes = int(input("Enter ane_yes 0 or 1: "))



Enter your Age: 45
Enter bp level: 5
Enter al level: 6
Enter su level: 9
Enter bgr level: 8
Enter bu level: 7
Enter sc level: 87
Enter sod level: 8
Enter pot level: 7
Enter hrmo level: 8
Enter pcv level: 6
Enter wc level: 8
Enter rc level: 6
Enter sg_b 0 or 1: 0
Enter sg_c 0 or 1: 0
Enter sg_d 0 or 1: 0
Enter sg_e 0 or 1: 0
Enter rbc_normal 0 or 1: 0
Enter pc_normal 0 or 1: 0
Enter pcc_present 0 or 1: 0
Enter ba_present 0 or 1: 0
Enter htn_yes 0 or 1: 0
Enter dm_yes 0 or 1: 0
Enter cad_yes 0 or 1: 0
Enter appet_yes 0 or 1: 1
Enter pe_yes 0 or 1: 1
Enter ane_yes 0 or 1: 1


In [24]:
# Make prediction
input_features = [[age, bp, al, su, bgr, bu, sc, sod, pot, hrmo, pcv, wc, rc, sg_b, sg_c, sg_d, sg_e, rbc_normal,
                    pc_normal, pcc_present, ba_present, htn_yes, dm_yes, cad_yes, appet_yes, pe_yes, ane_yes]]
input_features_scaled = scaler.transform(input_features)
future_predictions = loaded_model.predict(input_features_scaled)
recommendation = recommend_diet(future_predictions[0])
print(f"Future Prediction: {future_predictions[0]}")
print(f"Diet Recommendation: {recommendation}")

Future Prediction: 1
Diet Recommendation: Follow a low sodium, low potassium diet. Increase hydration and monitor protein intake.


  "X does not have valid feature names, but"
