## Chronic Kidney Disease (CKD)

[Colab NoteBook](https://colab.research.google.com/drive/1Ygz6NTw-u_PvycUeTjgB3VxLAVvJkGXA)

[Github](https://github.com/z5208980/machine-learning-health/tree/main/chronic_kidney_disease) (dataset, model, notebook)

[Dataset Source](https://www.kaggle.com/datasets/mansoordaku/ckdisease)

In [42]:
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import binarize, LabelEncoder, MinMaxScaler, StandardScaler
from sklearn import metrics
from sklearn.metrics import accuracy_score, mean_squared_error, precision_recall_curve
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

In [43]:
df = pd.read_csv('https://raw.githubusercontent.com/z5208980/machine-learning-health/main/chronic_kidney_disease/data/data.csv')
df.drop("id",axis=1,inplace=True)

df.head()

# Drop rows that have a column that has NaN
df.dropna(axis=0,inplace=True)
df

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,117.0,...,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
9,53.0,90.0,1.020,2.0,0.0,abnormal,abnormal,present,notpresent,70.0,...,29,12100,3.7,yes,yes,no,poor,no,yes,ckd
11,63.0,70.0,1.010,3.0,0.0,abnormal,abnormal,present,notpresent,380.0,...,32,4500,3.8,yes,yes,no,poor,yes,no,ckd
14,68.0,80.0,1.010,3.0,2.0,normal,abnormal,present,present,157.0,...,16,11000,2.6,yes,yes,yes,poor,yes,no,ckd
20,61.0,80.0,1.015,2.0,0.0,abnormal,abnormal,notpresent,notpresent,173.0,...,24,9200,3.2,yes,yes,yes,poor,yes,yes,ckd
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,55.0,80.0,1.020,0.0,0.0,normal,normal,notpresent,notpresent,140.0,...,47,6700,4.9,no,no,no,good,no,no,notckd
396,42.0,70.0,1.025,0.0,0.0,normal,normal,notpresent,notpresent,75.0,...,54,7800,6.2,no,no,no,good,no,no,notckd
397,12.0,80.0,1.020,0.0,0.0,normal,normal,notpresent,notpresent,100.0,...,49,6600,5.4,no,no,no,good,no,no,notckd
398,17.0,60.0,1.025,0.0,0.0,normal,normal,notpresent,notpresent,114.0,...,51,7200,5.9,no,no,no,good,no,no,notckd


In [44]:
# Encoding classification means ckd = 1, notckd = 0
encode_features = ["htn", "dm", "cad", "pe", "ane", "rbc", "pc", "appet", "pcc", "ba", "classification"]
for feature in encode_features:
  encoder = LabelEncoder()
  encoder.fit(df[feature])
  df[feature] = encoder.transform(df[feature])

scaler_features = ["age", "bp", "sg", "al", "bgr", "wc", "rc", "bu", "sc", "sod", "pot", "hemo", "pcv"]
for feature in scaler_features:
  scaler = MinMaxScaler()
  df[feature] = scaler.fit_transform(df[[feature]])

df.head()

filename = '/content/sample_data/processed.csv'
df.to_csv(filename)  

In [45]:
X = df.drop("classification",axis=1)
y = df.classification

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=200)

In [46]:
model = RandomForestClassifier()
model.fit(X_train, np.ravel(y_train))

y_pred_class = model.predict(X_test)

print('RESULT')
print('Accuracy:', metrics.accuracy_score(y_test, y_pred_class))

filename = '/content/sample_data/model.sav'
pickle.dump(model, open(filename, 'wb'))

RESULT
Accuracy: 1.0


In [47]:
model = pickle.load(open('/content/sample_data/model.sav', 'rb'))   # load model

val = []
row = 23
for x in X_train.iloc[row]:
  val.append(x)

input = [val]
output = model.predict(input)

print("X=%s, Predicted=%s, Actually=%s" % (input[0], output[0], y_train.iloc[row]))

X=[0.2987012987012987, 0.4999999999999999, 0.7500000000000071, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.030952380952380953, 0.13043478260869565, 0.033783783783783786, 0.7179487179487176, 0.01797752808988763, 0.9795918367346936, 0.6888888888888889, 0.26991150442477874, 0.4406779661016949, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Predicted=1, Actually=1


  "X does not have valid feature names, but"
